From 97156897b2460fd1cc88d4ad54684e92323fc990 Mon Sep 17 00:00:00 2001 From: niko Date: Thu, 11 Oct 2012 16:22:47 +0800 Subject: [PATCH] format files to ANSI C style with coolformat change the download channels to oclchannles() fix bugs of arithm functions perf fix of bilateral bug fix of split test case add build_warps functions --- .../ocl/include/opencv2/ocl/matrix_operations.hpp | 83 +- modules/ocl/include/opencv2/ocl/ocl.hpp | 1639 +++-- modules/ocl/perf/interpolation.hpp | 10 +- modules/ocl/perf/main.cpp | 36 +- modules/ocl/perf/perf_arithm.cpp | 7011 +++++++++++--------- modules/ocl/perf/perf_blend.cpp | 112 +- modules/ocl/perf/perf_canny.cpp | 106 +- modules/ocl/perf/perf_columnsum.cpp | 64 +- modules/ocl/perf/perf_fft.cpp | 84 +- modules/ocl/perf/perf_filters.cpp | 1935 +++--- modules/ocl/perf/perf_gemm.cpp | 86 +- modules/ocl/perf/perf_haar.cpp | 213 +- modules/ocl/perf/perf_hog.cpp | 162 +- modules/ocl/perf/perf_imgproc.cpp | 3342 +++++----- modules/ocl/perf/perf_match_template.cpp | 172 +- modules/ocl/perf/perf_matrix_operation.cpp | 1157 ++-- modules/ocl/perf/perf_pyrdown.cpp | 128 +- modules/ocl/perf/perf_pyrup.cpp | 108 +- modules/ocl/perf/perf_split_merge.cpp | 782 +-- modules/ocl/perf/perf_surf.cpp | 96 +- modules/ocl/perf/precomp.cpp | 1 - modules/ocl/perf/utility.cpp | 20 +- modules/ocl/perf/utility.hpp | 39 +- modules/ocl/src/arithm.cpp | 433 +- modules/ocl/src/blend.cpp | 69 +- modules/ocl/src/brute_force_matcher.cpp | 1417 ++-- modules/ocl/src/build_warps.cpp | 280 + modules/ocl/src/canny.cpp | 75 +- modules/ocl/src/color.cpp | 6 +- modules/ocl/src/columnsum.cpp | 53 +- modules/ocl/src/fft.cpp | 105 +- modules/ocl/src/filtering.cpp | 394 +- modules/ocl/src/gemm.cpp | 201 +- modules/ocl/src/haar.cpp | 292 +- modules/ocl/src/hog.cpp | 1911 +++--- modules/ocl/src/imgproc.cpp | 998 +-- modules/ocl/src/initialization.cpp | 299 +- modules/ocl/src/interpolate_frames.cpp | 315 + modules/ocl/src/kernels/arithm_absdiff.cl | 29 +- 
modules/ocl/src/kernels/arithm_add.cl | 50 +- modules/ocl/src/kernels/arithm_add_scalar.cl | 12 +- modules/ocl/src/kernels/arithm_add_scalar_mask.cl | 19 +- modules/ocl/src/kernels/arithm_flip.cl | 19 +- modules/ocl/src/kernels/build_warps.cl | 237 + modules/ocl/src/kernels/filtering_boxFilter.cl | 16 +- modules/ocl/src/kernels/imgproc_bilateral.cl | 151 +- modules/ocl/src/kernels/imgproc_histogram.cl | 14 +- modules/ocl/src/kernels/interpolate_frames.cl | 252 + modules/ocl/src/match_template.cpp | 740 +-- modules/ocl/src/matrix_operations.cpp | 948 +-- modules/ocl/src/mcwutil.cpp | 20 +- modules/ocl/src/mcwutil.hpp | 6 +- modules/ocl/src/precomp.hpp | 22 +- modules/ocl/src/pyrdown.cpp | 8 +- modules/ocl/src/pyrlk.cpp | 530 +- modules/ocl/src/pyrup.cpp | 69 +- modules/ocl/src/split_merge.cpp | 60 +- modules/ocl/src/surf.cpp | 231 +- modules/ocl/test/main.cpp | 16 +- modules/ocl/test/test_arithm.cpp | 42 +- modules/ocl/test/test_blend.cpp | 39 +- modules/ocl/test/test_brute_force_matcher.cpp | 279 +- modules/ocl/test/test_canny.cpp | 51 +- modules/ocl/test/test_columnsum.cpp | 18 +- modules/ocl/test/test_fft.cpp | 54 +- modules/ocl/test/test_filters.cpp | 38 +- modules/ocl/test/test_gemm.cpp | 40 +- modules/ocl/test/test_haar.cpp | 191 +- modules/ocl/test/test_hog.cpp | 34 +- modules/ocl/test/test_imgproc.cpp | 377 +- modules/ocl/test/test_match_template.cpp | 89 +- modules/ocl/test/test_matrix_operation.cpp | 32 +- modules/ocl/test/test_pyrdown.cpp | 30 +- modules/ocl/test/test_pyrlk.cpp | 22 +- modules/ocl/test/test_pyrup.cpp | 50 +- modules/ocl/test/test_split_merge.cpp | 32 +- modules/ocl/test/utility.cpp | 2 +- modules/ocl/test/utility.hpp | 2 +- 78 files changed, 16210 insertions(+), 12895 deletions(-) create mode 100644 modules/ocl/src/build_warps.cpp create mode 100644 modules/ocl/src/interpolate_frames.cpp create mode 100644 modules/ocl/src/kernels/build_warps.cl create mode 100644 modules/ocl/src/kernels/interpolate_frames.cl diff --git 
a/modules/ocl/include/opencv2/ocl/matrix_operations.hpp b/modules/ocl/include/opencv2/ocl/matrix_operations.hpp index 7db34f8..d528aeb 100644 --- a/modules/ocl/include/opencv2/ocl/matrix_operations.hpp +++ b/modules/ocl/include/opencv2/ocl/matrix_operations.hpp @@ -55,22 +55,22 @@ namespace cv //////////////////////////////// oclMat //////////////////////////////// //////////////////////////////////////////////////////////////////////// - inline oclMat::oclMat() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0) {} + inline oclMat::oclMat() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0) {} - inline oclMat::oclMat(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0) + inline oclMat::oclMat(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0) { if( _rows > 0 && _cols > 0 ) create( _rows, _cols, _type ); } - inline oclMat::oclMat(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0) + inline oclMat::oclMat(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0) { if( _size.height > 0 && _size.width > 0 ) create( _size.height, _size.width, _type ); } inline oclMat::oclMat(int _rows, int _cols, int _type, const Scalar &_s) - : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), 
wholerows(0), wholecols(0) { if(_rows > 0 && _cols > 0) { @@ -80,7 +80,7 @@ namespace cv } inline oclMat::oclMat(Size _size, int _type, const Scalar &_s) - : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0) { if( _size.height > 0 && _size.width > 0 ) { @@ -91,18 +91,18 @@ namespace cv inline oclMat::oclMat(const oclMat &m) : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), - refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), clCxt(m.clCxt), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols), download_channels(m.download_channels) + refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), clCxt(m.clCxt), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols) { if( refcount ) CV_XADD(refcount, 1); } - + inline oclMat::oclMat(int _rows, int _cols, int _type, void *_data, size_t _step) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), - datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0) + datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0) { - cv::Mat m(_rows,_cols,_type,_data,_step); - upload(m); + cv::Mat m(_rows, _cols, _type, _data, _step); + upload(m); //size_t minstep = cols * elemSize(); //if( step == Mat::AUTO_STEP ) //{ @@ -117,14 +117,14 @@ namespace cv //} //dataend += step * (rows - 1) + minstep; } - + inline oclMat::oclMat(Size _size, int _type, void *_data, size_t _step) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), - datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0) + datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0) { - cv::Mat m(_size,_type,_data,_step); - upload(m); + cv::Mat m(_size, _type, _data, _step); + upload(m); //size_t minstep = cols 
* elemSize(); //if( step == Mat::AUTO_STEP ) //{ @@ -152,7 +152,6 @@ namespace cv wholerows = m.wholerows; wholecols = m.wholecols; offset = m.offset; - download_channels = m.download_channels; if( rowRange == Range::all() ) rows = m.rows; else @@ -184,7 +183,7 @@ namespace cv inline oclMat::oclMat(const oclMat &m, const Rect &roi) : flags(m.flags), rows(roi.height), cols(roi.width), step(m.step), data(m.data), refcount(m.refcount), - datastart(m.datastart), dataend(m.dataend), clCxt(m.clCxt), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols), download_channels(m.download_channels) + datastart(m.datastart), dataend(m.dataend), clCxt(m.clCxt), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols) { flags &= roi.width < m.cols ? ~Mat::CONTINUOUS_FLAG : -1; offset += roi.y * step + roi.x * elemSize(); @@ -197,7 +196,7 @@ namespace cv } inline oclMat::oclMat(const Mat &m) - : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) , offset(0), wholerows(0), wholecols(0), download_channels(0) + : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) , offset(0), wholerows(0), wholecols(0) { //clCxt = Context::getContext(); upload(m); @@ -227,7 +226,6 @@ namespace cv wholerows = m.wholerows; wholecols = m.wholecols; refcount = m.refcount; - download_channels = m.download_channels; } return *this; } @@ -327,10 +325,9 @@ namespace cv std::swap( dataend, b.dataend ); std::swap( refcount, b.refcount ); std::swap( offset, b.offset ); - std::swap( clCxt, b.clCxt ); + std::swap( clCxt, b.clCxt ); std::swap( wholerows, b.wholerows ); std::swap( wholecols, b.wholecols ); - std::swap( download_channels, b.download_channels); } inline void oclMat::locateROI( Size &wholeSize, Point &ofs ) const @@ -366,7 +363,7 @@ namespace cv offset += (row1 - ofs.y) * step + (col1 - ofs.x) * esz; rows = row2 - row1; cols = col2 - col1; - if( esz *cols == step || rows == 1 ) + if( esz * cols == step || rows == 1 ) 
flags |= Mat::CONTINUOUS_FLAG; else flags &= ~Mat::CONTINUOUS_FLAG; @@ -388,7 +385,7 @@ namespace cv } inline size_t oclMat::elemSize() const { - return CV_ELEM_SIZE(flags); + return CV_ELEM_SIZE((CV_MAKE_TYPE(type(), oclchannels()))); } inline size_t oclMat::elemSize1() const { @@ -398,6 +395,10 @@ namespace cv { return CV_MAT_TYPE(flags); } + inline int oclMat::ocltype() const + { + return CV_MAKE_TYPE(depth(), oclchannels()); + } inline int oclMat::depth() const { return CV_MAT_DEPTH(flags); @@ -406,6 +407,10 @@ namespace cv { return CV_MAT_CN(flags); } + inline int oclMat::oclchannels() const + { + return (CV_MAT_CN(flags)) == 3 ? 4 : (CV_MAT_CN(flags)); + } inline size_t oclMat::step1() const { return step / elemSize1(); @@ -420,32 +425,32 @@ namespace cv } - + inline uchar *oclMat::ptr(int y) { CV_DbgAssert( (unsigned)y < (unsigned)rows ); - CV_Error(CV_GpuNotSupported,"This function hasn't been supported yet.\n"); + CV_Error(CV_GpuNotSupported, "This function hasn't been supported yet.\n"); return data + step * y; } inline const uchar *oclMat::ptr(int y) const { CV_DbgAssert( (unsigned)y < (unsigned)rows ); - CV_Error(CV_GpuNotSupported,"This function hasn't been supported yet.\n"); + CV_Error(CV_GpuNotSupported, "This function hasn't been supported yet.\n"); return data + step * y; } template inline _Tp *oclMat::ptr(int y) { CV_DbgAssert( (unsigned)y < (unsigned)rows ); - CV_Error(CV_GpuNotSupported,"This function hasn't been supported yet.\n"); + CV_Error(CV_GpuNotSupported, "This function hasn't been supported yet.\n"); return (_Tp *)(data + step * y); } template inline const _Tp *oclMat::ptr(int y) const { CV_DbgAssert( (unsigned)y < (unsigned)rows ); - CV_Error(CV_GpuNotSupported,"This function hasn't been supported yet.\n"); + CV_Error(CV_GpuNotSupported, "This function hasn't been supported yet.\n"); return (const _Tp *)(data + step * y); } @@ -461,18 +466,20 @@ namespace cv a.swap(b); } - inline void ensureSizeIsEnough(int rows, int cols, int type, 
oclMat& m) - { - if (m.type() == type && m.rows >= rows && m.cols >= cols) - m = m(Rect(0, 0, cols, rows)); - else - m.create(rows, cols, type); - } + inline void ensureSizeIsEnough(int rows, int cols, int type, oclMat &m) + { + if (m.type() == type && m.rows >= rows && m.cols >= cols) + m = m(Rect(0, 0, cols, rows)); + else + m.create(rows, cols, type); + } + + inline void ensureSizeIsEnough(Size size, int type, oclMat &m) + { + ensureSizeIsEnough(size.height, size.width, type, m); + } + - inline void ensureSizeIsEnough(Size size, int type, oclMat& m) - { - ensureSizeIsEnough(size.height, size.width, type, m); - } } /* end of namespace ocl */ } /* end of namespace cv */ diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index 961831a..9110fef 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -57,7 +57,7 @@ namespace cv namespace ocl { using std::auto_ptr; - + #define CVCL_DEVICE_TYPE_DEFAULT (1 << 0) #define CVCL_DEVICE_TYPE_CPU (1 << 1) #define CVCL_DEVICE_TYPE_GPU (1 << 2) @@ -76,22 +76,26 @@ namespace cv ~Info(); void release(); Info &operator = (const Info &m); + std::vector DeviceName; }; //////////////////////////////// Initialization & Info //////////////////////// //this function may be obsoleted //CV_EXPORTS cl_device_id getDevice(); //the function must be called before any other cv::ocl::functions, it initialize ocl runtime - CV_EXPORTS int getDevice(std::vector& oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU); + CV_EXPORTS int getDevice(std::vector &oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU); //set device you want to use, optional function after getDevice be called CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0); //this function is not ready yet //CV_EXPORTS void getComputeCapability(cl_device_id device, int &major, int &minor); //optional function, if you want save opencl binary kernel to the file, set its path CV_EXPORTS void setBinpath(const 
char *path); - //The two functions below are used to get opencl runtime so that opencv can interactive with - //other opencl program - CV_EXPORTS void* getoclContext(); - CV_EXPORTS void* getoclCommandQueue(); + //The two functions below are used to get opencl runtime so that opencv can interact with + + //other opencl program + + CV_EXPORTS void *getoclContext(); + + CV_EXPORTS void *getoclCommandQueue(); //////////////////////////////// Error handling //////////////////////// CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func); @@ -116,7 +120,7 @@ namespace cv //////////////////////////////// oclMat //////////////////////////////// class CV_EXPORTS oclMat { - public: + public: //! default constructor oclMat(); //! constructs oclMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.) @@ -224,10 +228,16 @@ namespace cv size_t elemSize1() const; //! returns element type, similar to CV_MAT_TYPE(cvMat->type) int type() const; + //! returns element type, e.g. 8UC3 returns 8UC4 because in ocl + //! a 3-channel element actually uses 4-channel space + int ocltype() const; //! returns element type, similar to CV_MAT_DEPTH(cvMat->type) int depth() const; //! returns element type, similar to CV_MAT_CN(cvMat->type) int channels() const; + //! returns the number of channels as stored by ocl: 4 for a 3-channel element, + //! because a 3-channel element actually uses 4-channel space + int oclchannels() const; //! returns step/elemSize1() size_t step1() const; //! 
returns oclMatrix size: @@ -277,8 +287,6 @@ namespace cv //add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used int wholerows; int wholecols; - //add download_channels for 3 channels to 4 channels - int download_channels; }; ///////////////////// mat split and merge ///////////////////////////////// @@ -298,7 +306,7 @@ namespace cv //#else //typedef float F; //#endif - // CV_EXPORTS void addWeighted(const oclMat& a,F alpha, const oclMat& b,F beta,F gama, oclMat& c); + // CV_EXPORTS void addWeighted(const oclMat& a,F alpha, const oclMat& b,F beta,F gama, oclMat& c); CV_EXPORTS void addWeighted(const oclMat &a, double alpha, const oclMat &b, double beta, double gama, oclMat &c); //! adds one matrix to another (c = a + b) // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4 @@ -369,14 +377,18 @@ namespace cv // support all types CV_EXPORTS Scalar sum(const oclMat &m); + CV_EXPORTS Scalar sqrSum(const oclMat &m); + //! finds global minimum and maximum array elements and returns their values - // support all C1 types + // support all C1 types + CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat()); //! finds global minimum and maximum array elements and returns their values with locations - // support all C1 types + // support all C1 types + CV_EXPORTS void minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0, - const oclMat &mask = oclMat()); + const oclMat &mask = oclMat()); //! counts non-zero array elements // support all types @@ -440,9 +452,12 @@ namespace cv // supports all types CV_EXPORTS void bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat()); CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat()); - //! computes convolution of two images - //! 
support only CV_32FC1 type - CV_EXPORTS void convolve(const oclMat& image,const oclMat& temp1, oclMat& result); + //! computes convolution of two images + + //! support only CV_32FC1 type + + CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result); + //! Logical operators CV_EXPORTS oclMat operator ~ (const oclMat &src); @@ -461,11 +476,11 @@ namespace cv */ class CV_EXPORTS BaseRowFilter_GPU { - public: - BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {} - virtual ~BaseRowFilter_GPU() {} - virtual void operator()(const oclMat &src, oclMat &dst) = 0; - int ksize, anchor, bordertype; + public: + BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {} + virtual ~BaseRowFilter_GPU() {} + virtual void operator()(const oclMat &src, oclMat &dst) = 0; + int ksize, anchor, bordertype; }; /*! @@ -476,11 +491,11 @@ namespace cv */ class CV_EXPORTS BaseColumnFilter_GPU { - public: - BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {} - virtual ~BaseColumnFilter_GPU() {} - virtual void operator()(const oclMat &src, oclMat &dst) = 0; - int ksize, anchor, bordertype; + public: + BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {} + virtual ~BaseColumnFilter_GPU() {} + virtual void operator()(const oclMat &src, oclMat &dst) = 0; + int ksize, anchor, bordertype; }; /*! 
@@ -490,14 +505,14 @@ namespace cv */ class CV_EXPORTS BaseFilter_GPU { - public: - BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_) - : ksize(ksize_), anchor(anchor_), borderType(borderType_) {} - virtual ~BaseFilter_GPU() {} - virtual void operator()(const oclMat &src, oclMat &dst) = 0; - Size ksize; - Point anchor; - int borderType; + public: + BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_) + : ksize(ksize_), anchor(anchor_), borderType(borderType_) {} + virtual ~BaseFilter_GPU() {} + virtual void operator()(const oclMat &src, oclMat &dst) = 0; + Size ksize; + Point anchor; + int borderType; }; /*! @@ -508,10 +523,10 @@ namespace cv */ class CV_EXPORTS FilterEngine_GPU { - public: - virtual ~FilterEngine_GPU() {} + public: + virtual ~FilterEngine_GPU() {} - virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0; + virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0; }; //! returns the non-separable filter engine with the specified filter @@ -530,14 +545,14 @@ namespace cv const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT); //! returns the separable filter engine with the specified filters - CV_EXPORTS Ptr createSeparableFilter_GPU(const Ptr& rowFilter, - const Ptr& columnFilter); + CV_EXPORTS Ptr createSeparableFilter_GPU(const Ptr &rowFilter, + const Ptr &columnFilter); //! returns the Gaussian filter engine CV_EXPORTS Ptr createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT); //! returns filter engine for the generalized Sobel operator - CV_EXPORTS Ptr createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType=BORDER_DEFAULT ); + CV_EXPORTS Ptr createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT ); //! 
applies Laplacian operator to the image // supports only ksize = 1 and ksize = 3 8UC1 8UC4 32FC1 32FC4 data type @@ -565,7 +580,7 @@ namespace cv // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101,BORDER_WRAP CV_EXPORTS void boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize, - Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); + Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); //! returns 2D morphological filter //! only MORPH_ERODE and MORPH_DILATE are supported @@ -582,18 +597,18 @@ namespace cv // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101 static inline void blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1), - int borderType = BORDER_CONSTANT) + int borderType = BORDER_CONSTANT) { boxFilter(src, dst, -1, ksize, anchor, borderType); } //! applies non-separable 2D linear filter to the image CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel, - Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); + Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); //! applies separable 2D linear filter to the image CV_EXPORTS void sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY, - Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT); + Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT); //! applies generalized Sobel operator to the image // dst.type must equalize src.type @@ -615,30 +630,36 @@ namespace cv //! 
erodes the image (applies the local minimum operator) // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 - CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1, - int borderType=BORDER_CONSTANT,const Scalar& borderValue=morphologyDefaultBorderValue()); + CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1, + + int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue()); + //! dilates the image (applies the local maximum operator) // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 - CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1, - int borderType=BORDER_CONSTANT,const Scalar& borderValue=morphologyDefaultBorderValue()); + CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1, + + int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue()); + //! applies an advanced morphological operation to the image - CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1, - int borderType=BORDER_CONSTANT,const Scalar& borderValue=morphologyDefaultBorderValue()); + CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1, + + int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue()); + ////////////////////////////// Image processing ////////////////////////////// //! Does mean shift filtering on GPU. 
CV_EXPORTS void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr, - TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); + TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); //! Does mean shift procedure on GPU. CV_EXPORTS void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr, - TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); + TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); //! Does mean shift segmentation with elimiation of small regions. CV_EXPORTS void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize, - TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); + TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); //! applies fixed threshold to the image. // supports CV_8UC1 and CV_32FC1 data type @@ -650,11 +671,16 @@ namespace cv // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types CV_EXPORTS void resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR); - //! Applies a generic geometrical transformation to an image. - // Supports INTER_NEAREST, INTER_LINEAR. - // Map1 supports CV_16SC2, CV_32FC2 types. - // Src supports CV_8UC1, CV_8UC2, CV_8UC4. - CV_EXPORTS void remap(const oclMat& src, oclMat& dst, oclMat& map1, oclMat& map2, int interpolation, int bordertype, const Scalar& value = Scalar()); + //! Applies a generic geometrical transformation to an image. + + // Supports INTER_NEAREST, INTER_LINEAR. + + // Map1 supports CV_16SC2, CV_32FC2 types. + + // Src supports CV_8UC1, CV_8UC2, CV_8UC4. + + CV_EXPORTS void remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar()); + //! 
copies 2D array to a larger destination array and pads borders with user-specifiable constant // supports CV_8UC1, CV_8UC4, CV_32SC1 types CV_EXPORTS void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar()); @@ -692,503 +718,1026 @@ namespace cv ~OclCascadeClassifier() {}; CvSeq *oclHaarDetectObjects(oclMat &gimg, CvMemStorage *storage, double scaleFactor, - int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0)); + int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0)); + }; + + + + /////////////////////////////// Pyramid ///////////////////////////////////// + CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst); + + //! upsamples the source image and then smoothes it + CV_EXPORTS void pyrUp(const cv::ocl::oclMat &src, cv::ocl::oclMat &dst); + + //! performs linear blending of two images + //! to avoid accuracy errors sum of weigths shouldn't be very close to zero + // supports only CV_8UC1 source type + CV_EXPORTS void blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result); + + //! computes vertical sum, supports only CV_32FC1 images + CV_EXPORTS void columnSum(const oclMat &src, oclMat &sum); + + ///////////////////////////////////////// match_template ///////////////////////////////////////////////////////////// + struct CV_EXPORTS MatchTemplateBuf + { + Size user_block_size; + oclMat imagef, templf; + std::vector images; + std::vector image_sums; + std::vector image_sqsums; }; - //! computes vertical sum, supports only CV_32FC1 images - CV_EXPORTS void columnSum(const oclMat& src, oclMat& sum); - - //! performs linear blending of two images - //! 
to avoid accuracy errors sum of weigths shouldn't be very close to zero - // supports only CV_8UC1 source type - CV_EXPORTS void blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2, oclMat& result); - - /////////////////////////////// Pyramid ///////////////////////////////////// - CV_EXPORTS void pyrDown(const oclMat& src, oclMat& dst); - - //! upsamples the source image and then smoothes it - CV_EXPORTS void pyrUp(const cv::ocl::oclMat& src,cv::ocl::oclMat& dst); - - ///////////////////////////////////////// match_template ///////////////////////////////////////////////////////////// - struct CV_EXPORTS MatchTemplateBuf - { - Size user_block_size; - oclMat imagef, templf; - std::vector images; - std::vector image_sums; - std::vector image_sqsums; - }; - - - //! computes the proximity map for the raster template and the image where the template is searched for - // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4 - // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4 - CV_EXPORTS void matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method); - - //! computes the proximity map for the raster template and the image where the template is searched for - // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4 - // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4 - CV_EXPORTS void matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf); - - - ///////////////////////////////////////////// Canny ///////////////////////////////////////////// - struct CV_EXPORTS CannyBuf; - - //! 
compute edges of the input image using Canny operator - // Support CV_8UC1 only - CV_EXPORTS void Canny(const oclMat& image, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false); - CV_EXPORTS void Canny(const oclMat& image, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false); - CV_EXPORTS void Canny(const oclMat& dx, const oclMat& dy, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false); - CV_EXPORTS void Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false); - - struct CV_EXPORTS CannyBuf - { - CannyBuf() : counter(NULL) {} - ~CannyBuf() { release(); } - explicit CannyBuf(const Size& image_size, int apperture_size = 3) : counter(NULL) - { - create(image_size, apperture_size); - } - CannyBuf(const oclMat& dx_, const oclMat& dy_); - - void create(const Size& image_size, int apperture_size = 3); - - void release(); - - oclMat dx, dy; - oclMat dx_buf, dy_buf; - oclMat edgeBuf; - oclMat trackBuf1, trackBuf2; - void * counter; - Ptr filterDX, filterDY; - }; + //! computes the proximity map for the raster template and the image where the template is searched for + // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4 + // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4 + CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method); + + //! 
computes the proximity map for the raster template and the image where the template is searched for + // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4 + // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4 + CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf); + + + + ///////////////////////////////////////////// Canny ///////////////////////////////////////////// + + struct CV_EXPORTS CannyBuf; + + + + //! compute edges of the input image using Canny operator + + // Support CV_8UC1 only + + CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false); + + CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false); + + CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false); + + CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false); + + + + struct CV_EXPORTS CannyBuf + + { + + CannyBuf() : counter(NULL) {} + + ~CannyBuf() + { + release(); + } + + explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(NULL) + + { + + create(image_size, apperture_size); + + } + + CannyBuf(const oclMat &dx_, const oclMat &dy_); + + + + void create(const Size &image_size, int apperture_size = 3); + + + + void release(); + + + + oclMat dx, dy; + + oclMat dx_buf, dy_buf; + + oclMat edgeBuf; + + oclMat trackBuf1, trackBuf2; + + void *counter; + + Ptr filterDX, filterDY; + + }; #ifdef HAVE_CLAMDFFT - ///////////////////////////////////////// clAmdFft related ///////////////////////////////////////// - // the two functions must be called before/after run any fft 
library functions. - CV_EXPORTS void fft_setup(); // this will be implicitly invoked - CV_EXPORTS void fft_teardown(); // you need to teardown fft library manually - - /////////////////////////////////////// DFT ///////////////////////////////////////////////////// - //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix. - //! Param dft_size is the size of DFT transform. - //! - //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format. - // support src type of CV32FC1, CV32FC2 - // support flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS - // dft_size is the size of original input, which is used for transformation from complex to real. - // dft_size must be powers of 2, 3 and 5 - // real to complex dft requires at least v1.8 clAmdFft - // real to complex dft output is not the same with cpu version - // real to complex and complex to real does not support DFT_ROWS - CV_EXPORTS void dft(const oclMat& src, oclMat& dst, Size dft_size = Size(0, 0), int flags = 0); + ///////////////////////////////////////// clAmdFft related ///////////////////////////////////////// + //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix. + //! Param dft_size is the size of DFT transform. + //! + //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format. + // support src type of CV32FC1, CV32FC2 + // support flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS + // dft_size is the size of original input, which is used for transformation from complex to real. 
+ // dft_size must be powers of 2, 3 and 5 + // real to complex dft requires at least v1.8 clAmdFft + // real to complex dft output is not the same with cpu version + // real to complex and complex to real does not support DFT_ROWS + CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(0, 0), int flags = 0); #endif // HAVE_CLAMDFFT #ifdef HAVE_CLAMDBLAS - //! implements generalized matrix product algorithm GEMM from BLAS - // The functionality requires clAmdBlas library - // only support type CV_32FC1 - // flag GEMM_3_T is not supported - CV_EXPORTS void gemm(const oclMat& src1, const oclMat& src2, double alpha, - const oclMat& src3, double beta, oclMat& dst, int flags = 0); + //! implements generalized matrix product algorithm GEMM from BLAS + // The functionality requires clAmdBlas library + // only support type CV_32FC1 + // flag GEMM_3_T is not supported + CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha, + const oclMat &src3, double beta, oclMat &dst, int flags = 0); #endif - //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector ////////////// - struct CV_EXPORTS HOGDescriptor - { - enum { DEFAULT_WIN_SIGMA = -1 }; - enum { DEFAULT_NLEVELS = 64 }; - enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL }; - - HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16), - Size block_stride=Size(8, 8), Size cell_size=Size(8, 8), - int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA, - double threshold_L2hys=0.2, bool gamma_correction=true, - int nlevels=DEFAULT_NLEVELS); - - size_t getDescriptorSize() const; - size_t getBlockHistogramSize() const; - - void setSVMDetector(const vector& detector); - - static vector getDefaultPeopleDetector(); - static vector getPeopleDetector48x96(); - static vector getPeopleDetector64x128(); - - void detect(const oclMat& img, vector& found_locations, - double hit_threshold=0, Size win_stride=Size(), - Size padding=Size()); - - void 
detectMultiScale(const oclMat& img, vector& found_locations, - double hit_threshold=0, Size win_stride=Size(), - Size padding=Size(), double scale0=1.05, - int group_threshold=2); - - void getDescriptors(const oclMat& img, Size win_stride, - oclMat& descriptors, - int descr_format=DESCR_FORMAT_COL_BY_COL); - - Size win_size; - Size block_size; - Size block_stride; - Size cell_size; - int nbins; - double win_sigma; - double threshold_L2hys; - bool gamma_correction; - int nlevels; - - protected: - // initialize buffers; only need to do once in case of multiscale detection - void init_buffer(const oclMat& img, Size win_stride); - - void computeBlockHistograms(const oclMat& img); - void computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle); - - double getWinSigma() const; - bool checkDetectorSize() const; - - static int numPartsWithin(int size, int part_size, int stride); - static Size numPartsWithin(Size size, Size part_size, Size stride); - - // Coefficients of the separating plane - float free_coef; - oclMat detector; - - // Results of the last classification step - oclMat labels; - Mat labels_host; - - // Results of the last histogram evaluation step - oclMat block_hists; - - // Gradients conputation results - oclMat grad, qangle; - - // scaled image - oclMat image_scale; - - // effect size of input image (might be different from original size after scaling) - Size effect_size; - }; - - //! Speeded up robust features, port from GPU module. - ////////////////////////////////// SURF ////////////////////////////////////////// - class CV_EXPORTS SURF_OCL - { - public: - enum KeypointLayout - { - X_ROW = 0, - Y_ROW, - LAPLACIAN_ROW, - OCTAVE_ROW, - SIZE_ROW, - ANGLE_ROW, - HESSIAN_ROW, - ROWS_COUNT - }; - - //! the default constructor - SURF_OCL(); - //! 
the full constructor taking all the necessary parameters - explicit SURF_OCL(double _hessianThreshold, int _nOctaves=4, - int _nOctaveLayers=2, bool _extended=false, float _keypointsRatio=0.01f, bool _upright = false); - - //! returns the descriptor size in float's (64 or 128) - int descriptorSize() const; - - //! upload host keypoints to device memory - void uploadKeypoints(const vector& keypoints, oclMat& keypointsocl); - //! download keypoints from device to host memory - void downloadKeypoints(const oclMat& keypointsocl, vector& keypoints); - - //! download descriptors from device to host memory - void downloadDescriptors(const oclMat& descriptorsocl, vector& descriptors); - - //! finds the keypoints using fast hessian detector used in SURF - //! supports CV_8UC1 images - //! keypoints will have nFeature cols and 6 rows - //! keypoints.ptr(X_ROW)[i] will contain x coordinate of i'th feature - //! keypoints.ptr(Y_ROW)[i] will contain y coordinate of i'th feature - //! keypoints.ptr(LAPLACIAN_ROW)[i] will contain laplacian sign of i'th feature - //! keypoints.ptr(OCTAVE_ROW)[i] will contain octave of i'th feature - //! keypoints.ptr(SIZE_ROW)[i] will contain size of i'th feature - //! keypoints.ptr(ANGLE_ROW)[i] will contain orientation of i'th feature - //! keypoints.ptr(HESSIAN_ROW)[i] will contain response of i'th feature - void operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints); - //! finds the keypoints and computes their descriptors. - //! 
Optionally it can compute descriptors for the user-provided keypoints and recompute keypoints direction - void operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints, oclMat& descriptors, - bool useProvidedKeypoints = false); - - void operator()(const oclMat& img, const oclMat& mask, std::vector& keypoints); - void operator()(const oclMat& img, const oclMat& mask, std::vector& keypoints, oclMat& descriptors, - bool useProvidedKeypoints = false); - - void operator()(const oclMat& img, const oclMat& mask, std::vector& keypoints, std::vector& descriptors, - bool useProvidedKeypoints = false); - - void releaseMemory(); - - // SURF parameters - float hessianThreshold; - int nOctaves; - int nOctaveLayers; - bool extended; - bool upright; - - //! max keypoints = min(keypointsRatio * img.size().area(), 65535) - float keypointsRatio; - - oclMat sum, mask1, maskSum, intBuffer; - - oclMat det, trace; - - oclMat maxPosBuffer; - - }; - ////////////////////////////////// BruteForceMatcher ////////////////////////////////// - - class CV_EXPORTS BruteForceMatcher_OCL_base - { - public: - enum DistType {L1Dist = 0, L2Dist, HammingDist}; - - explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist); - - // Add descriptors to train descriptor collection - void add(const std::vector& descCollection); - - // Get train descriptors collection - const std::vector& getTrainDescriptors() const; - - // Clear train descriptors collection - void clear(); - - // Return true if there are not train descriptors in collection - bool empty() const; - - // Return true if the matcher supports mask in match methods - bool isMaskSupported() const; - - // Find one best match for each query descriptor - void matchSingle(const oclMat& query, const oclMat& train, - oclMat& trainIdx, oclMat& distance, - const oclMat& mask = oclMat()); - - // Download trainIdx and distance and convert it to CPU vector with DMatch - static void matchDownload(const oclMat& trainIdx, const oclMat& distance, 
std::vector& matches); - // Convert trainIdx and distance to vector with DMatch - static void matchConvert(const Mat& trainIdx, const Mat& distance, std::vector& matches); - - // Find one best match for each query descriptor - void match(const oclMat& query, const oclMat& train, std::vector& matches, const oclMat& mask = oclMat()); - - // Make gpu collection of trains and masks in suitable format for matchCollection function - void makeGpuCollection(oclMat& trainCollection, oclMat& maskCollection, const std::vector& masks = std::vector()); - - // Find one best match from train collection for each query descriptor - void matchCollection(const oclMat& query, const oclMat& trainCollection, - oclMat& trainIdx, oclMat& imgIdx, oclMat& distance, - const oclMat& masks = oclMat()); - - // Download trainIdx, imgIdx and distance and convert it to vector with DMatch - static void matchDownload(const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, std::vector& matches); - // Convert trainIdx, imgIdx and distance to vector with DMatch - static void matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector& matches); - - // Find one best match from train collection for each query descriptor. - void match(const oclMat& query, std::vector& matches, const std::vector& masks = std::vector()); - - // Find k best matches for each query descriptor (in increasing order of distances) - void knnMatchSingle(const oclMat& query, const oclMat& train, - oclMat& trainIdx, oclMat& distance, oclMat& allDist, int k, - const oclMat& mask = oclMat()); - - // Download trainIdx and distance and convert it to vector with DMatch - // compactResult is used when mask is not empty. If compactResult is false matches - // vector will have the same size as queryDescriptors rows. If compactResult is true - // matches vector will not contain matches for fully masked out query descriptors. 
- static void knnMatchDownload(const oclMat& trainIdx, const oclMat& distance, - std::vector< std::vector >& matches, bool compactResult = false); - // Convert trainIdx and distance to vector with DMatch - static void knnMatchConvert(const Mat& trainIdx, const Mat& distance, - std::vector< std::vector >& matches, bool compactResult = false); - - // Find k best matches for each query descriptor (in increasing order of distances). - // compactResult is used when mask is not empty. If compactResult is false matches - // vector will have the same size as queryDescriptors rows. If compactResult is true - // matches vector will not contain matches for fully masked out query descriptors. - void knnMatch(const oclMat& query, const oclMat& train, - std::vector< std::vector >& matches, int k, const oclMat& mask = oclMat(), - bool compactResult = false); - - // Find k best matches from train collection for each query descriptor (in increasing order of distances) - void knnMatch2Collection(const oclMat& query, const oclMat& trainCollection, - oclMat& trainIdx, oclMat& imgIdx, oclMat& distance, - const oclMat& maskCollection = oclMat()); - - // Download trainIdx and distance and convert it to vector with DMatch - // compactResult is used when mask is not empty. If compactResult is false matches - // vector will have the same size as queryDescriptors rows. If compactResult is true - // matches vector will not contain matches for fully masked out query descriptors. - static void knnMatch2Download(const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, - std::vector< std::vector >& matches, bool compactResult = false); - // Convert trainIdx and distance to vector with DMatch - static void knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, - std::vector< std::vector >& matches, bool compactResult = false); - - // Find k best matches for each query descriptor (in increasing order of distances). 
- // compactResult is used when mask is not empty. If compactResult is false matches - // vector will have the same size as queryDescriptors rows. If compactResult is true - // matches vector will not contain matches for fully masked out query descriptors. - void knnMatch(const oclMat& query, std::vector< std::vector >& matches, int k, - const std::vector& masks = std::vector(), bool compactResult = false); - - // Find best matches for each query descriptor which have distance less than maxDistance. - // nMatches.at(0, queryIdx) will contain matches count for queryIdx. - // carefully nMatches can be greater than trainIdx.cols - it means that matcher didn't find all matches, - // because it didn't have enough memory. - // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10), - // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches - // Matches doesn't sorted. - void radiusMatchSingle(const oclMat& query, const oclMat& train, - oclMat& trainIdx, oclMat& distance, oclMat& nMatches, float maxDistance, - const oclMat& mask = oclMat()); - - // Download trainIdx, nMatches and distance and convert it to vector with DMatch. - // matches will be sorted in increasing order of distances. - // compactResult is used when mask is not empty. If compactResult is false matches - // vector will have the same size as queryDescriptors rows. If compactResult is true - // matches vector will not contain matches for fully masked out query descriptors. - static void radiusMatchDownload(const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches, - std::vector< std::vector >& matches, bool compactResult = false); - // Convert trainIdx, nMatches and distance to vector with DMatch. 
- static void radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches, - std::vector< std::vector >& matches, bool compactResult = false); - - // Find best matches for each query descriptor which have distance less than maxDistance - // in increasing order of distances). - void radiusMatch(const oclMat& query, const oclMat& train, - std::vector< std::vector >& matches, float maxDistance, - const oclMat& mask = oclMat(), bool compactResult = false); - - // Find best matches for each query descriptor which have distance less than maxDistance. - // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10), - // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches - // Matches doesn't sorted. - void radiusMatchCollection(const oclMat& query, oclMat& trainIdx, oclMat& imgIdx, oclMat& distance, oclMat& nMatches, float maxDistance, - const std::vector& masks = std::vector()); - - // Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch. - // matches will be sorted in increasing order of distances. - // compactResult is used when mask is not empty. If compactResult is false matches - // vector will have the same size as queryDescriptors rows. If compactResult is true - // matches vector will not contain matches for fully masked out query descriptors. - static void radiusMatchDownload(const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, const oclMat& nMatches, - std::vector< std::vector >& matches, bool compactResult = false); - // Convert trainIdx, nMatches and distance to vector with DMatch. 
- static void radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches, - std::vector< std::vector >& matches, bool compactResult = false); - - // Find best matches from train collection for each query descriptor which have distance less than - // maxDistance (in increasing order of distances). - void radiusMatch(const oclMat& query, std::vector< std::vector >& matches, float maxDistance, - const std::vector& masks = std::vector(), bool compactResult = false); - - DistType distType; - - private: - std::vector trainDescCollection; - }; - - template - class CV_EXPORTS BruteForceMatcher_OCL; - - template - class CV_EXPORTS BruteForceMatcher_OCL< L1 > : public BruteForceMatcher_OCL_base - { - public: - explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {} - explicit BruteForceMatcher_OCL(L1 /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {} - }; - template - class CV_EXPORTS BruteForceMatcher_OCL< L2 > : public BruteForceMatcher_OCL_base - { - public: - explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {} - explicit BruteForceMatcher_OCL(L2 /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {} - }; - template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base - { - public: - explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {} - explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {} - }; - - /////////////////////////////// PyrLKOpticalFlow ///////////////////////////////////// - class CV_EXPORTS PyrLKOpticalFlow - { - public: - PyrLKOpticalFlow() - { - winSize = Size(21, 21); - maxLevel = 3; - iters = 30; - derivLambda = 0.5; - useInitialFlow = false; - minEigThreshold = 1e-4f; - getMinEigenVals = false; - isDeviceArch11_ = false; - } - - void sparse(const oclMat& prevImg, const oclMat& nextImg, const oclMat& prevPts, oclMat& nextPts, - oclMat& status, oclMat* err = 0); - - void dense(const oclMat& 
prevImg, const oclMat& nextImg, oclMat& u, oclMat& v, oclMat* err = 0); - - Size winSize; - int maxLevel; - int iters; - double derivLambda; - bool useInitialFlow; - float minEigThreshold; - bool getMinEigenVals; - - void releaseMemory() - { - dx_calcBuf_.release(); - dy_calcBuf_.release(); - - prevPyr_.clear(); - nextPyr_.clear(); - - dx_buf_.release(); - dy_buf_.release(); - } - - private: - void calcSharrDeriv(const oclMat& src, oclMat& dx, oclMat& dy); - - void buildImagePyramid(const oclMat& img0, vector& pyr, bool withBorder); - - oclMat dx_calcBuf_; - oclMat dy_calcBuf_; - - vector prevPyr_; - vector nextPyr_; - - oclMat dx_buf_; - oclMat dy_buf_; - - oclMat uPyr_[2]; - oclMat vPyr_[2]; - - bool isDeviceArch11_; - }; - + //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector ////////////// + + struct CV_EXPORTS HOGDescriptor + + { + + enum { DEFAULT_WIN_SIGMA = -1 }; + + enum { DEFAULT_NLEVELS = 64 }; + + enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL }; + + + + HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16), + + Size block_stride = Size(8, 8), Size cell_size = Size(8, 8), + + int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA, + + double threshold_L2hys = 0.2, bool gamma_correction = true, + + int nlevels = DEFAULT_NLEVELS); + + + + size_t getDescriptorSize() const; + + size_t getBlockHistogramSize() const; + + + + void setSVMDetector(const vector &detector); + + + + static vector getDefaultPeopleDetector(); + + static vector getPeopleDetector48x96(); + + static vector getPeopleDetector64x128(); + + + + void detect(const oclMat &img, vector &found_locations, + + double hit_threshold = 0, Size win_stride = Size(), + + Size padding = Size()); + + + + void detectMultiScale(const oclMat &img, vector &found_locations, + + double hit_threshold = 0, Size win_stride = Size(), + + Size padding = Size(), double scale0 = 1.05, + + int group_threshold = 2); + + + + void getDescriptors(const oclMat 
&img, Size win_stride, + + oclMat &descriptors, + + int descr_format = DESCR_FORMAT_COL_BY_COL); + + + + Size win_size; + + Size block_size; + + Size block_stride; + + Size cell_size; + + int nbins; + + double win_sigma; + + double threshold_L2hys; + + bool gamma_correction; + + int nlevels; + + + + protected: + + // initialize buffers; only need to do once in case of multiscale detection + + void init_buffer(const oclMat &img, Size win_stride); + + + + void computeBlockHistograms(const oclMat &img); + + void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle); + + + + double getWinSigma() const; + + bool checkDetectorSize() const; + + + + static int numPartsWithin(int size, int part_size, int stride); + + static Size numPartsWithin(Size size, Size part_size, Size stride); + + + + // Coefficients of the separating plane + + float free_coef; + + oclMat detector; + + + + // Results of the last classification step + + oclMat labels; + + Mat labels_host; + + + + // Results of the last histogram evaluation step + + oclMat block_hists; + + + + // Gradients conputation results + + oclMat grad, qangle; + + + + // scaled image + + oclMat image_scale; + + + + // effect size of input image (might be different from original size after scaling) + + Size effect_size; + + }; + + + + //! Speeded up robust features, port from GPU module. + ////////////////////////////////// SURF ////////////////////////////////////////// + + class CV_EXPORTS SURF_OCL + + { + + public: + + enum KeypointLayout + + { + + X_ROW = 0, + + Y_ROW, + + LAPLACIAN_ROW, + + OCTAVE_ROW, + + SIZE_ROW, + + ANGLE_ROW, + + HESSIAN_ROW, + + ROWS_COUNT + + }; + + + + //! the default constructor + + SURF_OCL(); + + //! the full constructor taking all the necessary parameters + + explicit SURF_OCL(double _hessianThreshold, int _nOctaves = 4, + + int _nOctaveLayers = 2, bool _extended = false, float _keypointsRatio = 0.01f, bool _upright = false); + + + + //! 
returns the descriptor size in float's (64 or 128) + + int descriptorSize() const; + + + + //! upload host keypoints to device memory + + void uploadKeypoints(const vector &keypoints, oclMat &keypointsocl); + + //! download keypoints from device to host memory + + void downloadKeypoints(const oclMat &keypointsocl, vector &keypoints); + + + + //! download descriptors from device to host memory + + void downloadDescriptors(const oclMat &descriptorsocl, vector &descriptors); + + + + //! finds the keypoints using fast hessian detector used in SURF + + //! supports CV_8UC1 images + + //! keypoints will have nFeature cols and 6 rows + + //! keypoints.ptr(X_ROW)[i] will contain x coordinate of i'th feature + + //! keypoints.ptr(Y_ROW)[i] will contain y coordinate of i'th feature + + //! keypoints.ptr(LAPLACIAN_ROW)[i] will contain laplacian sign of i'th feature + + //! keypoints.ptr(OCTAVE_ROW)[i] will contain octave of i'th feature + + //! keypoints.ptr(SIZE_ROW)[i] will contain size of i'th feature + + //! keypoints.ptr(ANGLE_ROW)[i] will contain orientation of i'th feature + + //! keypoints.ptr(HESSIAN_ROW)[i] will contain response of i'th feature + + void operator()(const oclMat &img, const oclMat &mask, oclMat &keypoints); + + //! finds the keypoints and computes their descriptors. + + //! 
Optionally it can compute descriptors for the user-provided keypoints and recompute keypoints direction + + void operator()(const oclMat &img, const oclMat &mask, oclMat &keypoints, oclMat &descriptors, + + bool useProvidedKeypoints = false); + + + + void operator()(const oclMat &img, const oclMat &mask, std::vector &keypoints); + + void operator()(const oclMat &img, const oclMat &mask, std::vector &keypoints, oclMat &descriptors, + + bool useProvidedKeypoints = false); + + + + void operator()(const oclMat &img, const oclMat &mask, std::vector &keypoints, std::vector &descriptors, + + bool useProvidedKeypoints = false); + + + + void releaseMemory(); + + + + // SURF parameters + + float hessianThreshold; + + int nOctaves; + + int nOctaveLayers; + + bool extended; + + bool upright; + + + + //! max keypoints = min(keypointsRatio * img.size().area(), 65535) + + float keypointsRatio; + + + + oclMat sum, mask1, maskSum, intBuffer; + + + + oclMat det, trace; + + + + oclMat maxPosBuffer; + + }; + + ////////////////////////feature2d_ocl///////////////// + /****************************************************************************************\ + * Distance * + \****************************************************************************************/ + + template + struct CV_EXPORTS Accumulator + { + typedef T Type; + }; + + template<> struct Accumulator + { + typedef float Type; + }; + template<> struct Accumulator + { + typedef float Type; + }; + template<> struct Accumulator + { + typedef float Type; + }; + template<> struct Accumulator + { + typedef float Type; + }; + + /* + * Manhattan distance (city block distance) functor + */ + template + struct CV_EXPORTS L1 + { + enum { normType = NORM_L1 }; + typedef T ValueType; + typedef typename Accumulator::Type ResultType; + + ResultType operator()( const T *a, const T *b, int size ) const + { + return normL1(a, b, size); + } + }; + + /* + * Euclidean distance functor + */ + template + struct CV_EXPORTS L2 + { + enum { 
normType = NORM_L2 }; + typedef T ValueType; + typedef typename Accumulator::Type ResultType; + + ResultType operator()( const T *a, const T *b, int size ) const + { + return (ResultType)sqrt((double)normL2Sqr(a, b, size)); + } + }; + + /* + * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor + * bit count of A exclusive XOR'ed with B + */ + struct CV_EXPORTS Hamming + { + enum { normType = NORM_HAMMING }; + typedef unsigned char ValueType; + typedef int ResultType; + + /** this will count the bits in a ^ b + */ + ResultType operator()( const unsigned char *a, const unsigned char *b, int size ) const + { + return normHamming(a, b, size); + } + }; + + ////////////////////////////////// BruteForceMatcher ////////////////////////////////// + + class CV_EXPORTS BruteForceMatcher_OCL_base + { + public: + enum DistType {L1Dist = 0, L2Dist, HammingDist}; + + explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist); + + + + // Add descriptors to train descriptor collection + + void add(const std::vector &descCollection); + + + + // Get train descriptors collection + + const std::vector &getTrainDescriptors() const; + + + + // Clear train descriptors collection + + void clear(); + + + + // Return true if there are not train descriptors in collection + + bool empty() const; + + + + // Return true if the matcher supports mask in match methods + + bool isMaskSupported() const; + + + + // Find one best match for each query descriptor + + void matchSingle(const oclMat &query, const oclMat &train, + + oclMat &trainIdx, oclMat &distance, + + const oclMat &mask = oclMat()); + + + + // Download trainIdx and distance and convert it to CPU vector with DMatch + + static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector &matches); + + // Convert trainIdx and distance to vector with DMatch + + static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector &matches); + + + + // 
Find one best match for each query descriptor + + void match(const oclMat &query, const oclMat &train, std::vector &matches, const oclMat &mask = oclMat()); + + + + // Make gpu collection of trains and masks in suitable format for matchCollection function + + void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector &masks = std::vector()); + + + + // Find one best match from train collection for each query descriptor + + void matchCollection(const oclMat &query, const oclMat &trainCollection, + + oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, + + const oclMat &masks = oclMat()); + + + + // Download trainIdx, imgIdx and distance and convert it to vector with DMatch + + static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector &matches); + + // Convert trainIdx, imgIdx and distance to vector with DMatch + + static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector &matches); + + + + // Find one best match from train collection for each query descriptor. + + void match(const oclMat &query, std::vector &matches, const std::vector &masks = std::vector()); + + + + // Find k best matches for each query descriptor (in increasing order of distances) + + void knnMatchSingle(const oclMat &query, const oclMat &train, + + oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k, + + const oclMat &mask = oclMat()); + + + + // Download trainIdx and distance and convert it to vector with DMatch + + // compactResult is used when mask is not empty. If compactResult is false matches + + // vector will have the same size as queryDescriptors rows. If compactResult is true + + // matches vector will not contain matches for fully masked out query descriptors. 
+ + static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance, + + std::vector< std::vector > &matches, bool compactResult = false); + + // Convert trainIdx and distance to vector with DMatch + + static void knnMatchConvert(const Mat &trainIdx, const Mat &distance, + + std::vector< std::vector > &matches, bool compactResult = false); + + + + // Find k best matches for each query descriptor (in increasing order of distances). + + // compactResult is used when mask is not empty. If compactResult is false matches + + // vector will have the same size as queryDescriptors rows. If compactResult is true + + // matches vector will not contain matches for fully masked out query descriptors. + + void knnMatch(const oclMat &query, const oclMat &train, + + std::vector< std::vector > &matches, int k, const oclMat &mask = oclMat(), + + bool compactResult = false); + + + + // Find k best matches from train collection for each query descriptor (in increasing order of distances) + + void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection, + + oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, + + const oclMat &maskCollection = oclMat()); + + + + // Download trainIdx and distance and convert it to vector with DMatch + + // compactResult is used when mask is not empty. If compactResult is false matches + + // vector will have the same size as queryDescriptors rows. If compactResult is true + + // matches vector will not contain matches for fully masked out query descriptors. 
+ + static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, + + std::vector< std::vector > &matches, bool compactResult = false); + + // Convert trainIdx and distance to vector with DMatch + + static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, + + std::vector< std::vector > &matches, bool compactResult = false); + + + + // Find k best matches for each query descriptor (in increasing order of distances). + + // compactResult is used when mask is not empty. If compactResult is false matches + + // vector will have the same size as queryDescriptors rows. If compactResult is true + + // matches vector will not contain matches for fully masked out query descriptors. + + void knnMatch(const oclMat &query, std::vector< std::vector > &matches, int k, + + const std::vector &masks = std::vector(), bool compactResult = false); + + + + // Find best matches for each query descriptor which have distance less than maxDistance. + + // nMatches.at(0, queryIdx) will contain matches count for queryIdx. + + // carefully nMatches can be greater than trainIdx.cols - it means that matcher didn't find all matches, + + // because it didn't have enough memory. + + // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10), + + // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches + + // Matches doesn't sorted. + + void radiusMatchSingle(const oclMat &query, const oclMat &train, + + oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance, + + const oclMat &mask = oclMat()); + + + + // Download trainIdx, nMatches and distance and convert it to vector with DMatch. + + // matches will be sorted in increasing order of distances. + + // compactResult is used when mask is not empty. If compactResult is false matches + + // vector will have the same size as queryDescriptors rows. 
If compactResult is true + + // matches vector will not contain matches for fully masked out query descriptors. + + static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, + + std::vector< std::vector > &matches, bool compactResult = false); + + // Convert trainIdx, nMatches and distance to vector with DMatch. + + static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches, + + std::vector< std::vector > &matches, bool compactResult = false); + + + + // Find best matches for each query descriptor which have distance less than maxDistance + + // in increasing order of distances). + + void radiusMatch(const oclMat &query, const oclMat &train, + + std::vector< std::vector > &matches, float maxDistance, + + const oclMat &mask = oclMat(), bool compactResult = false); + + + + // Find best matches for each query descriptor which have distance less than maxDistance. + + // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10), + + // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches + + // Matches doesn't sorted. + + void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance, + + const std::vector &masks = std::vector()); + + + + // Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch. + + // matches will be sorted in increasing order of distances. + + // compactResult is used when mask is not empty. If compactResult is false matches + + // vector will have the same size as queryDescriptors rows. If compactResult is true + + // matches vector will not contain matches for fully masked out query descriptors. 
+ + static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches, + + std::vector< std::vector > &matches, bool compactResult = false); + + // Convert trainIdx, nMatches and distance to vector with DMatch. + + static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches, + + std::vector< std::vector > &matches, bool compactResult = false); + + + + // Find best matches from train collection for each query descriptor which have distance less than + + // maxDistance (in increasing order of distances). + + void radiusMatch(const oclMat &query, std::vector< std::vector > &matches, float maxDistance, + + const std::vector &masks = std::vector(), bool compactResult = false); + + + + DistType distType; + + + + private: + + std::vector trainDescCollection; + + }; + + + + template + + class CV_EXPORTS BruteForceMatcher_OCL; + + + + template + + class CV_EXPORTS BruteForceMatcher_OCL< L1 > : public BruteForceMatcher_OCL_base + + { + + public: + + explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {} + + explicit BruteForceMatcher_OCL(L1 /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {} + + }; + + template + + class CV_EXPORTS BruteForceMatcher_OCL< L2 > : public BruteForceMatcher_OCL_base + + { + + public: + + explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {} + + explicit BruteForceMatcher_OCL(L2 /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {} + + }; + + template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base + + { + + public: + + explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {} + + explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {} + + }; + + + + /////////////////////////////// PyrLKOpticalFlow ///////////////////////////////////// + + class CV_EXPORTS PyrLKOpticalFlow + + { + + public: + + PyrLKOpticalFlow() + + { 
+ + winSize = Size(21, 21); + + maxLevel = 3; + + iters = 30; + + derivLambda = 0.5; + + useInitialFlow = false; + + minEigThreshold = 1e-4f; + + getMinEigenVals = false; + + isDeviceArch11_ = false; + + } + + + + void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts, + + oclMat &status, oclMat *err = 0); + + + + void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0); + + + + Size winSize; + + int maxLevel; + + int iters; + + double derivLambda; + + bool useInitialFlow; + + float minEigThreshold; + + bool getMinEigenVals; + + + + void releaseMemory() + + { + + dx_calcBuf_.release(); + + dy_calcBuf_.release(); + + + + prevPyr_.clear(); + + nextPyr_.clear(); + + + + dx_buf_.release(); + + dy_buf_.release(); + + } + + + + private: + + void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy); + + + + void buildImagePyramid(const oclMat &img0, vector &pyr, bool withBorder); + + + + oclMat dx_calcBuf_; + + oclMat dy_calcBuf_; + + + + vector prevPyr_; + + vector nextPyr_; + + + + oclMat dx_buf_; + + oclMat dy_buf_; + + + + oclMat uPyr_[2]; + + oclMat vPyr_[2]; + + + + bool isDeviceArch11_; + + }; + //////////////// build warping maps //////////////////// + //! builds plane warping maps + CV_EXPORTS void buildWarpPlaneMaps(Size, Rect, const Mat &, const Mat &, const Mat &, float, oclMat &, oclMat &); + //! builds cylindrical warping maps + CV_EXPORTS void buildWarpCylindricalMaps(Size, Rect, const Mat &, const Mat &, float, oclMat &, oclMat &); + //! builds spherical warping maps + CV_EXPORTS void buildWarpSphericalMaps(Size, Rect, const Mat &, const Mat &, float, oclMat &, oclMat &); + //! builds Affine warping maps + CV_EXPORTS void buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap); + + //! 
builds Perspective warping maps + CV_EXPORTS void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap); + + ///////////////////////////////////// interpolate frames ////////////////////////////////////////////// + //! Interpolate frames (images) using provided optical flow (displacement field). + //! frame0 - frame 0 (32-bit floating point images, single channel) + //! frame1 - frame 1 (the same type and size) + //! fu - forward horizontal displacement + //! fv - forward vertical displacement + //! bu - backward horizontal displacement + //! bv - backward vertical displacement + //! pos - new frame position + //! newFrame - new frame + //! buf - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 oclMat; + //! occlusion masks 0, occlusion masks 1, + //! interpolated forward flow 0, interpolated forward flow 1, + //! interpolated backward flow 0, interpolated backward flow 1 + //! + CV_EXPORTS void interpolateFrames(const oclMat &frame0, const oclMat &frame1, + const oclMat &fu, const oclMat &fv, + const oclMat &bu, const oclMat &bv, + float pos, oclMat &newFrame, oclMat &buf); + } } #include "opencv2/ocl/matrix_operations.hpp" diff --git a/modules/ocl/perf/interpolation.hpp b/modules/ocl/perf/interpolation.hpp index d918004..fb89e70 100644 --- a/modules/ocl/perf/interpolation.hpp +++ b/modules/ocl/perf/interpolation.hpp @@ -42,7 +42,7 @@ #ifndef __OPENCV_TEST_INTERPOLATION_HPP__ #define __OPENCV_TEST_INTERPOLATION_HPP__ -template T readVal(const cv::Mat& src, int y, int x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) +template T readVal(const cv::Mat &src, int y, int x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) { if (border_type == cv::BORDER_CONSTANT) return (y >= 0 && y < src.rows && x >= 0 && x < src.cols) ? 
src.at(y, x * src.channels() + c) : cv::saturate_cast(borderVal.val[c]); @@ -52,7 +52,7 @@ template T readVal(const cv::Mat& src, int y, int x, int c, int bor template struct NearestInterpolator { - static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) + static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) { return readVal(src, cvFloor(y), cvFloor(x), c, border_type, borderVal); } @@ -60,7 +60,7 @@ template struct NearestInterpolator template struct LinearInterpolator { - static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) + static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) { x -= 0.5f; y -= 0.5f; @@ -85,7 +85,7 @@ template struct CubicInterpolator { static float getValue(float p[4], float x) { - return p[1] + 0.5 * x * (p[2] - p[0] + x*(2.0*p[0] - 5.0*p[1] + 4.0*p[2] - p[3] + x*(3.0*(p[1] - p[2]) + p[3] - p[0]))); + return p[1] + 0.5 * x * (p[2] - p[0] + x * (2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] + x * (3.0 * (p[1] - p[2]) + p[3] - p[0]))); } static float getValue(float p[4][4], float x, float y) @@ -100,7 +100,7 @@ template struct CubicInterpolator return getValue(arr, y); } - static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) + static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) { int ix = cvRound(x); int iy = cvRound(y); diff --git a/modules/ocl/perf/main.cpp b/modules/ocl/perf/main.cpp index e5b9597..6636b11 100644 --- a/modules/ocl/perf/main.cpp +++ b/modules/ocl/perf/main.cpp @@ -50,46 +50,46 @@ using namespace cvtest; using namespace testing; void print_info() -{ +{ printf("\n"); #if defined _WIN32 # if defined _WIN64 - puts("OS: Windows 64"); + puts("OS: Windows 64"); 
# else - puts("OS: Windows 32"); + puts("OS: Windows 32"); # endif #elif defined linux # if defined _LP64 - puts("OS: Linux 64"); + puts("OS: Linux 64"); # else - puts("OS: Linux 32"); + puts("OS: Linux 32"); # endif #elif defined __APPLE__ # if defined _LP64 - puts("OS: Apple 64"); + puts("OS: Apple 64"); # else - puts("OS: Apple 32"); + puts("OS: Apple 32"); # endif #endif } -int main(int argc, char** argv) +int main(int argc, char **argv) { - std::vector oclinfo; + std::vector oclinfo; TS::ptr()->init("ocl"); InitGoogleTest(&argc, argv); print_info(); - int devnums = getDevice(oclinfo); - if(devnums<1) - { - std::cout << "no device found\n"; - return -1; - } - //if you want to use undefault device, set it here - //setDevice(oclinfo[0]); - setBinpath(CLBINPATH); + int devnums = getDevice(oclinfo); + if(devnums < 1) + { + std::cout << "no device found\n"; + return -1; + } + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + setBinpath(CLBINPATH); return RUN_ALL_TESTS(); } diff --git a/modules/ocl/perf/perf_arithm.cpp b/modules/ocl/perf/perf_arithm.cpp index 60458df..7ac8940 100644 --- a/modules/ocl/perf/perf_arithm.cpp +++ b/modules/ocl/perf/perf_arithm.cpp @@ -60,119 +60,120 @@ using namespace testing; using namespace std; PARAM_TEST_CASE(ArithmTestBase, MatType, bool) { - int type; - cv::Scalar val; - - //src mat - cv::Mat mat1; - cv::Mat mat2; - cv::Mat mask; - cv::Mat dst; - cv::Mat dst1; //bak, for two outputs - - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int src2x; - int src2y; - int dstx; - int dsty; - int maskx; - int masky; - - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat mat2_roi; - cv::Mat mask_roi; - cv::Mat dst_roi; - cv::Mat dst1_roi; //bak - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - cv::ocl::oclMat gdst1_whole; //bak - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gmat2; - cv::ocl::oclMat gdst; - cv::ocl::oclMat gdst1; //bak - 
cv::ocl::oclMat gmask; - - virtual void SetUp() - { - type = GET_PARAM(0); + int type; + cv::Scalar val; + + //src mat + cv::Mat mat1; + cv::Mat mat2; + cv::Mat mask; + cv::Mat dst; + cv::Mat dst1; //bak, for two outputs + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int src2x; + int src2y; + int dstx; + int dsty; + int maskx; + int masky; + + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat mat2_roi; + cv::Mat mask_roi; + cv::Mat dst_roi; + cv::Mat dst1_roi; //bak + //std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + cv::ocl::oclMat gdst1_whole; //bak + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gmat2; + cv::ocl::oclMat gdst; + cv::ocl::oclMat gdst1; //bak + cv::ocl::oclMat gmask; + + virtual void SetUp() + { + type = GET_PARAM(0); - cv::RNG& rng = TS::ptr()->get_rng(); + cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); + cv::Size size(MWIDTH, MHEIGHT); - mat1 = randomMat(rng, size, type, 5, 16, false); - //mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false); - mat2 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - dst1 = randomMat(rng, size, type, 5, 16, false); - mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + mat1 = randomMat(rng, size, type, 5, 16, false); + //mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false); + mat2 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + dst1 = randomMat(rng, size, type, 5, 16, false); + mask = randomMat(rng, size, CV_8UC1, 0, 2, false); - cv::threshold(mask, mask, 0.5, 255., CV_8UC1); + cv::threshold(mask, mask, 0.5, 255., CV_8UC1); - val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums>0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - 
//setBinpath(CLBINPATH); - } + val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums>0); + ////if you want to use undefault device, set it here + ////setDevice(oclinfo[0]); + //setBinpath(CLBINPATH); + } - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat1.cols-1; - roirows = mat1.rows-1; - src1x = 1; - src2x = 1; - src1y = 1; - src2y = 1; - dstx = 1; - dsty =1; - maskx =1; - masky =1; - }else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src2x = 0; - src1y = 0; - src2y = 0; - dstx = 0; - dsty = 0; - maskx =0; - masky =0; - }; - - mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); - //mat2_roi = mat2(Rect(src2x,src2y,256,1)); - mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows)); - mask_roi = mask(Rect(maskx,masky,roicols,roirows)); - dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); - dst1_roi = dst1(Rect(dstx,dsty,roicols,roirows)); - - //gdst_whole = dst; - //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); - - //gdst1_whole = dst1; - //gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); - - //gmat1 = mat1_roi; - //gmat2 = mat2_roi; - //gmask = mask_roi; - } + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + roicols = mat1.cols - 1; + roirows = mat1.rows - 1; + src1x = 1; + src2x = 1; + src1y = 1; + src2y = 1; + dstx = 1; + dsty = 1; + maskx = 1; + masky = 1; + } + else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src2x = 0; + src1y = 0; + src2y = 0; + dstx = 0; + dsty = 0; + maskx = 0; + masky = 0; + }; + + mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); + //mat2_roi = mat2(Rect(src2x,src2y,256,1)); + mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); + mask_roi = mask(Rect(maskx, masky, roicols, roirows)); + dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); + dst1_roi = 
dst1(Rect(dstx, dsty, roicols, roirows)); + + //gdst_whole = dst; + //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + //gdst1_whole = dst1; + //gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + + //gmat1 = mat1_roi; + //gmat2 = mat2_roi; + //gmask = mask_roi; + } }; ////////////////////////////////lut///////////////////////////////////////////////// @@ -180,81 +181,96 @@ PARAM_TEST_CASE(ArithmTestBase, MatType, bool) struct Lut : ArithmTestBase {}; TEST_P(Lut, Mat) -{ +{ - cv::Mat mat2(3, 512, CV_8UC1); - cv::RNG& rng = TS::ptr()->get_rng(); - rng.fill(mat2, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(256)); + cv::Mat mat2(3, 512, CV_8UC1); + cv::RNG &rng = TS::ptr()->get_rng(); + rng.fill(mat2, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(256)); + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick = 0; + double totalgputick = 0; + double totalgputick_kernel = 0; + double t0 = 0; + double t1 = 0; + double t2 = 0; + for(int k = LOOPROISTART; k < LOOPROIEND; k++) + { + totalcputick = 0; + totalgputick = 0; + totalgputick_kernel = 0; + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { + Has_roi(k); + mat2 = randomMat(rng, cv::Size(512, 3), type, 5, 16, false); + mat2_roi = mat2(Rect(src2x, src2y, 256, 1)); -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;kget_rng(); - double s = rng.uniform(-10.0, 10.0); - t0 = (double)cvGetTickCount();//cpu start - cv::multiply(mat1_roi, mat2_roi, dst_roi, s); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); - - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2=(double)cvGetTickCount();//kernel - cv::ocl::multiply(gmat1, gmat2, gdst, s); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = 
(double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick=t1+totalgputick; - totalcputick=t0+totalcputick; - totalgputick_kernel=t2+totalgputick_kernel; - - } - if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; - cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - } +TEST_P(Mul, Mat_Scalar) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick = 0; + double totalgputick = 0; + double totalgputick_kernel = 0; + double t0 = 0; + double t1 = 0; + double t2 = 0; + for(int k = LOOPROISTART; k < LOOPROIEND; k++) + { + totalcputick = 0; + totalgputick = 0; + totalgputick_kernel = 0; + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { + Has_roi(k); + cv::RNG &rng = TS::ptr()->get_rng(); + double s = rng.uniform(-10.0, 10.0); + t0 = (double)cvGetTickCount();//cpu start + cv::multiply(mat1_roi, mat2_roi, dst_roi, s); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::multiply(gmat1, gmat2, gdst, s); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick = t1 + totalgputick; + totalcputick = t0 + totalcputick; + totalgputick_kernel = t2 + totalgputick_kernel; + + } + if(k == 0) + { + cout << "no roi\n"; + } + else + { + cout << "with roi\n"; + }; + cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } #else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - cv::RNG& rng = TS::ptr()->get_rng(); - double s = rng.uniform(-10.0, 10.0); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; - cv::ocl::multiply(gmat1, gmat2, gdst, s); - }; + for(int j = LOOPROISTART; j < LOOPROIEND; j ++) + { + Has_roi(j); + cv::RNG &rng = TS::ptr()->get_rng(); + double s = rng.uniform(-10.0, 10.0); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j == 0) + { + cout << "no roi:"; + } + else + { + cout << "\nwith roi:"; + }; + cv::ocl::multiply(gmat1, gmat2, gdst, s); + }; #endif } struct Div : ArithmTestBase {}; -TEST_P(Div, Mat) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;kget_rng(); - double s = rng.uniform(-10.0, 10.0); - t0 = (double)cvGetTickCount();//cpu start - cv::divide(mat1_roi, mat2_roi, dst_roi, s); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); - - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2=(double)cvGetTickCount();//kernel - cv::ocl::divide(gmat1, gmat2, gdst, s); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick=t1+totalgputick; - totalcputick=t0+totalcputick; - 
totalgputick_kernel=t2+totalgputick_kernel; - - } - if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; - cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - } +TEST_P(Div, Mat_Scalar) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick = 0; + double totalgputick = 0; + double totalgputick_kernel = 0; + double t0 = 0; + double t1 = 0; + double t2 = 0; + for(int k = LOOPROISTART; k < LOOPROIEND; k++) + { + totalcputick = 0; + totalgputick = 0; + totalgputick_kernel = 0; + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { + Has_roi(k); + cv::RNG &rng = TS::ptr()->get_rng(); + double s = rng.uniform(-10.0, 10.0); + t0 = (double)cvGetTickCount();//cpu start + cv::divide(mat1_roi, mat2_roi, dst_roi, s); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::divide(gmat1, gmat2, gdst, s); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick = t1 + totalgputick; + totalcputick = t0 + totalcputick; + totalgputick_kernel = t2 + totalgputick_kernel; + + } + if(k == 0) + { + cout << "no roi\n"; + } + else + { + cout << "with roi\n"; + }; + cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } #else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - cv::RNG& rng = TS::ptr()->get_rng(); - double s = rng.uniform(-10.0, 10.0); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; - cv::ocl::divide(gmat1, gmat2, gdst, s); - }; + for(int j = LOOPROISTART; j < LOOPROIEND; j ++) + { + Has_roi(j); + cv::RNG &rng = TS::ptr()->get_rng(); + double s = rng.uniform(-10.0, 10.0); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + if(j == 0) + { + cout << "no roi:"; + } + else + { + cout << "\nwith roi:"; + }; + cv::ocl::divide(gmat1, gmat2, gdst, s); + }; #endif } struct Absdiff : ArithmTestBase {}; -TEST_P(Absdiff, Mat) -{ +TEST_P(Absdiff, Mat) +{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;k::max(); - maxVal = -std::numeric_limits::max(); - for (int i = 0; i < mat1_roi.rows; ++i) - for (int j = 0; j < mat1_roi.cols; ++j) - { - signed char val = mat1_roi.at(i, j); - if (val < minVal) minVal = val; - if (val > maxVal) maxVal = val; - } - } - - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gmat1 = mat1_roi; - double minVal_, maxVal_; - t2=(double)cvGetTickCount();//kernel - cv::ocl::minMax(gmat1, &minVal_, &maxVal_); - t2 = (double)cvGetTickCount() - t2;//kernel - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick=t1+totalgputick; - totalcputick=t0+totalcputick; - totalgputick_kernel=t2+totalgputick_kernel; - - } - if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; - cout << 
"average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - } +TEST_P(MinMax, MAT) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick = 0; + double totalgputick = 0; + double totalgputick_kernel = 0; + double t0 = 0; + double t1 = 0; + double t2 = 0; + for(int k = LOOPROISTART; k < LOOPROIEND; k++) + { + totalcputick = 0; + totalgputick = 0; + totalgputick_kernel = 0; + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { + Has_roi(k); + double minVal, maxVal; + cv::Point minLoc, maxLoc; + t0 = (double)cvGetTickCount();//cpu start + if (mat1.depth() != CV_8S) + { + cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc); + } + else + { + minVal = std::numeric_limits::max(); + maxVal = -std::numeric_limits::max(); + for (int i = 0; i < mat1_roi.rows; ++i) + for (int j = 0; j < mat1_roi.cols; ++j) + { + signed char val = mat1_roi.at(i, j); + if (val < minVal) minVal = val; + if (val > maxVal) maxVal = val; + } + } + + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gmat1 = mat1_roi; + double minVal_, maxVal_; + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::minMax(gmat1, &minVal_, &maxVal_); + t2 = (double)cvGetTickCount() - t2;//kernel + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick = t1 + totalgputick; + totalcputick = t0 + totalcputick; + totalgputick_kernel = t2 + totalgputick_kernel; + + } + if(k == 0) + { + cout << "no roi\n"; + } + else + { + cout << "with roi\n"; + }; + cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } #else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat1 = mat1_roi; - double minVal_, maxVal_; - if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; - cv::ocl::minMax(gmat1, &minVal_, &maxVal_); - }; + for(int j = LOOPROISTART; j < LOOPROIEND; j ++) + { + Has_roi(j); + gmat1 = mat1_roi; + double minVal_, maxVal_; + if(j == 0) + { + cout << "no roi:"; + } + else + { + cout << "\nwith roi:"; + }; + cv::ocl::minMax(gmat1, &minVal_, &maxVal_); + }; #endif } -TEST_P(MinMax, MASK) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;k::max(); - maxVal = -std::numeric_limits::max(); - for (int i = 0; i < mat1_roi.rows; ++i) - for (int j = 0; j < mat1_roi.cols; ++j) - { - signed char val = mat1_roi.at(i, j); - unsigned char m = mask_roi.at(i, j); - if (val < minVal && m) minVal = val; - if (val > maxVal && m) maxVal = val; - } - } - - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gmat1 = mat1_roi; - gmask = mask_roi; - double minVal_, maxVal_; - t2=(double)cvGetTickCount();//kernel - cv::ocl::minMax(gmat1, &minVal_, &maxVal_,gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick=t1+totalgputick; - totalcputick=t0+totalcputick; - totalgputick_kernel=t2+totalgputick_kernel; - - } - if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; - cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - } +TEST_P(MinMax, MASK) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick = 0; + double totalgputick = 0; + double totalgputick_kernel = 0; + double t0 = 0; + double t1 = 0; + double t2 = 0; + for(int k = LOOPROISTART; k < LOOPROIEND; k++) + { + totalcputick = 0; + totalgputick = 0; + totalgputick_kernel = 0; + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { + Has_roi(k); + double minVal, maxVal; + cv::Point minLoc, maxLoc; + t0 = (double)cvGetTickCount();//cpu start + if (mat1.depth() != CV_8S) + { + cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc, mask_roi); + } + else + { + minVal = std::numeric_limits::max(); + maxVal = -std::numeric_limits::max(); + for (int i = 0; i < mat1_roi.rows; ++i) + for (int j = 0; j < mat1_roi.cols; ++j) + { + signed char val = mat1_roi.at(i, j); + unsigned char m = mask_roi.at(i, j); + if (val < minVal && m) minVal = val; + if (val > maxVal && m) maxVal = val; + } + } + + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gmat1 = mat1_roi; + gmask = mask_roi; + double minVal_, maxVal_; + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::minMax(gmat1, &minVal_, &maxVal_, gmask); + t2 = (double)cvGetTickCount() - t2;//kernel + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick = t1 + totalgputick; + totalcputick = t0 + totalcputick; + totalgputick_kernel = t2 + totalgputick_kernel; + + } + if(k == 0) + { + cout << "no roi\n"; + } + else + { + cout << "with roi\n"; + }; + cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } #else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat1 = mat1_roi; - gmask = mask_roi; - double minVal_, maxVal_; - if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; - cv::ocl::minMax(gmat1, &minVal_, &maxVal_,gmask); - }; + for(int j = LOOPROISTART; j < LOOPROIEND; j ++) + { + Has_roi(j); + gmat1 = mat1_roi; + gmask = mask_roi; + double minVal_, maxVal_; + if(j == 0) + { + cout << "no roi:"; + } + else + { + cout << "\nwith roi:"; + }; + cv::ocl::minMax(gmat1, &minVal_, &maxVal_, gmask); + }; #endif } struct MinMaxLoc : ArithmTestBase {}; -TEST_P(MinMaxLoc, MAT) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;k::max(); - maxVal = -std::numeric_limits::max(); - for (int i = 0; i < mat1_roi.rows; ++i) - for (int j = 0; j < mat1_roi.cols; ++j) - { - signed char val = mat1_roi.at(i, j); - if (val < minVal) { - minVal = val; - minLoc.x = j; - minLoc.y = i; - } - if (val > maxVal) { - maxVal = val; - maxLoc.x = j; - maxLoc.y = i; - } - } - } - - - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gmat1 = mat1_roi; - double minVal_, maxVal_; - cv::Point minLoc_, maxLoc_; - t2=(double)cvGetTickCount();//kernel - cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, cv::ocl::oclMat()); - t2 = (double)cvGetTickCount() - t2;//kernel - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick=t1+totalgputick; - totalcputick=t0+totalcputick; - totalgputick_kernel=t2+totalgputick_kernel; - - } - if(k==0){cout<<"no roi\n";}else{cout<<"with 
roi\n";}; - cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - } +TEST_P(MinMaxLoc, MAT) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick = 0; + double totalgputick = 0; + double totalgputick_kernel = 0; + double t0 = 0; + double t1 = 0; + double t2 = 0; + for(int k = LOOPROISTART; k < LOOPROIEND; k++) + { + totalcputick = 0; + totalgputick = 0; + totalgputick_kernel = 0; + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { + Has_roi(k); + double minVal, maxVal; + cv::Point minLoc, maxLoc; + int depth = mat1.depth(); + t0 = (double)cvGetTickCount();//cpu start + if (depth != CV_8S) + { + cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc); + } + else + { + minVal = std::numeric_limits::max(); + maxVal = -std::numeric_limits::max(); + for (int i = 0; i < mat1_roi.rows; ++i) + for (int j = 0; j < mat1_roi.cols; ++j) + { + signed char val = mat1_roi.at(i, j); + if (val < minVal) + { + minVal = val; + minLoc.x = j; + minLoc.y = i; + } + if (val > maxVal) + { + maxVal = val; + maxLoc.x = j; + maxLoc.y = i; + } + } + } + + + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gmat1 = mat1_roi; + double minVal_, maxVal_; + cv::Point minLoc_, maxLoc_; + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, cv::ocl::oclMat()); + t2 = (double)cvGetTickCount() - t2;//kernel + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick = t1 + totalgputick; + totalcputick = t0 + totalcputick; + totalgputick_kernel = t2 + totalgputick_kernel; + + } + if(k == 0) + { + cout << "no roi\n"; + } + else + { + cout << "with 
roi\n"; + }; + cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } #else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat1 = mat1_roi; - double minVal_, maxVal_; - cv::Point minLoc_, maxLoc_; - if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; - cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, cv::ocl::oclMat()); - }; + for(int j = LOOPROISTART; j < LOOPROIEND; j ++) + { + Has_roi(j); + gmat1 = mat1_roi; + double minVal_, maxVal_; + cv::Point minLoc_, maxLoc_; + if(j == 0) + { + cout << "no roi:"; + } + else + { + cout << "\nwith roi:"; + }; + cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, cv::ocl::oclMat()); + }; #endif } -TEST_P(MinMaxLoc, MASK) -{ +TEST_P(MinMaxLoc, MASK) +{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;k::max(); - maxVal = -std::numeric_limits::max(); - for (int i = 0; i < mat1_roi.rows; ++i) - for (int j = 0; j < mat1_roi.cols; ++j) - { - signed char val = mat1_roi.at(i, j); - unsigned char m = mask_roi.at(i ,j); - if (val < minVal && m) { - minVal = val; - minLoc.x = j; - minLoc.y = i; - } - if (val > maxVal && m) { - maxVal = val; - maxLoc.x = j; - maxLoc.y = i; - } - } - } - - - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gmat1 = mat1_roi; - gmask = mask_roi; - double minVal_, maxVal_; - cv::Point minLoc_, maxLoc_; - t2=(double)cvGetTickCount();//kernel - cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, gmask); - t2 = 
(double)cvGetTickCount() - t2;//kernel - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick=t1+totalgputick; - totalcputick=t0+totalcputick; - totalgputick_kernel=t2+totalgputick_kernel; - - } - if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; - cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - } +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick = 0; + double totalgputick = 0; + double totalgputick_kernel = 0; + double t0 = 0; + double t1 = 0; + double t2 = 0; + for(int k = LOOPROISTART; k < LOOPROIEND; k++) + { + totalcputick = 0; + totalgputick = 0; + totalgputick_kernel = 0; + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { + Has_roi(k); + double minVal, maxVal; + cv::Point minLoc, maxLoc; + int depth = mat1.depth(); + t0 = (double)cvGetTickCount();//cpu start + if (depth != CV_8S) + { + cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc, mask_roi); + } + else + { + minVal = std::numeric_limits::max(); + maxVal = -std::numeric_limits::max(); + for (int i = 0; i < mat1_roi.rows; ++i) + for (int j = 0; j < mat1_roi.cols; ++j) + { + signed char val = mat1_roi.at(i, j); + unsigned char m = mask_roi.at(i , j); + if (val < minVal && m) + { + minVal = val; + minLoc.x = j; + minLoc.y = i; + } + if (val > maxVal && m) + { + maxVal = val; + maxLoc.x = j; + maxLoc.y = i; + } + } + } + + + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gmat1 = mat1_roi; + gmask = mask_roi; + double minVal_, maxVal_; + cv::Point minLoc_, maxLoc_; + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, gmask); + t2 = 
(double)cvGetTickCount() - t2;//kernel + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick = t1 + totalgputick; + totalcputick = t0 + totalcputick; + totalgputick_kernel = t2 + totalgputick_kernel; + + } + if(k == 0) + { + cout << "no roi\n"; + } + else + { + cout << "with roi\n"; + }; + cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } #else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat1 = mat1_roi; - gmask = mask_roi; - double minVal_, maxVal_; - cv::Point minLoc_, maxLoc_; - if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; - cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, gmask); - }; + for(int j = LOOPROISTART; j < LOOPROIEND; j ++) + { + Has_roi(j); + gmat1 = mat1_roi; + gmask = mask_roi; + double minVal_, maxVal_; + cv::Point minLoc_, maxLoc_; + if(j == 0) + { + cout << "no roi:"; + } + else + { + cout << "\nwith roi:"; + }; + cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, gmask); + }; #endif } struct Sum : ArithmTestBase {}; -TEST_P(Sum, MAT) -{ +TEST_P(Sum, MAT) +{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;k oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - cv::ocl::oclMat gdst1_whole; //bak - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gmat2; - cv::ocl::oclMat gdst; - cv::ocl::oclMat gdst1; //bak - cv::ocl::oclMat gmask; - - virtual void SetUp() - { - //type = GET_PARAM(0); - type = CV_8UC1; + int type; + cv::Scalar val; + + //src mat 
+ cv::Mat mat1; + cv::Mat mat2; + cv::Mat mask; + cv::Mat dst; + cv::Mat dst1; //bak, for two outputs + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int src2x; + int src2y; + int dstx; + int dsty; + int maskx; + int masky; + + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat mat2_roi; + cv::Mat mask_roi; + cv::Mat dst_roi; + cv::Mat dst1_roi; //bak + //std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + cv::ocl::oclMat gdst1_whole; //bak + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gmat2; + cv::ocl::oclMat gdst; + cv::ocl::oclMat gdst1; //bak + cv::ocl::oclMat gmask; + + virtual void SetUp() + { + //type = GET_PARAM(0); + type = CV_8UC1; - cv::RNG& rng = TS::ptr()->get_rng(); + cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); + cv::Size size(MWIDTH, MHEIGHT); - mat1 = randomMat(rng, size, type, 5, 16, false); - //mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false); - mat2 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - dst1 = randomMat(rng, size, type, 5, 16, false); - mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + mat1 = randomMat(rng, size, type, 5, 16, false); + //mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false); + mat2 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + dst1 = randomMat(rng, size, type, 5, 16, false); + mask = randomMat(rng, size, CV_8UC1, 0, 2, false); - cv::threshold(mask, mask, 0.5, 255., CV_8UC1); + cv::threshold(mask, mask, 0.5, 255., CV_8UC1); - val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums>0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); - } + val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), 
rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums>0); + ////if you want to use undefault device, set it here + ////setDevice(oclinfo[0]); + //setBinpath(CLBINPATH); + } - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat1.cols-1; - roirows = mat1.rows-1; - src1x = 1; - src2x = 1; - src1y = 1; - src2y = 1; - dstx = 1; - dsty =1; - maskx =1; - masky =1; - }else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src2x = 0; - src1y = 0; - src2y = 0; - dstx = 0; - dsty = 0; - maskx =0; - masky =0; - }; - - mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); - //mat2_roi = mat2(Rect(src2x,src2y,256,1)); - mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows)); - mask_roi = mask(Rect(maskx,masky,roicols,roirows)); - dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); - dst1_roi = dst1(Rect(dstx,dsty,roicols,roirows)); - - //gdst_whole = dst; - //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); - - //gdst1_whole = dst1; - //gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); - - //gmat1 = mat1_roi; - //gmat2 = mat2_roi; - //gmask = mask_roi; - } + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + roicols = mat1.cols - 1; + roirows = mat1.rows - 1; + src1x = 1; + src2x = 1; + src1y = 1; + src2y = 1; + dstx = 1; + dsty = 1; + maskx = 1; + masky = 1; + } + else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src2x = 0; + src1y = 0; + src2y = 0; + dstx = 0; + dsty = 0; + maskx = 0; + masky = 0; + }; + + mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); + //mat2_roi = mat2(Rect(src2x,src2y,256,1)); + mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); + mask_roi = mask(Rect(maskx, masky, roicols, roirows)); + dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); + dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows)); + + //gdst_whole = dst; + //gdst = 
gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + //gdst1_whole = dst1; + //gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); + + //gmat1 = mat1_roi; + //gmat2 = mat2_roi; + //gmask = mask_roi; + } }; struct Compare : CompareTestBase {}; -TEST_P(Compare, Mat) -{ - if(mat1.type()==CV_8SC1) - { - cout << "\tUnsupported type\t\n"; - } +TEST_P(Compare, Mat) +{ + if(mat1.type() == CV_8SC1) + { + cout << "\tUnsupported type\t\n"; + } - int cmp_codes[] = {CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE}; - const char* cmp_str[] = {"CMP_EQ", "CMP_GT", "CMP_GE", "CMP_LT", "CMP_LE", "CMP_NE"}; - int cmp_num = sizeof(cmp_codes) / sizeof(int); - for (int i = 0; i < cmp_num; ++i) - { + int cmp_codes[] = {CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE}; + const char *cmp_str[] = {"CMP_EQ", "CMP_GT", "CMP_GE", "CMP_LT", "CMP_LE", "CMP_NE"}; + int cmp_num = sizeof(cmp_codes) / sizeof(int); + for (int i = 0; i < cmp_num; ++i) + { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;k(i,j); - float val2 = mat2.at(i,j); - - ((float *)(dst.data))[i*dst.step/4 +j]= val1 * val1 +val2 * val2; - - } - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst; - t2=(double)cvGetTickCount();//kernel - cv::ocl::magnitudeSqr(clmat1,clmat2, cldst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - cldst.download(cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick=t1+totalgputick; - totalcputick=t0+totalcputick; - totalgputick_kernel=t2+totalgputick_kernel; - - } - if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; - cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - } +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick = 0; + double totalgputick = 0; + double totalgputick_kernel = 0; + double t0 = 0; + double t1 = 0; + double t2 = 0; + for(int k = LOOPROISTART; k < LOOPROIEND; k++) + { + totalcputick = 0; + totalgputick = 0; + totalgputick_kernel = 0; + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + for(int i = 0; i < mat1.rows; ++i) + for(int j = 0; j < mat1.cols; ++j) + { + float val1 = mat1.at(i, j); + float val2 = mat2.at(i, j); + + ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2; + + } + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + cv::ocl::oclMat clmat1(mat1), clmat2(mat2), cldst; + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::magnitudeSqr(clmat1, clmat2, cldst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + cldst.download(cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + if(j == 0) + continue; + totalgputick = t1 + totalgputick; + totalcputick = t0 + totalcputick; + totalgputick_kernel = t2 + totalgputick_kernel; + + } + if(k == 0) + { + cout << "no roi\n"; + } + else + { + cout << "with roi\n"; + }; + cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + } #else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst; - if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; - cv::ocl::magnitudeSqr(clmat1,clmat2, cldst); - }; + for(int j = LOOPROISTART; j < LOOPROIEND; j ++) + { + Has_roi(j); + cv::ocl::oclMat clmat1(mat1), clmat2(mat2), cldst; + if(j == 0) + { + cout << "no roi:"; + } + else + { + cout << "\nwith roi:"; + }; + cv::ocl::magnitudeSqr(clmat1, clmat2, cldst); + }; #endif } @@ -3433,95 +4174,110 @@ TEST_P(MagnitudeSqr, Mat) struct AddWeighted : ArithmTestBase {}; -TEST_P(AddWeighted, Mat) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;k oclinfo; - - virtual void SetUp() - { - - type = GET_PARAM(0); - channels = GET_PARAM(1); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - //cv::ocl::setBinpath(CLBINPATH); - } + int type; + int channels; + std::vector oclinfo; + + virtual void SetUp() + { + + type = GET_PARAM(0); + channels = GET_PARAM(1); + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums > 0); + //cv::ocl::setBinpath(CLBINPATH); + } }; TEST_P(Blend, Performance) { - cv::Size size(MWIDTH, MHEIGHT); - cv::Mat img1_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0); - cv::Mat img2_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 
255.0 : 1.0); - cv::Mat weights1 = randomMat(size, CV_32F, 0, 1); - cv::Mat weights2 = randomMat(size, CV_32F, 0, 1); - cv::ocl::oclMat gimg1(size, CV_MAKETYPE(type, channels)), gimg2(size, CV_MAKETYPE(type, channels)), gweights1(size, CV_32F), gweights2(size, CV_32F); - cv::ocl::oclMat gdst(size, CV_MAKETYPE(type, channels)); - - - double totalgputick_all = 0; - double totalgputick_kernel = 0; - double t1 = 0; - double t2 = 0; - - for (int j = 0; j < LOOP_TIMES + 1; j ++) //LOOP_TIMES=100 - { - t1 = (double)cvGetTickCount(); - cv::ocl::oclMat gimg1 = cv::ocl::oclMat(img1_host); - cv::ocl::oclMat gimg2 = cv::ocl::oclMat(img2_host); - cv::ocl::oclMat gweights1 = cv::ocl::oclMat(weights1); - cv::ocl::oclMat gweights2 = cv::ocl::oclMat(weights1); - - t2 = (double)cvGetTickCount(); - cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, gdst); - t2 = (double)cvGetTickCount() - t2; - - cv::Mat m; - gdst.download(m); - t1 = (double)cvGetTickCount() - t1; - - if (j == 0) - { - continue; - } - - totalgputick_all = t1 + totalgputick_all; - totalgputick_kernel = t2 + totalgputick_kernel; - }; - - cout << "average gpu total runtime is " << totalgputick_all / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - - cout << "average gpu runtime without data transfering is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - + cv::Size size(MWIDTH, MHEIGHT); + cv::Mat img1_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0); + cv::Mat img2_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 
255.0 : 1.0); + cv::Mat weights1 = randomMat(size, CV_32F, 0, 1); + cv::Mat weights2 = randomMat(size, CV_32F, 0, 1); + cv::ocl::oclMat gimg1(size, CV_MAKETYPE(type, channels)), gimg2(size, CV_MAKETYPE(type, channels)), gweights1(size, CV_32F), gweights2(size, CV_32F); + cv::ocl::oclMat gdst(size, CV_MAKETYPE(type, channels)); + + + double totalgputick_all = 0; + double totalgputick_kernel = 0; + double t1 = 0; + double t2 = 0; + + for (int j = 0; j < LOOP_TIMES + 1; j ++) //LOOP_TIMES=100 + { + t1 = (double)cvGetTickCount(); + cv::ocl::oclMat gimg1 = cv::ocl::oclMat(img1_host); + cv::ocl::oclMat gimg2 = cv::ocl::oclMat(img2_host); + cv::ocl::oclMat gweights1 = cv::ocl::oclMat(weights1); + cv::ocl::oclMat gweights2 = cv::ocl::oclMat(weights1); + + t2 = (double)cvGetTickCount(); + cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, gdst); + t2 = (double)cvGetTickCount() - t2; + + cv::Mat m; + gdst.download(m); + t1 = (double)cvGetTickCount() - t1; + + if (j == 0) + { + continue; + } + + totalgputick_all = t1 + totalgputick_all; + totalgputick_kernel = t2 + totalgputick_kernel; + }; + + cout << "average gpu total runtime is " << totalgputick_all / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + + cout << "average gpu runtime without data transfering is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + } INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine( diff --git a/modules/ocl/perf/perf_canny.cpp b/modules/ocl/perf/perf_canny.cpp index 8eff35f..e0f2db7 100644 --- a/modules/ocl/perf/perf_canny.cpp +++ b/modules/ocl/perf/perf_canny.cpp @@ -85,70 +85,70 @@ IMPLEMENT_PARAM_CLASS(L2gradient, bool); PARAM_TEST_CASE(Canny1, AppertureSize, L2gradient) { - int apperture_size; - bool useL2gradient; - //std::vector oclinfo; - - virtual void SetUp() - { - apperture_size = GET_PARAM(0); - useL2gradient = GET_PARAM(1); - - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - } + int apperture_size; + bool useL2gradient; + //std::vector oclinfo; + + virtual void SetUp() + { + apperture_size = GET_PARAM(0); + useL2gradient = GET_PARAM(1); + + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums > 0); + } }; TEST_P(Canny1, Performance) { - cv::Mat img = readImage(FILTER_IMAGE,cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(img.empty()); - - double low_thresh = 100.0; - double high_thresh = 150.0; - - cv::Mat edges_gold; - cv::ocl::oclMat edges; - - double totalgputick=0; - double totalgputick_kernel=0; - - double t1=0; - double t2=0; - for(int j = 0; j < LOOP_TIMES+1; j ++) - { - - t1 = (double)cvGetTickCount();//gpu start1 - - cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);//upload - - t2=(double)cvGetTickCount();//kernel - cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - edges.download (cpu_dst);//download - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick=t1+totalgputick; - - totalgputick_kernel=t2+totalgputick_kernel; + cv::Mat img = readImage(FILTER_IMAGE, cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(img.empty()); - } + double low_thresh = 100.0; + double high_thresh = 150.0; + + cv::Mat edges_gold; + cv::ocl::oclMat edges; + + double totalgputick = 0; + double totalgputick_kernel = 0; + + double t1 = 
0; + double t2 = 0; + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { + + t1 = (double)cvGetTickCount();//gpu start1 + + cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);//upload + + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient); + t2 = (double)cvGetTickCount() - t2;//kernel + + cv::Mat cpu_dst; + edges.download (cpu_dst);//download + + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick = t1 + totalgputick; + + totalgputick_kernel = t2 + totalgputick_kernel; + + } - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; } INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny1, testing::Combine( - testing::Values(AppertureSize(3), AppertureSize(5)), - testing::Values(L2gradient(false), L2gradient(true)))); + testing::Values(AppertureSize(3), AppertureSize(5)), + testing::Values(L2gradient(false), L2gradient(true)))); diff --git a/modules/ocl/perf/perf_columnsum.cpp b/modules/ocl/perf/perf_columnsum.cpp index c1f23fc..96ea26a 100644 --- a/modules/ocl/perf/perf_columnsum.cpp +++ b/modules/ocl/perf/perf_columnsum.cpp @@ -16,7 +16,7 @@ // // @Authors // Fangfang Bai fangfang@multicorewareinc.com -// +// // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -63,53 +63,53 @@ using namespace std; PARAM_TEST_CASE(ColumnSum) { - cv::Mat src; - //std::vector oclinfo; - - virtual void SetUp() - { - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - } + cv::Mat src; + //std::vector oclinfo; + + virtual void SetUp() + { + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums > 0); + } }; TEST_F(ColumnSum, Performance) { - cv::Size size(MWIDTH,MHEIGHT); + cv::Size size(MWIDTH, MHEIGHT); cv::Mat src = randomMat(size, CV_32FC1); cv::ocl::oclMat d_dst; - double totalgputick=0; - double totalgputick_kernel=0; - double t1=0; - double t2=0; + double totalgputick = 0; + double totalgputick_kernel = 0; + double t1 = 0; + double t2 = 0; - for(int j = 0; j < LOOP_TIMES+1; j ++) - { + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { - t1 = (double)cvGetTickCount();//gpu start1 + t1 = (double)cvGetTickCount();//gpu start1 - cv::ocl::oclMat d_src(src); + cv::ocl::oclMat d_src(src); - t2=(double)cvGetTickCount();//kernel - cv::ocl::columnSum(d_src,d_dst); - t2 = (double)cvGetTickCount() - t2;//kernel + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::columnSum(d_src, d_dst); + t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - d_dst.download (cpu_dst);//download + cv::Mat 
cpu_dst; + d_dst.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 + t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; + if(j == 0) + continue; - totalgputick=t1+totalgputick; - totalgputick_kernel=t2+totalgputick_kernel; + totalgputick = t1 + totalgputick; + totalgputick_kernel = t2 + totalgputick_kernel; - } + } - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; @@ -117,4 +117,4 @@ TEST_F(ColumnSum, Performance) -#endif \ No newline at end of file +#endif \ No newline at end of file diff --git a/modules/ocl/perf/perf_fft.cpp b/modules/ocl/perf/perf_fft.cpp index 6b929f4..c9c19d0 100644 --- a/modules/ocl/perf/perf_fft.cpp +++ b/modules/ocl/perf/perf_fft.cpp @@ -48,75 +48,75 @@ using namespace std; #ifdef HAVE_CLAMDFFT //////////////////////////////////////////////////////////////////////////// // Dft -PARAM_TEST_CASE(Dft, cv::Size, bool) +PARAM_TEST_CASE(Dft, cv::Size, bool) { - cv::Size dft_size; - bool dft_rows; - vector info; - virtual void SetUp() - { - dft_size = GET_PARAM(0); - dft_rows = GET_PARAM(1); - cv::ocl::getDevice(info); - } + cv::Size dft_size; + bool dft_rows; + vector info; + virtual void SetUp() + { + dft_size = GET_PARAM(0); + dft_rows = GET_PARAM(1); + cv::ocl::getDevice(info); + } }; TEST_P(Dft, C2C) { - cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0); - int flags = 0; - flags |= dft_rows ? cv::DFT_ROWS : 0; + cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0); + int flags = 0; + flags |= dft_rows ? 
cv::DFT_ROWS : 0; - cv::ocl::oclMat d_b; + cv::ocl::oclMat d_b; - double totalgputick=0; - double totalgputick_kernel=0; - double t1=0; - double t2=0; + double totalgputick = 0; + double totalgputick_kernel = 0; + double t1 = 0; + double t2 = 0; - for(int j = 0; j < LOOP_TIMES+1; j ++) - { + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { - t1 = (double)cvGetTickCount();//gpu start1 + t1 = (double)cvGetTickCount();//gpu start1 - cv::ocl::oclMat ga=cv::ocl::oclMat(a);//upload + cv::ocl::oclMat ga = cv::ocl::oclMat(a); //upload - t2=(double)cvGetTickCount();//kernel - cv::ocl::dft(ga, d_b, a.size(), flags); - t2 = (double)cvGetTickCount() - t2;//kernel + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::dft(ga, d_b, a.size(), flags); + t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - d_b.download (cpu_dst);//download + cv::Mat cpu_dst; + d_b.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 + t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; + if(j == 0) + continue; - totalgputick=t1+totalgputick; - totalgputick_kernel=t2+totalgputick_kernel; + totalgputick = t1 + totalgputick; + totalgputick_kernel = t2 + totalgputick_kernel; - } + } - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; } TEST_P(Dft, R2CthenC2R) { - cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0); + cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0); - int flags = 0; - //flags |= dft_rows ? 
cv::DFT_ROWS : 0; // not supported yet + int flags = 0; + //flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet - cv::ocl::oclMat d_b, d_c; + cv::ocl::oclMat d_b, d_c; - cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags); - cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT); + cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags); + cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT); - EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, ""); + EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, ""); } //INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine( diff --git a/modules/ocl/perf/perf_filters.cpp b/modules/ocl/perf/perf_filters.cpp index af98d47..ce46b89 100644 --- a/modules/ocl/perf/perf_filters.cpp +++ b/modules/ocl/perf/perf_filters.cpp @@ -57,96 +57,96 @@ using namespace std; PARAM_TEST_CASE(FilterTestBase, MatType, bool) { - int type; - cv::Scalar val; - - //src mat - cv::Mat mat1; - cv::Mat mat2; - cv::Mat mask; - cv::Mat dst; - cv::Mat dst1; //bak, for two outputs - - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int src2x; - int src2y; - int dstx; - int dsty; - int maskx; - int masky; - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat mat2_roi; - cv::Mat mask_roi; - cv::Mat dst_roi; - cv::Mat dst1_roi; //bak - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - cv::ocl::oclMat gdst1_whole; //bak - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gmat2; - cv::ocl::oclMat gdst; - cv::ocl::oclMat gdst1; //bak - cv::ocl::oclMat gmask; - - virtual void SetUp() - { - type = GET_PARAM(0); - - cv::RNG& rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); - - mat1 = randomMat(rng, size, type, 5, 16, false); - mat2 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - dst1 = randomMat(rng, size, type, 5, 16, false); - mask = randomMat(rng, size, CV_8UC1, 0, 2, false); - - 
cv::threshold(mask, mask, 0.5, 255., CV_8UC1); - - val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); - } - - void random_roi() - { - cv::RNG& rng = TS::ptr()->get_rng(); - - //randomize ROI - roicols = rng.uniform(1, mat1.cols); - roirows = rng.uniform(1, mat1.rows); - src1x = rng.uniform(0, mat1.cols - roicols); - src1y = rng.uniform(0, mat1.rows - roirows); - src2x = rng.uniform(0, mat2.cols - roicols); - src2y = rng.uniform(0, mat2.rows - roirows); - dstx = rng.uniform(0, dst.cols - roicols); - dsty = rng.uniform(0, dst.rows - roirows); - maskx = rng.uniform(0, mask.cols - roicols); - masky = rng.uniform(0, mask.rows - roirows); - - mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); - mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows)); - mask_roi = mask(Rect(maskx,masky,roicols,roirows)); - dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); - dst1_roi = dst1(Rect(dstx,dsty,roicols,roirows)); - - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); - - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); - - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmask = mask_roi; - } + int type; + cv::Scalar val; + + //src mat + cv::Mat mat1; + cv::Mat mat2; + cv::Mat mask; + cv::Mat dst; + cv::Mat dst1; //bak, for two outputs + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int src2x; + int src2y; + int dstx; + int dsty; + int maskx; + int masky; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat mat2_roi; + cv::Mat mask_roi; + cv::Mat dst_roi; + cv::Mat dst1_roi; //bak + //std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + cv::ocl::oclMat gdst1_whole; //bak + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gmat2; + cv::ocl::oclMat gdst; + cv::ocl::oclMat gdst1; //bak + cv::ocl::oclMat gmask; + + virtual void SetUp() + { + type = GET_PARAM(0); + + cv::RNG &rng = TS::ptr()->get_rng(); + 
cv::Size size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, type, 5, 16, false); + mat2 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + dst1 = randomMat(rng, size, type, 5, 16, false); + mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + + cv::threshold(mask, mask, 0.5, 255., CV_8UC1); + + val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); + } + + void random_roi() + { + cv::RNG &rng = TS::ptr()->get_rng(); + + //randomize ROI + roicols = rng.uniform(1, mat1.cols); + roirows = rng.uniform(1, mat1.rows); + src1x = rng.uniform(0, mat1.cols - roicols); + src1y = rng.uniform(0, mat1.rows - roirows); + src2x = rng.uniform(0, mat2.cols - roicols); + src2y = rng.uniform(0, mat2.rows - roirows); + dstx = rng.uniform(0, dst.cols - roicols); + dsty = rng.uniform(0, dst.rows - roirows); + maskx = rng.uniform(0, mask.cols - roicols); + masky = rng.uniform(0, mask.rows - roirows); + + mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); + mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); + mask_roi = mask(Rect(maskx, masky, roicols, roirows)); + dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); + dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows)); + + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + + gdst1_whole = dst1; + gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); + + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gmask = mask_roi; + } }; @@ -155,762 +155,859 @@ PARAM_TEST_CASE(FilterTestBase, MatType, bool) PARAM_TEST_CASE(Blur, MatType, cv::Size, int) { - int type; - cv::Size ksize; - int bordertype; - - //src mat - cv::Mat mat1; - cv::Mat dst; - - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat dst_roi; - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - 
cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() - { - type = GET_PARAM(0); - ksize = GET_PARAM(1); - bordertype = GET_PARAM(2); - - cv::RNG& rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); - - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - - - void Has_roi(int b) - { - if(b) - { - roicols = mat1.cols-1; - roirows = mat1.rows-1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty =1; - }else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - }; - - mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); - dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); - - } + int type; + cv::Size ksize; + int bordertype; + + //src mat + cv::Mat mat1; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat dst_roi; + //std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + ksize = GET_PARAM(1); + bordertype = GET_PARAM(2); + + cv::RNG &rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums > 0); + ////if you want to use undefault device, set it here + ////setDevice(oclinfo[0]); + //cv::ocl::setBinpath(CLBINPATH); + } + + + void Has_roi(int b) + { + if(b) + { + roicols = mat1.cols - 1; + roirows = mat1.rows - 1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty = 1; + } + else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; 
+ dstx = 0; + dsty = 0; + }; + + mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); + dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); + + } }; TEST_P(Blur, Mat) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;k oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat; - cv::ocl::oclMat gdst; - - virtual void SetUp() - { - type = GET_PARAM(0); - ksize = GET_PARAM(1); - - cv::RNG& rng = TS::ptr()->get_rng(); - cv::Size size = cv::Size(MWIDTH, MHEIGHT); - - mat = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - if(b) - { - roicols = mat.cols-1; - roirows = mat.rows-1; - srcx = 1; - srcy = 1; - dstx = 1; - dsty =1; - }else - { - roicols = mat.cols; - roirows = mat.rows; - srcx = 0; - srcy = 0; - dstx = 0; - dsty = 0; - }; - - mat_roi = mat(Rect(srcx,srcy,roicols,roirows)); - dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); - - } + int type; + int ksize; + + //src mat + cv::Mat mat; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int srcx; + int srcy; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat_roi; + cv::Mat dst_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + ksize = GET_PARAM(1); + + cv::RNG &rng = TS::ptr()->get_rng(); + cv::Size size = cv::Size(MWIDTH, MHEIGHT); + + mat = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + //int devnums = getDevice(oclinfo); + 
//CV_Assert(devnums > 0); + ////if you want to use undefault device, set it here + ////setDevice(oclinfo[0]); + //cv::ocl::setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + if(b) + { + roicols = mat.cols - 1; + roirows = mat.rows - 1; + srcx = 1; + srcy = 1; + dstx = 1; + dsty = 1; + } + else + { + roicols = mat.cols; + roirows = mat.rows; + srcx = 0; + srcy = 0; + dstx = 0; + dsty = 0; + }; + + mat_roi = mat(Rect(srcx, srcy, roicols, roirows)); + dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); + + } }; struct Laplacian : LaplacianTestBase {}; -TEST_P(Laplacian, Accuracy) -{ - -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;k oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() - { - type = GET_PARAM(0); - // iterations = GET_PARAM(1); - - cv::RNG& rng = TS::ptr()->get_rng(); - cv::Size size = cv::Size(MWIDTH, MHEIGHT); - - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - // rng.fill(kernel, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3)); - kernel = randomMat(rng, Size(3,3), CV_8UC1, 0, 3, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - if(b) - { - roicols = mat1.cols-1; - roirows = mat1.rows-1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty =1; - }else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - }; - - mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); - dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); - - } + int type; + //int iterations; + + //erode or dilate kernel + cv::Mat kernel; + + //src mat + cv::Mat mat1; + 
cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat dst_roi; + std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + // iterations = GET_PARAM(1); + + cv::RNG &rng = TS::ptr()->get_rng(); + cv::Size size = cv::Size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + // rng.fill(kernel, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3)); + kernel = randomMat(rng, Size(3, 3), CV_8UC1, 0, 3, false); + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums > 0); + ////if you want to use undefault device, set it here + ////setDevice(oclinfo[0]); + //cv::ocl::setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + if(b) + { + roicols = mat1.cols - 1; + roirows = mat1.rows - 1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty = 1; + } + else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + }; + + mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); + dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); + + } }; -// erode +// erode -struct Erode : ErodeDilateBase{}; +struct Erode : ErodeDilateBase {}; TEST_P(Erode, Mat) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;k oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() - { - type = GET_PARAM(0); - dx = GET_PARAM(1); - dy = GET_PARAM(2); - ksize = GET_PARAM(3); - bordertype = GET_PARAM(4); - dx = 2; dy=0; - - cv::RNG& rng = TS::ptr()->get_rng(); - cv::Size size = cv::Size(MWIDTH, MHEIGHT); - 
- mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - if(b) - { - roicols = mat1.cols-1; - roirows = mat1.rows-1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty =1; - }else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - }; - - mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); - dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); - - } + int type; + int dx, dy, ksize, bordertype; + + //src mat + cv::Mat mat1; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat dst_roi; + //std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + dx = GET_PARAM(1); + dy = GET_PARAM(2); + ksize = GET_PARAM(3); + bordertype = GET_PARAM(4); + dx = 2; + dy = 0; + + cv::RNG &rng = TS::ptr()->get_rng(); + cv::Size size = cv::Size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums > 0); + ////if you want to use undefault device, set it here + ////setDevice(oclinfo[0]); + //cv::ocl::setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + if(b) + { + roicols = mat1.cols - 1; + roirows = mat1.rows - 1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty = 1; + } + else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + }; + + mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); + dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); + + } }; 
TEST_P(Sobel, Mat) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;k oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() - { - type = GET_PARAM(0); - dx = GET_PARAM(1); - dy = GET_PARAM(2); - bordertype = GET_PARAM(3); - dx = 1; dy=0; - - cv::RNG& rng = TS::ptr()->get_rng(); - cv::Size size = cv::Size(MWIDTH, MHEIGHT); - - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - if(b) - { - roicols = mat1.cols-1; - roirows = mat1.rows-1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty =1; - }else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - }; - - mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); - dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); - - } + int type; + int dx, dy, bordertype; + + //src mat + cv::Mat mat1; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat dst_roi; + //std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + dx = GET_PARAM(1); + dy = GET_PARAM(2); + bordertype = GET_PARAM(3); + dx = 1; + dy = 0; + + cv::RNG &rng = TS::ptr()->get_rng(); + cv::Size size = cv::Size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + //int devnums = getDevice(oclinfo); + 
//CV_Assert(devnums > 0); + ////if you want to use undefault device, set it here + ////setDevice(oclinfo[0]); + //cv::ocl::setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + if(b) + { + roicols = mat1.cols - 1; + roirows = mat1.rows - 1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty = 1; + } + else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + }; + + mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); + dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); + + } }; TEST_P(Scharr, Mat) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;k oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() - { - type = GET_PARAM(0); - ksize = GET_PARAM(1); - bordertype = GET_PARAM(2); - - cv::RNG& rng = TS::ptr()->get_rng(); - cv::Size size = cv::Size(MWIDTH, MHEIGHT); - - sigma1 = rng.uniform(0.1, 1.0); - sigma2 = rng.uniform(0.1, 1.0); - - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - if(b) - { - roicols = mat1.cols-1; - roirows = mat1.rows-1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty =1; - }else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - }; - - mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); - dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); - - } + int type; + cv::Size ksize; + int bordertype; + + double sigma1, sigma2; + + //src mat + cv::Mat mat1; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int 
dstx; + int dsty; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat dst_roi; + //std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + ksize = GET_PARAM(1); + bordertype = GET_PARAM(2); + + cv::RNG &rng = TS::ptr()->get_rng(); + cv::Size size = cv::Size(MWIDTH, MHEIGHT); + + sigma1 = rng.uniform(0.1, 1.0); + sigma2 = rng.uniform(0.1, 1.0); + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums > 0); + ////if you want to use undefault device, set it here + ////setDevice(oclinfo[0]); + //cv::ocl::setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + if(b) + { + roicols = mat1.cols - 1; + roirows = mat1.rows - 1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty = 1; + } + else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + }; + + mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); + dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); + + } }; TEST_P(GaussianBlur, Mat) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;k info; - virtual void SetUp() - { - type = GET_PARAM(0); - mat_size = GET_PARAM(1); - flags = GET_PARAM(2); - - cv::ocl::getDevice(info); - } + int type; + cv::Size mat_size; + int flags; + vector info; + virtual void SetUp() + { + type = GET_PARAM(0); + mat_size = GET_PARAM(1); + flags = GET_PARAM(2); + + cv::ocl::getDevice(info); + } }; TEST_P(Gemm, Performance) { - cv::Mat a = randomMat(mat_size, type, 0.0, 10.0); - cv::Mat b = randomMat(mat_size, type, 0.0, 10.0); - cv::Mat c = randomMat(mat_size, type, 0.0, 10.0); - cv::ocl::oclMat ocl_dst; + cv::Mat a = randomMat(mat_size, type, 0.0, 10.0); + cv::Mat b = 
randomMat(mat_size, type, 0.0, 10.0); + cv::Mat c = randomMat(mat_size, type, 0.0, 10.0); + cv::ocl::oclMat ocl_dst; - double totalgputick=0; - double totalgputick_kernel=0; - double t1=0; - double t2=0; + double totalgputick = 0; + double totalgputick_kernel = 0; + double t1 = 0; + double t2 = 0; - for(int j = 0; j < LOOP_TIMES+1; j ++) - { + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { - t1 = (double)cvGetTickCount();//gpu start1 + t1 = (double)cvGetTickCount();//gpu start1 - cv::ocl::oclMat ga = cv::ocl::oclMat(a);//upload - cv::ocl::oclMat gb = cv::ocl::oclMat(b);//upload - cv::ocl::oclMat gc = cv::ocl::oclMat(c);//upload + cv::ocl::oclMat ga = cv::ocl::oclMat(a);//upload + cv::ocl::oclMat gb = cv::ocl::oclMat(b);//upload + cv::ocl::oclMat gc = cv::ocl::oclMat(c);//upload - t2=(double)cvGetTickCount();//kernel - cv::ocl::gemm(ga, gb, 1.0,gc, 1.0, ocl_dst, flags); - t2 = (double)cvGetTickCount() - t2;//kernel + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::gemm(ga, gb, 1.0, gc, 1.0, ocl_dst, flags); + t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - ocl_dst.download (cpu_dst);//download + cv::Mat cpu_dst; + ocl_dst.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end + t1 = (double)cvGetTickCount() - t1;//gpu end - if(j == 0) - continue; + if(j == 0) + continue; - totalgputick=t1+totalgputick; - totalgputick_kernel=t2+totalgputick_kernel; + totalgputick = t1 + totalgputick; + totalgputick_kernel = t2 + totalgputick_kernel; - } - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + } + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; } INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine( - testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/), - testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)), - testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T))); + testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/), + testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)), + testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T))); #endif \ No newline at end of file diff --git a/modules/ocl/perf/perf_haar.cpp b/modules/ocl/perf/perf_haar.cpp index b91d306..6344158 100644 --- a/modules/ocl/perf/perf_haar.cpp +++ b/modules/ocl/perf/perf_haar.cpp @@ -53,118 +53,125 @@ using namespace testing; using namespace std; using namespace cv; -struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } }; +struct getRect +{ + Rect operator ()(const CvAvgComp &e) const + { + return e.rect; + } +}; PARAM_TEST_CASE(HaarTestBase, int, int) { - //std::vector oclinfo; - cv::ocl::OclCascadeClassifier cascade, nestedCascade; - cv::CascadeClassifier cpucascade, cpunestedCascade; - // Mat img; - - double scale; - int index; - - virtual void SetUp() - { - scale = 1.0; - index=0; - string cascadeName="../../../data/haarcascades/haarcascade_frontalface_alt.xml"; - - if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName))) - { - cout << "ERROR: Could not load classifier cascade" << endl; - cout << "Usage: facedetect [--cascade=]\n" - " [--scale[=\n" - " [filename|camera_index]\n" << endl ; - return; - } - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums>0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath("E:\\"); - } + //std::vector oclinfo; + cv::ocl::OclCascadeClassifier cascade, 
nestedCascade; + cv::CascadeClassifier cpucascade, cpunestedCascade; + // Mat img; + + double scale; + int index; + + virtual void SetUp() + { + scale = 1.0; + index = 0; + string cascadeName = "../../../data/haarcascades/haarcascade_frontalface_alt.xml"; + + if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName))) + { + cout << "ERROR: Could not load classifier cascade" << endl; + cout << "Usage: facedetect [--cascade=]\n" + " [--scale[=\n" + " [filename|camera_index]\n" << endl ; + return; + } + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums>0); + ////if you want to use undefault device, set it here + ////setDevice(oclinfo[0]); + //cv::ocl::setBinpath("E:\\"); + } }; ////////////////////////////////faceDetect///////////////////////////////////////////////// struct Haar : HaarTestBase {}; -TEST_F(Haar, FaceDetect) -{ - string imgName = "../../../samples/c/lena.jpg"; - Mat img = imread( imgName, 1 ); - - if(img.empty()) - { - std::cout << "Couldn't read test" << index <<".jpg" << std::endl; - return ; - } - - int i = 0; - double t = 0; - vector faces, oclfaces; - - const static Scalar colors[] = { CV_RGB(0,0,255), - CV_RGB(0,128,255), - CV_RGB(0,255,255), - CV_RGB(0,255,0), - CV_RGB(255,128,0), - CV_RGB(255,255,0), - CV_RGB(255,0,0), - CV_RGB(255,0,255)} ; - - Mat gray, smallImg(cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 ); - MemStorage storage(cvCreateMemStorage(0)); - cvtColor( img, gray, CV_BGR2GRAY ); - resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); - equalizeHist( smallImg, smallImg ); - - t = (double)cvGetTickCount(); - for(int k= 0; k vecAvgComp; - Seq(_objects).copyTo(vecAvgComp); - oclfaces.resize(vecAvgComp.size()); - std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect()); - - //for( vector::const_iterator r = faces.begin(); r != faces.end(); r++, i++ ) - //{ - // Mat smallImgROI; - // Point center; - // Scalar color = colors[i%8]; - // int radius; - // center.x = 
cvRound((r->x + r->width*0.5)*scale); - // center.y = cvRound((r->y + r->height*0.5)*scale); - // radius = cvRound((r->width + r->height)*0.25*scale); - // circle( img, center, radius, color, 3, 8, 0 ); - //} - //namedWindow("result"); - //imshow("result",img); - //waitKey(0); - //destroyAllWindows(); +TEST_F(Haar, FaceDetect) +{ + string imgName = "../../../samples/c/lena.jpg"; + Mat img = imread( imgName, 1 ); + + if(img.empty()) + { + std::cout << "Couldn't read test" << index << ".jpg" << std::endl; + return ; + } + + int i = 0; + double t = 0; + vector faces, oclfaces; + + const static Scalar colors[] = { CV_RGB(0, 0, 255), + CV_RGB(0, 128, 255), + CV_RGB(0, 255, 255), + CV_RGB(0, 255, 0), + CV_RGB(255, 128, 0), + CV_RGB(255, 255, 0), + CV_RGB(255, 0, 0), + CV_RGB(255, 0, 255) + } ; + + Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 ); + MemStorage storage(cvCreateMemStorage(0)); + cvtColor( img, gray, CV_BGR2GRAY ); + resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); + equalizeHist( smallImg, smallImg ); + + t = (double)cvGetTickCount(); + for(int k = 0; k < LOOP_TIMES; k++) + { + cpucascade.detectMultiScale( smallImg, faces, 1.1, + 3, 0 + | CV_HAAR_SCALE_IMAGE + , Size(30, 30), Size(0, 0) ); + } + t = (double)cvGetTickCount() - t ; + printf( "cpudetection time = %g ms\n", t / (LOOP_TIMES * (double)cvGetTickFrequency() * 1000.) ); + + cv::ocl::oclMat image; + CvSeq *_objects; + t = (double)cvGetTickCount(); + for(int k = 0; k < LOOP_TIMES; k++) + { + image.upload(smallImg); + _objects = cascade.oclHaarDetectObjects( image, storage, 1.1, + 3, 0 + | CV_HAAR_SCALE_IMAGE + , Size(30, 30), Size(0, 0) ); + } + t = (double)cvGetTickCount() - t ; + printf( "ocldetection time = %g ms\n", t / (LOOP_TIMES * (double)cvGetTickFrequency() * 1000.) 
); + vector vecAvgComp; + Seq(_objects).copyTo(vecAvgComp); + oclfaces.resize(vecAvgComp.size()); + std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect()); + + //for( vector::const_iterator r = faces.begin(); r != faces.end(); r++, i++ ) + //{ + // Mat smallImgROI; + // Point center; + // Scalar color = colors[i%8]; + // int radius; + // center.x = cvRound((r->x + r->width*0.5)*scale); + // center.y = cvRound((r->y + r->height*0.5)*scale); + // radius = cvRound((r->width + r->height)*0.25*scale); + // circle( img, center, radius, color, 3, 8, 0 ); + //} + //namedWindow("result"); + //imshow("result",img); + //waitKey(0); + //destroyAllWindows(); } #endif // HAVE_OPENCL diff --git a/modules/ocl/perf/perf_hog.cpp b/modules/ocl/perf/perf_hog.cpp index e472204..903b8f9 100644 --- a/modules/ocl/perf/perf_hog.cpp +++ b/modules/ocl/perf/perf_hog.cpp @@ -46,16 +46,16 @@ #include "precomp.hpp" #include -#ifdef HAVE_OPENCL - -using namespace cv; -using namespace cv::ocl; -using namespace cvtest; -using namespace testing; +#ifdef HAVE_OPENCL + +using namespace cv; +using namespace cv::ocl; +using namespace cvtest; +using namespace testing; using namespace std; #define FILTER_IMAGE "../../../samples/gpu/road.png" - + #ifndef MWC_TEST_UTILITY #define MWC_TEST_UTILITY @@ -76,92 +76,92 @@ class name \ } #endif // IMPLEMENT_PARAM_CLASS -#endif // MWC_TEST_UTILITY - -IMPLEMENT_PARAM_CLASS(WinSizw48, bool); - -PARAM_TEST_CASE(HOG, WinSizw48, bool) -{ - bool is48; - vector detector; - virtual void SetUp() - { - is48 = GET_PARAM(0); - if(is48) - { - detector = cv::ocl::HOGDescriptor::getPeopleDetector48x96(); - } - else - { - detector = cv::ocl::HOGDescriptor::getPeopleDetector64x128(); - } - } -}; - -TEST_P(HOG, Performance) -{ - cv::Mat img = readImage(FILTER_IMAGE,cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(img.empty()); - - // define HOG related arguments +#endif // MWC_TEST_UTILITY + +IMPLEMENT_PARAM_CLASS(WinSizw48, bool); + +PARAM_TEST_CASE(HOG, WinSizw48, 
bool) +{ + bool is48; + vector detector; + virtual void SetUp() + { + is48 = GET_PARAM(0); + if(is48) + { + detector = cv::ocl::HOGDescriptor::getPeopleDetector48x96(); + } + else + { + detector = cv::ocl::HOGDescriptor::getPeopleDetector64x128(); + } + } +}; + +TEST_P(HOG, Performance) +{ + cv::Mat img = readImage(FILTER_IMAGE, cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(img.empty()); + + // define HOG related arguments float scale = 1.05; int nlevels = 13; float gr_threshold = 8; float hit_threshold = 1.4; bool hit_threshold_auto = true; - int win_width = is48? 48 : 64; + int win_width = is48 ? 48 : 64; int win_stride_width = 8; int win_stride_height = 8; - bool gamma_corr = true; - + bool gamma_corr = true; + Size win_size(win_width, win_width * 2); //(64, 128) or (48, 96) - Size win_stride(win_stride_width, win_stride_height); - + Size win_stride(win_stride_width, win_stride_height); + cv::ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, - cv::ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr, - cv::ocl::HOGDescriptor::DEFAULT_NLEVELS); + cv::ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr, + cv::ocl::HOGDescriptor::DEFAULT_NLEVELS); gpu_hog.setSVMDetector(detector); - - double totalgputick=0; - double totalgputick_kernel=0; - - double t1=0; - double t2=0; - for(int j = 0; j < LOOP_TIMES+1; j ++) - { - t1 = (double)cvGetTickCount();//gpu start1 - - ocl::oclMat d_src(img);//upload - - t2=(double)cvGetTickCount();//kernel - - vector found; + + double totalgputick = 0; + double totalgputick_kernel = 0; + + double t1 = 0; + double t2 = 0; + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { + t1 = (double)cvGetTickCount();//gpu start1 + + ocl::oclMat d_src(img);//upload + + t2 = (double)cvGetTickCount(); //kernel + + vector found; gpu_hog.detectMultiScale(d_src, found, hit_threshold, win_stride, - Size(0, 0), scale, gr_threshold); - - t2 = (double)cvGetTickCount() - t2;//kernel - - // no download time for HOG - - t1 = 
(double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick=t1+totalgputick; - - totalgputick_kernel=t2+totalgputick_kernel; - - } - - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; -} - - -INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, HOG, testing::Combine(testing::Values(WinSizw48(false), WinSizw48(true)), testing::Values(false))); - + Size(0, 0), scale, gr_threshold); + + t2 = (double)cvGetTickCount() - t2;//kernel + + // no download time for HOG + + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick = t1 + totalgputick; + + totalgputick_kernel = t2 + totalgputick_kernel; + + } + + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; +} + + +INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, HOG, testing::Combine(testing::Values(WinSizw48(false), WinSizw48(true)), testing::Values(false))); + #endif //Have opencl \ No newline at end of file diff --git a/modules/ocl/perf/perf_imgproc.cpp b/modules/ocl/perf/perf_imgproc.cpp index 9b2b995..651a595 100644 --- a/modules/ocl/perf/perf_imgproc.cpp +++ b/modules/ocl/perf/perf_imgproc.cpp @@ -66,280 +66,296 @@ MatType nulltype = -1; vector typeVector(MatType type) { - vector v; - v.push_back(type); - return v; + vector v; + v.push_back(type); + return v; } -PARAM_TEST_CASE(ImgprocTestBase, MatType,MatType,MatType,MatType,MatType, bool) +PARAM_TEST_CASE(ImgprocTestBase, MatType, MatType, MatType, MatType, MatType, bool) { - int type1,type2,type3,type4,type5; - cv::Scalar val; - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int src2x; - int src2y; - int dstx; - int dsty; - int dst1x; - int dst1y; - int maskx; - int masky; - - //mat - cv::Mat mat1; - cv::Mat mat2; - cv::Mat mask; - cv::Mat dst; - cv::Mat dst1; //bak, for two outputs - - //mat with roi - cv::Mat mat1_roi; - cv::Mat mat2_roi; - cv::Mat mask_roi; - cv::Mat dst_roi; - cv::Mat dst1_roi; //bak - //std::vector oclinfo; - //ocl mat - cv::ocl::oclMat clmat1; - cv::ocl::oclMat clmat2; - cv::ocl::oclMat clmask; - cv::ocl::oclMat cldst; - cv::ocl::oclMat cldst1; //bak - - //ocl mat with roi - cv::ocl::oclMat clmat1_roi; - cv::ocl::oclMat clmat2_roi; - cv::ocl::oclMat clmask_roi; - cv::ocl::oclMat cldst_roi; - cv::ocl::oclMat cldst1_roi; - - virtual void SetUp() - { - type1 = GET_PARAM(0); - type2 = GET_PARAM(1); - type3 = GET_PARAM(2); - type4 = GET_PARAM(3); - type5 = GET_PARAM(4); - cv::RNG& rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); - double min = 1,max = 20; - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums>0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - 
if(type1!=nulltype) - { - mat1 = randomMat(rng, size, type1, min, max, false); - clmat1 = mat1; - } - if(type2!=nulltype) - { - mat2 = randomMat(rng, size, type2, min, max, false); - clmat2 = mat2; - } - if(type3!=nulltype) - { - dst = randomMat(rng, size, type3, min, max, false); - cldst = dst; - } - if(type4!=nulltype) - { - dst1 = randomMat(rng, size, type4, min, max, false); - cldst1 = dst1; - } - if(type5!=nulltype) - { - mask = randomMat(rng, size, CV_8UC1, 0, 2, false); - cv::threshold(mask, mask, 0.5, 255., type5); - clmask = mask; - } - val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); - } - - - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat1.cols-1; //start - roirows = mat1.rows-1; - src1x = 1; - src2x = 1; - src1y = 1; - src2y = 1; - dstx = 1; - dsty =1; - dst1x = 1; - dst1y =1; - maskx =1; - masky =1; - }else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src2x = 0; - src1y = 0; - src2y = 0; - dstx = 0; - dsty = 0; - dst1x =0; - dst1y =0; - maskx =0; - masky =0; - }; - - if(type1!=nulltype) - { - mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); - //clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows)); - } - if(type2!=nulltype) - { - mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows)); - //clmat2_roi = clmat2(Rect(src2x,src2y,roicols,roirows)); - } - if(type3!=nulltype) - { - dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); - //cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows)); - } - if(type4!=nulltype) - { - dst1_roi = dst1(Rect(dst1x,dst1y,roicols,roirows)); - //cldst1_roi = cldst1(Rect(dst1x,dst1y,roicols,roirows)); - } - if(type5!=nulltype) - { - mask_roi = mask(Rect(maskx,masky,roicols,roirows)); - //clmask_roi = clmask(Rect(maskx,masky,roicols,roirows)); - } - } - - void random_roi() - { - cv::RNG& rng = TS::ptr()->get_rng(); - - //randomize ROI - roicols = rng.uniform(1, mat1.cols); - roirows 
= rng.uniform(1, mat1.rows); - src1x = rng.uniform(0, mat1.cols - roicols); - src1y = rng.uniform(0, mat1.rows - roirows); - src2x = rng.uniform(0, mat2.cols - roicols); - src2y = rng.uniform(0, mat2.rows - roirows); - dstx = rng.uniform(0, dst.cols - roicols); - dsty = rng.uniform(0, dst.rows - roirows); - dst1x = rng.uniform(0, dst1.cols - roicols); - dst1y = rng.uniform(0, dst1.rows - roirows); - maskx = rng.uniform(0, mask.cols - roicols); - masky = rng.uniform(0, mask.rows - roirows); - - if(type1!=nulltype) - { - mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); - //clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows)); - } - if(type2!=nulltype) - { - mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows)); - //clmat2_roi = clmat2(Rect(src2x,src2y,roicols,roirows)); - } - if(type3!=nulltype) - { - dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); - //cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows)); - } - if(type4!=nulltype) - { - dst1_roi = dst1(Rect(dst1x,dst1y,roicols,roirows)); - //cldst1_roi = cldst1(Rect(dst1x,dst1y,roicols,roirows)); - } - if(type5!=nulltype) - { - mask_roi = mask(Rect(maskx,masky,roicols,roirows)); - //clmask_roi = clmask(Rect(maskx,masky,roicols,roirows)); - } - } + int type1, type2, type3, type4, type5; + cv::Scalar val; + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int src2x; + int src2y; + int dstx; + int dsty; + int dst1x; + int dst1y; + int maskx; + int masky; + + //mat + cv::Mat mat1; + cv::Mat mat2; + cv::Mat mask; + cv::Mat dst; + cv::Mat dst1; //bak, for two outputs + + //mat with roi + cv::Mat mat1_roi; + cv::Mat mat2_roi; + cv::Mat mask_roi; + cv::Mat dst_roi; + cv::Mat dst1_roi; //bak + //std::vector oclinfo; + //ocl mat + cv::ocl::oclMat clmat1; + cv::ocl::oclMat clmat2; + cv::ocl::oclMat clmask; + cv::ocl::oclMat cldst; + cv::ocl::oclMat cldst1; //bak + + //ocl mat with roi + cv::ocl::oclMat clmat1_roi; + cv::ocl::oclMat clmat2_roi; + cv::ocl::oclMat clmask_roi; + cv::ocl::oclMat cldst_roi; + 
cv::ocl::oclMat cldst1_roi; + + virtual void SetUp() + { + type1 = GET_PARAM(0); + type2 = GET_PARAM(1); + type3 = GET_PARAM(2); + type4 = GET_PARAM(3); + type5 = GET_PARAM(4); + cv::RNG &rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + double min = 1, max = 20; + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums>0); + ////if you want to use undefault device, set it here + ////setDevice(oclinfo[0]); + //cv::ocl::setBinpath(CLBINPATH); + if(type1 != nulltype) + { + mat1 = randomMat(rng, size, type1, min, max, false); + clmat1 = mat1; + } + if(type2 != nulltype) + { + mat2 = randomMat(rng, size, type2, min, max, false); + clmat2 = mat2; + } + if(type3 != nulltype) + { + dst = randomMat(rng, size, type3, min, max, false); + cldst = dst; + } + if(type4 != nulltype) + { + dst1 = randomMat(rng, size, type4, min, max, false); + cldst1 = dst1; + } + if(type5 != nulltype) + { + mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + cv::threshold(mask, mask, 0.5, 255., type5); + clmask = mask; + } + val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); + } + + + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + roicols = mat1.cols - 1; //start + roirows = mat1.rows - 1; + src1x = 1; + src2x = 1; + src1y = 1; + src2y = 1; + dstx = 1; + dsty = 1; + dst1x = 1; + dst1y = 1; + maskx = 1; + masky = 1; + } + else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src2x = 0; + src1y = 0; + src2y = 0; + dstx = 0; + dsty = 0; + dst1x = 0; + dst1y = 0; + maskx = 0; + masky = 0; + }; + + if(type1 != nulltype) + { + mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); + //clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows)); + } + if(type2 != nulltype) + { + mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); + //clmat2_roi = clmat2(Rect(src2x,src2y,roicols,roirows)); + } + if(type3 != nulltype) + { + dst_roi = dst(Rect(dstx, dsty, 
roicols, roirows)); + //cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows)); + } + if(type4 != nulltype) + { + dst1_roi = dst1(Rect(dst1x, dst1y, roicols, roirows)); + //cldst1_roi = cldst1(Rect(dst1x,dst1y,roicols,roirows)); + } + if(type5 != nulltype) + { + mask_roi = mask(Rect(maskx, masky, roicols, roirows)); + //clmask_roi = clmask(Rect(maskx,masky,roicols,roirows)); + } + } + + void random_roi() + { + cv::RNG &rng = TS::ptr()->get_rng(); + + //randomize ROI + roicols = rng.uniform(1, mat1.cols); + roirows = rng.uniform(1, mat1.rows); + src1x = rng.uniform(0, mat1.cols - roicols); + src1y = rng.uniform(0, mat1.rows - roirows); + src2x = rng.uniform(0, mat2.cols - roicols); + src2y = rng.uniform(0, mat2.rows - roirows); + dstx = rng.uniform(0, dst.cols - roicols); + dsty = rng.uniform(0, dst.rows - roirows); + dst1x = rng.uniform(0, dst1.cols - roicols); + dst1y = rng.uniform(0, dst1.rows - roirows); + maskx = rng.uniform(0, mask.cols - roicols); + masky = rng.uniform(0, mask.rows - roirows); + + if(type1 != nulltype) + { + mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); + //clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows)); + } + if(type2 != nulltype) + { + mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); + //clmat2_roi = clmat2(Rect(src2x,src2y,roicols,roirows)); + } + if(type3 != nulltype) + { + dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); + //cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows)); + } + if(type4 != nulltype) + { + dst1_roi = dst1(Rect(dst1x, dst1y, roicols, roirows)); + //cldst1_roi = cldst1(Rect(dst1x,dst1y,roicols,roirows)); + } + if(type5 != nulltype) + { + mask_roi = mask(Rect(maskx, masky, roicols, roirows)); + //clmask_roi = clmask(Rect(maskx,masky,roicols,roirows)); + } + } }; ////////////////////////////////equalizeHist////////////////////////////////////////// struct equalizeHist : ImgprocTestBase {}; -TEST_P(equalizeHist, MatType) -{ - if (mat1.type() != CV_8UC1 || mat1.type() != dst.type()) - { - 
cout<<"Unsupported type"< oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() - { - type = GET_PARAM(0); - //dsize = GET_PARAM(1); - interpolation = GET_PARAM(1); - - cv::RNG& rng = TS::ptr()->get_rng(); - size = cv::Size(MWIDTH, MHEIGHT); - - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - src_roicols = mat1.cols-1; //start - src_roirows = mat1.rows-1; - dst_roicols=dst.cols-1; - dst_roirows=dst.rows-1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty =1; - - }else - { - src_roicols = mat1.cols; - src_roirows = mat1.rows; - dst_roicols=dst.cols; - dst_roirows=dst.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - - }; - mat1_roi = mat1(Rect(src1x,src1y,src_roicols,src_roirows)); - dst_roi = dst(Rect(dstx,dsty,dst_roicols,dst_roirows)); - - - } +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick = 0; + double totalgputick = 0; + double totalgputick_kernel = 0; + double t0 = 0; + double t1 = 0; + double t2 = 0; + for(int k = LOOPROISTART; k < LOOPROIEND; k++) + { + totalcputick = 0; + totalgputick = 0; + totalgputick_kernel = 0; + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { + Has_roi(k); + int blockSize = 7, apertureSize = 3; + int borderType = cv::BORDER_REFLECT; + double kk = 2; + t0 = (double)cvGetTickCount();//cpu start + cv::cornerHarris(mat1_roi, dst_roi, blockSize, apertureSize, kk, borderType); + t0 = (double)cvGetTickCount() - t0;//cpu end -}; + t1 = (double)cvGetTickCount();//gpu start1 + if(type1 != nulltype) + { + clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); + } + t2 = 
(double)cvGetTickCount(); //kernel + cv::ocl::cornerHarris(clmat1_roi, cldst_roi, blockSize, apertureSize, kk, borderType); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_cldst; + cldst.download(cpu_cldst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 -/////warpAffine + if(j == 0) + continue; -struct WarpAffine : WarpTestBase{}; + totalgputick = t1 + totalgputick; + totalcputick = t0 + totalcputick; + totalgputick_kernel = t2 + totalgputick_kernel; -TEST_P(WarpAffine, Mat) -{ - static const double coeffs[2][3] = - { - {cos(3.14 / 6), -sin(3.14 / 6), 100.0}, - {sin(3.14 / 6), cos(3.14 / 6), -100.0} - }; - Mat M(2, 3, CV_64F, (void*)coeffs); - -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;k oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; //ocl mat with roi - cv::ocl::oclMat gsrc_roi; - cv::ocl::oclMat gdst_roi; - cv::ocl::oclMat gmap1_roi; - cv::ocl::oclMat gmap2_roi; + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; virtual void SetUp() { - srcType = GET_PARAM(0); - map1Type = GET_PARAM(1); - map2Type = GET_PARAM(2); - interpolation = GET_PARAM(3); - bordertype = GET_PARAM(4); + type = GET_PARAM(0); + //dsize = GET_PARAM(1); + interpolation = GET_PARAM(1); - cv::RNG& rng = TS::ptr()->get_rng(); + cv::RNG &rng = TS::ptr()->get_rng(); + size = cv::Size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums > 0); + ////if you want to use undefault device, set it here + ////setDevice(oclinfo[0]); + //cv::ocl::setBinpath(CLBINPATH); + } + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + src_roicols = mat1.cols - 1; //start + src_roirows = mat1.rows - 1; + dst_roicols = dst.cols - 1; + dst_roirows = dst.rows - 1; + src1x 
= 1; + src1y = 1; + dstx = 1; + dsty = 1; + + } + else + { + src_roicols = mat1.cols; + src_roirows = mat1.rows; + dst_roicols = dst.cols; + dst_roirows = dst.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + + }; + mat1_roi = mat1(Rect(src1x, src1y, src_roicols, src_roirows)); + dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows)); + + + } + +}; + +/////warpAffine + +struct WarpAffine : WarpTestBase {}; + +TEST_P(WarpAffine, Mat) +{ + static const double coeffs[2][3] = + { + {cos(3.14 / 6), -sin(3.14 / 6), 100.0}, + {sin(3.14 / 6), cos(3.14 / 6), -100.0} + }; + Mat M(2, 3, CV_64F, (void *)coeffs); + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick = 0; + double totalgputick = 0; + double totalgputick_kernel = 0; + double t0 = 0; + double t1 = 0; + double t2 = 0; + for(int k = LOOPROISTART; k < LOOPROIEND; k++) + { + totalcputick = 0; + totalgputick = 0; + totalgputick_kernel = 0; + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::warpAffine(mat1_roi, dst_roi, M, size, interpolation); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows)); + + gmat1 = mat1_roi; + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::warpAffine(gmat1, gdst, M, size, interpolation); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick = t1 + totalgputick; + totalcputick = t0 + totalcputick; + totalgputick_kernel = t2 + totalgputick_kernel; + + } + if(k == 0) + { + cout << "no roi\n"; + } + else + { + cout << "with roi\n"; + }; + cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } +#else + for(int j = LOOPROISTART; j < LOOPROIEND; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows)); + gmat1 = mat1_roi; + if(j == 0) + { + cout << "no roi:"; + } + else + { + cout << "\nwith roi:"; + }; + cv::ocl::warpAffine(gmat1, gdst, M, size, interpolation); + }; +#endif + +} + + +// warpPerspective + +struct WarpPerspective : WarpTestBase {}; + +TEST_P(WarpPerspective, Mat) +{ + static const double coeffs[3][3] = + { + {cos(3.14 / 6), -sin(3.14 / 6), 100.0}, + {sin(3.14 / 6), cos(3.14 / 6), -100.0}, + {0.0, 0.0, 1.0} + }; + Mat M(3, 3, CV_64F, (void *)coeffs); + +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick = 0; + double totalgputick = 0; + double totalgputick_kernel = 0; + double t0 = 0; + double t1 = 0; + double t2 = 0; + for(int k = LOOPROISTART; k < LOOPROIEND; k++) + { + totalcputick = 0; + totalgputick = 0; + totalgputick_kernel = 0; + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::warpPerspective(mat1_roi, dst_roi, M, size, interpolation); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows)); + + gmat1 = mat1_roi; + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::warpPerspective(gmat1, gdst, M, size, interpolation); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick = t1 + totalgputick; + totalcputick = t0 + totalcputick; + totalgputick_kernel = t2 + 
totalgputick_kernel; + + } + if(k == 0) + { + cout << "no roi\n"; + } + else + { + cout << "with roi\n"; + }; + cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } +#else + for(int j = LOOPROISTART; j < LOOPROIEND; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows)); + gmat1 = mat1_roi; + if(j == 0) + { + cout << "no roi:"; + } + else + { + cout << "\nwith roi:"; + }; + cv::ocl::warpPerspective(gmat1, gdst, M, size, interpolation); + }; +#endif + +} + +///////////////////////////////////////////////////////////////////////////////////////////////// +// remap +////////////////////////////////////////////////////////////////////////////////////////////////// + +PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int) +{ + int srcType; + int map1Type; + int map2Type; + cv::Scalar val; + + int interpolation; + int bordertype; + + cv::Mat src; + cv::Mat dst; + cv::Mat map1; + cv::Mat map2; + + + int src_roicols; + int src_roirows; + int dst_roicols; + int dst_roirows; + int map1_roicols; + int map1_roirows; + int map2_roicols; + int map2_roirows; + int srcx; + int srcy; + int dstx; + int dsty; + int map1x; + int map1y; + int map2x; + int map2y; + + cv::Mat src_roi; + cv::Mat dst_roi; + cv::Mat map1_roi; + cv::Mat map2_roi; + + //ocl mat for testing + cv::ocl::oclMat gdst; + + //ocl mat with roi + cv::ocl::oclMat gsrc_roi; + cv::ocl::oclMat gdst_roi; + cv::ocl::oclMat gmap1_roi; + cv::ocl::oclMat gmap2_roi; + + virtual void SetUp() + { + srcType = GET_PARAM(0); + map1Type = GET_PARAM(1); + map2Type = GET_PARAM(2); + interpolation = GET_PARAM(3); + bordertype = GET_PARAM(4); 
+ + cv::RNG &rng = TS::ptr()->get_rng(); cv::Size srcSize = cv::Size(MWIDTH, MHEIGHT); cv::Size dstSize = cv::Size(MWIDTH, MHEIGHT); cv::Size map1Size = cv::Size(MWIDTH, MHEIGHT); double min = 5, max = 16; - if(srcType != nulltype) - { - src = randomMat(rng, srcSize, srcType, min, max, false); - } - if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2&& map2Type == nulltype)) - { - map1 = randomMat(rng, map1Size, map1Type, min, max, false); + if(srcType != nulltype) + { + src = randomMat(rng, srcSize, srcType, min, max, false); + } + if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2 && map2Type == nulltype)) + { + map1 = randomMat(rng, map1Size, map1Type, min, max, false); + + } + else if (map1Type == CV_32FC1 && map2Type == CV_32FC1) + { + map1 = randomMat(rng, map1Size, map1Type, min, max, false); + map2 = randomMat(rng, map1Size, map1Type, min, max, false); + } + + else + cout << "The wrong input type" << endl; + + dst = randomMat(rng, map1Size, srcType, min, max, false); + switch (src.channels()) + { + case 1: + val = cv::Scalar(rng.uniform(0.0, 10.0), 0, 0, 0); + break; + case 2: + val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0, 0); + break; + case 3: + val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0); + break; + case 4: + val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0)); + break; + } + + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums > 0); + //if you want to use undefault device, set it here + //setDevice(oclinfo[0]); + //cv::ocl::setBinpath(CLBINPATH); + } + void Has_roi(int b) + { + if(b) + { + //randomize ROI + dst_roicols = dst.cols - 1; + dst_roirows = dst.rows - 1; + + src_roicols = src.cols - 1; + src_roirows = src.rows - 1; + + + srcx = 1; + srcy = 1; + dstx = 1; + dsty = 1; + } + else + { + dst_roicols = dst.cols; + dst_roirows = dst.rows; + + src_roicols = src.cols; + 
src_roirows = src.rows; + + + srcx = 0; + srcy = 0; + dstx = 0; + dsty = 0; + } + map1_roicols = dst_roicols; + map1_roirows = dst_roirows; + map2_roicols = dst_roicols; + map2_roirows = dst_roirows; + map1x = dstx; + map1y = dsty; + map2x = dstx; + map2y = dsty; + + if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2 && map2Type == nulltype)) + { + map1_roi = map1(Rect(map1x, map1y, map1_roicols, map1_roirows)); + gmap1_roi = map1_roi; + } + + else if (map1Type == CV_32FC1 && map2Type == CV_32FC1) + { + map1_roi = map1(Rect(map1x, map1y, map1_roicols, map1_roirows)); + map2_roi = map2(Rect(map2x, map2y, map2_roicols, map2_roirows)); + gmap1_roi = map1_roi; + gmap2_roi = map2_roi; + } + dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows)); + src_roi = dst(Rect(srcx, srcy, src_roicols, src_roirows)); + + } +}; + +TEST_P(Remap, Mat) +{ + if((interpolation == 1 && map1Type == CV_16SC2) || (map1Type == CV_32FC1 && map2Type == nulltype) || (map1Type == CV_16SC2 && map2Type == CV_32FC1) || (map1Type == CV_32FC2 && map2Type == CV_32FC1)) + { + cout << "LINEAR don't support the map1Type and map2Type" << endl; + return; + } + int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/}; + const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/}; +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick = 0; + double totalgputick = 0; + double totalgputick_kernel = 0; + double t0 = 0; + double t1 = 0; + double t2 = 0; + for(int k = 0; k < 2; k++) + { + totalcputick = 0; + totalgputick = 0; + totalgputick_kernel = 0; + for(int j = 0; j < LOOP_TIMES + 1; j++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::remap(src_roi, dst_roi, map1_roi, map2_roi, interpolation, bordertype[0], val); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start + gsrc_roi = src_roi; + gdst = dst; + 
gdst_roi = gdst(Rect(dstx, dsty, dst_roicols, dst_roirows)); + + t2 = (double)cvGetTickCount();//kernel + cv::ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, interpolation, bordertype[0], val); + t2 = (double)cvGetTickCount() - t2;//kernel + + cv::Mat cpu_dst; + gdst.download(cpu_dst); + + t1 = (double)cvGetTickCount() - t1;//gpu end + + if (j == 0) + continue; + totalgputick = t1 + totalgputick; + totalcputick = t0 + totalcputick; + totalgputick_kernel = t2 + totalgputick_kernel; + + } + if(k == 0) + { + cout << "no roi\n"; + } + else + { + cout << "with roi\n"; + }; + cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } +#else + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + gdst = dst; + gdst_roi = gdst(Rect(dstx, dsty, dst_roicols, dst_roirows)); + gsrc_roi = src_roi; + if(j == 0) + { + cout << "no roi:"; + } + else + { + cout << "\nwith roi:"; + }; + cv::ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, interpolation, bordertype[0], val); + }; +#endif + +} + + +///////////////////////////////////////////////////////////////////////////////////////////////// +// resize + +PARAM_TEST_CASE(Resize, MatType, cv::Size, double, double, int) +{ + int type; + cv::Size dsize; + double fx, fy; + int interpolation; + + //src mat + cv::Mat mat1; + cv::Mat dst; + + // set up roi + int src_roicols; + int src_roirows; + int dst_roicols; + int dst_roirows; + int src1x; + int src1y; + int dstx; + int dsty; + + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat dst_roi; + //std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + 
virtual void SetUp() + { + type = GET_PARAM(0); + dsize = GET_PARAM(1); + fx = GET_PARAM(2); + fy = GET_PARAM(3); + interpolation = GET_PARAM(4); + + cv::RNG &rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + if(dsize == cv::Size() && !(fx > 0 && fy > 0)) + { + cout << "invalid dsize and fx fy" << endl; + return; + } + + if(dsize == cv::Size()) + { + dsize.width = (int)(size.width * fx); + dsize.height = (int)(size.height * fy); + } + + mat1 = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, dsize, type, 5, 16, false); + + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums > 0); + ////if you want to use undefault device, set it here + ////setDevice(oclinfo[0]); + //cv::ocl::setBinpath(CLBINPATH); + } + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + src_roicols = mat1.cols - 1; //start + src_roirows = mat1.rows - 1; + dst_roicols = dst.cols - 1; + dst_roirows = dst.rows - 1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty = 1; + + } + else + { + src_roicols = mat1.cols; + src_roirows = mat1.rows; + dst_roicols = dst.cols; + dst_roirows = dst.rows; + src1x = 0; + src1y = 0; + dstx = 0; + dsty = 0; + + }; + mat1_roi = mat1(Rect(src1x, src1y, src_roicols, src_roirows)); + dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows)); + + + } + +}; + +TEST_P(Resize, Mat) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick = 0; + double totalgputick = 0; + double totalgputick_kernel = 0; + double t0 = 0; + double t1 = 0; + double t2 = 0; + for(int k = LOOPROISTART; k < LOOPROIEND; k++) + { + totalcputick = 0; + totalgputick = 0; + totalgputick_kernel = 0; + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + cv::resize(mat1_roi, dst_roi, dsize, fx, fy, interpolation); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, 
dst_roirows)); + + gmat1 = mat1_roi; + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick = t1 + totalgputick; + totalcputick = t0 + totalcputick; + totalgputick_kernel = t2 + totalgputick_kernel; + + } + if(k == 0) + { + cout << "no roi\n"; + } + else + { + cout << "with roi\n"; + }; + cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } +#else + for(int j = LOOPROISTART; j < LOOPROIEND; j ++) + { + Has_roi(j); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows)); + gmat1 = mat1_roi; + if(j == 0) + { + cout << "no roi:"; + } + else + { + cout << "\nwith roi:"; + }; + cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation); + }; +#endif + +} + +///////////////////////////////////////////////////////////////////////////////////////////////// +//threshold + +PARAM_TEST_CASE(Threshold, MatType, ThreshOp) +{ + int type; + int threshOp; + + //src mat + cv::Mat mat1; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat dst_roi; + //std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + threshOp = GET_PARAM(1); - } - else if (map1Type == CV_32FC1 && map2Type == CV_32FC1) - { - map1 = 
randomMat(rng, map1Size, map1Type, min, max, false); - map2 = randomMat(rng, map1Size, map1Type, min, max, false); - } + cv::RNG &rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); - else - cout<<"The wrong input type"< 0); - //if you want to use undefault device, set it here - //setDevice(oclinfo[0]); + ////if you want to use undefault device, set it here + ////setDevice(oclinfo[0]); //cv::ocl::setBinpath(CLBINPATH); } void Has_roi(int b) { + //cv::RNG& rng = TS::ptr()->get_rng(); if(b) { //randomize ROI - dst_roicols = dst.cols - 1; - dst_roirows = dst.rows - 1; - - src_roicols = src.cols - 1; - src_roirows = src.rows - 1; + roicols = mat1.cols - 1; //start + roirows = mat1.rows - 1; + src1x = 1; + src1y = 1; + dstx = 1; + dsty = 1; - - srcx = 1; - srcy = 1; - dstx = 1; - dsty = 1; } else { - dst_roicols = dst.cols; - dst_roirows = dst.rows; - - src_roicols = src.cols; - src_roirows = src.rows; - - - srcx = 0; - srcy = 0; + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; dstx = 0; dsty = 0; - } - map1_roicols = dst_roicols; - map1_roirows = dst_roirows; - map2_roicols = dst_roicols; - map2_roirows = dst_roirows; - map1x = dstx; - map1y = dsty; - map2x = dstx; - map2y = dsty; - if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2&& map2Type == nulltype)) - { - map1_roi = map1(Rect(map1x,map1y,map1_roicols,map1_roirows)); - gmap1_roi = map1_roi; - } + }; + mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); + dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - else if (map1Type == CV_32FC1 && map2Type == CV_32FC1) - { - map1_roi = map1(Rect(map1x,map1y,map1_roicols,map1_roirows)); - map2_roi = map2(Rect(map2x,map2y,map2_roicols,map2_roirows)); - gmap1_roi = map1_roi; - gmap2_roi = map2_roi; - } - dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows)); - src_roi = dst(Rect(srcx, srcy, src_roicols, src_roirows)); } }; -TEST_P(Remap, Mat) +TEST_P(Threshold, Mat) { - if((interpolation == 1 && map1Type == 
CV_16SC2) ||(map1Type == CV_32FC1 && map2Type == nulltype) || (map1Type == CV_16SC2 && map2Type == CV_32FC1) || (map1Type == CV_32FC2 && map2Type == CV_32FC1)) +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick = 0; + double totalgputick = 0; + double totalgputick_kernel = 0; + double t0 = 0; + double t1 = 0; + double t2 = 0; + for(int k = LOOPROISTART; k < LOOPROIEND; k++) { - cout << "LINEAR don't support the map1Type and map2Type" << endl; - return; - } - int bordertype[] = {cv::BORDER_CONSTANT,cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/}; - const char* borderstr[]={"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/}; -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k = 0; k < 2; k++){ totalcputick = 0; totalgputick = 0; totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES+1; j++) + for(int j = 0; j < LOOP_TIMES + 1; j ++) { Has_roi(k); + double maxVal = randomDouble(20.0, 127.0); + double thresh = randomDouble(0.0, maxVal); t0 = (double)cvGetTickCount();//cpu start - cv::remap(src_roi, dst_roi, map1_roi, map2_roi, interpolation, bordertype[0], val); + cv::threshold(mat1_roi, dst_roi, thresh, maxVal, threshOp); t0 = (double)cvGetTickCount() - t0;//cpu end - t1 = (double)cvGetTickCount();//gpu start - gsrc_roi = src_roi; - gdst = dst; - gdst_roi = gdst(Rect(dstx,dsty,dst_roicols,dst_roirows)); + t1 = (double)cvGetTickCount();//gpu start1 - t2 = (double)cvGetTickCount();//kernel - cv::ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, interpolation, bordertype[0], val); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + gmat1 = mat1_roi; + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::threshold(gmat1, gdst, thresh, maxVal, threshOp); t2 = (double)cvGetTickCount() - t2;//kernel - + cv::Mat cpu_dst; - gdst.download(cpu_dst); - - t1 = 
(double)cvGetTickCount() - t1;//gpu end + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 - if (j == 0) + if(j == 0) continue; - totalgputick=t1+totalgputick; - totalcputick=t0+totalcputick; - totalgputick_kernel=t2+totalgputick_kernel; + totalgputick = t1 + totalgputick; + totalcputick = t0 + totalcputick; + totalgputick_kernel = t2 + totalgputick_kernel; + + } + if(k == 0) + { + cout << "no roi\n"; } - if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; - cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + else + { + cout << "with roi\n"; + }; + cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; } #else - for(int j = 0; j < 2; j ++) + for(int j = LOOPROISTART; j < LOOPROIEND; j ++) { Has_roi(j); - gdst = dst; - gdst_roi = gdst(Rect(dstx,dsty,dst_roicols,dst_roirows)); - gsrc_roi = src_roi; - if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; - cv::ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, interpolation, bordertype[0], val); + double maxVal = randomDouble(20.0, 127.0); + double thresh = randomDouble(0.0, maxVal); + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + gmat1 = mat1_roi; + + if(j == 0) + { + cout << "no roi:"; + } + else + { + cout << "\nwith roi:"; + }; + cv::ocl::threshold(gmat1, gdst, thresh, maxVal, threshOp); }; #endif } +/////////////////////////////////////////////////////////////////////////////////////////////////// +//meanShift +PARAM_TEST_CASE(meanShiftTestBase, MatType, MatType, int, int, cv::TermCriteria) +{ + int type, typeCoor; + int sp, sr; + cv::TermCriteria crit; + //src mat + cv::Mat src; + cv::Mat dst; + cv::Mat dstCoor; -///////////////////////////////////////////////////////////////////////////////////////////////// -// resize + //set up roi + int roicols; + int roirows; + int srcx; + int srcy; + int dstx; + int dsty; -PARAM_TEST_CASE(Resize, MatType, cv::Size, double, double, int) -{ - int type; - cv::Size dsize; - double fx, fy; - int interpolation; - - //src mat - cv::Mat mat1; - cv::Mat dst; - - // set up roi - int src_roicols; - int src_roirows; - int dst_roicols; - int dst_roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat dst_roi; - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() - { - type = GET_PARAM(0); - dsize = GET_PARAM(1); - fx = GET_PARAM(2); - fy = GET_PARAM(3); - interpolation = GET_PARAM(4); - - cv::RNG& rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, 
MHEIGHT); - - if(dsize == cv::Size() && !(fx > 0 && fy > 0)) - { - cout << "invalid dsize and fx fy" << endl; - return; - } - - if(dsize == cv::Size()) - { - dsize.width = (int)(size.width * fx); - dsize.height = (int)(size.height * fy); - } - - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, dsize, type, 5, 16, false); - - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - src_roicols = mat1.cols-1; //start - src_roirows = mat1.rows-1; - dst_roicols=dst.cols-1; - dst_roirows=dst.rows-1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty =1; - - }else - { - src_roicols = mat1.cols; - src_roirows = mat1.rows; - dst_roicols=dst.cols; - dst_roirows=dst.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - - }; - mat1_roi = mat1(Rect(src1x,src1y,src_roicols,src_roirows)); - dst_roi = dst(Rect(dstx,dsty,dst_roicols,dst_roirows)); - - - } + //src mat with roi + cv::Mat src_roi; + cv::Mat dst_roi; + cv::Mat dstCoor_roi; -}; + //ocl dst mat + cv::ocl::oclMat gdst; + cv::ocl::oclMat gdstCoor; -TEST_P(Resize, Mat) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;k oclinfo; + //ocl mat with roi + cv::ocl::oclMat gsrc_roi; + cv::ocl::oclMat gdst_roi; + cv::ocl::oclMat gdstCoor_roi; -} + virtual void SetUp() + { + type = GET_PARAM(0); + typeCoor = GET_PARAM(1); + sp = GET_PARAM(2); + sr = GET_PARAM(3); + crit = GET_PARAM(4); -///////////////////////////////////////////////////////////////////////////////////////////////// -//threshold + cv::RNG &rng = TS::ptr()->get_rng(); -PARAM_TEST_CASE(Threshold, MatType, ThreshOp) -{ - int type; - int threshOp; - - //src mat - cv::Mat mat1; - cv::Mat 
dst; - - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat dst_roi; - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() - { - type = GET_PARAM(0); - threshOp = GET_PARAM(1); - - cv::RNG& rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); - - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat1.cols-1; //start - roirows = mat1.rows-1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty =1; - - }else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - - }; - mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); - dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); - - - } -}; + // MWIDTH=256, MHEIGHT=256. 
defined in utility.hpp + cv::Size size = cv::Size(MWIDTH, MHEIGHT); -TEST_P(Threshold, Mat) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;k 0); + ////if you want to use undefault device, set it here + ////setDevice(oclinfo[0]); + //cv::ocl::setBinpath(CLBINPATH); + } -PARAM_TEST_CASE(meanShiftTestBase, MatType, MatType, int, int, cv::TermCriteria) -{ - int type, typeCoor; - int sp, sr; - cv::TermCriteria crit; - //src mat - cv::Mat src; - cv::Mat dst; - cv::Mat dstCoor; - - //set up roi - int roicols; - int roirows; - int srcx; - int srcy; - int dstx; - int dsty; - - //src mat with roi - cv::Mat src_roi; - cv::Mat dst_roi; - cv::Mat dstCoor_roi; - - //ocl dst mat - cv::ocl::oclMat gdst; - cv::ocl::oclMat gdstCoor; - - //std::vector oclinfo; - //ocl mat with roi - cv::ocl::oclMat gsrc_roi; - cv::ocl::oclMat gdst_roi; - cv::ocl::oclMat gdstCoor_roi; - - virtual void SetUp() - { - type = GET_PARAM(0); - typeCoor = GET_PARAM(1); - sp = GET_PARAM(2); - sr = GET_PARAM(3); - crit = GET_PARAM(4); - - cv::RNG &rng = TS::ptr()->get_rng(); - - // MWIDTH=256, MHEIGHT=256. 
defined in utility.hpp - cv::Size size = cv::Size(MWIDTH, MHEIGHT); - - src = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - dstCoor = randomMat(rng, size, typeCoor, 5, 16, false); - - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - if(b) - { - //randomize ROI - roicols = src.cols - 1; - roirows = src.rows - 1; - srcx = 1; - srcy = 1; - dstx = 1; - dsty = 1; - }else - { - roicols = src.cols; - roirows = src.rows; - srcx = 0; - srcy = 0; - dstx = 0; - dsty = 0; - }; - - src_roi = src(Rect(srcx, srcy, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - dstCoor_roi = dstCoor(Rect(dstx, dsty, roicols, roirows)); - - gdst = dst; - gdstCoor = dstCoor; - } + void Has_roi(int b) + { + if(b) + { + //randomize ROI + roicols = src.cols - 1; + roirows = src.rows - 1; + srcx = 1; + srcy = 1; + dstx = 1; + dsty = 1; + } + else + { + roicols = src.cols; + roirows = src.rows; + srcx = 0; + srcy = 0; + dstx = 0; + dsty = 0; + }; + + src_roi = src(Rect(srcx, srcy, roicols, roirows)); + dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); + dstCoor_roi = dstCoor(Rect(dstx, dsty, roicols, roirows)); + + gdst = dst; + gdstCoor = dstCoor; + } }; /////////////////////////meanShiftFiltering///////////////////////////// @@ -1579,53 +1756,67 @@ struct meanShiftFiltering : meanShiftTestBase {}; TEST_P(meanShiftFiltering, Mat) { -#ifndef PRINT_KERNEL_RUN_TIME - double t1=0; - double t2=0; - for(int k=0;k<2;k++) - { - double totalgputick=0; - double totalgputick_kernel=0; - for(int j = 0; j < LOOP_TIMES+1; j ++) - { - Has_roi(k); +#ifndef PRINT_KERNEL_RUN_TIME + double t1 = 0; + double t2 = 0; + for(int k = 0; k < 2; k++) + { + double totalgputick = 0; + double totalgputick_kernel = 0; + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { + Has_roi(k); - t1 = 
(double)cvGetTickCount();//gpu start1 + t1 = (double)cvGetTickCount();//gpu start1 - gsrc_roi = src_roi; - gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi + gsrc_roi = src_roi; + gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi - t2=(double)cvGetTickCount();//kernel - cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit); - t2 = (double)cvGetTickCount() - t2;//kernel + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit); + t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_gdst; - gdst.download(cpu_gdst);//download + cv::Mat cpu_gdst; + gdst.download(cpu_gdst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 + t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; + if(j == 0) + continue; - totalgputick=t1+totalgputick; - totalgputick_kernel=t2+totalgputick_kernel; + totalgputick = t1 + totalgputick; + totalgputick_kernel = t2 + totalgputick_kernel; - } - if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - } + } + if(k == 0) + { + cout << "no roi\n"; + } + else + { + cout << "with roi\n"; + }; + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + } #else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); + for(int j = LOOPROISTART; j < LOOPROIEND; j ++) + { + Has_roi(j); - gsrc_roi = src_roi; - gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi + gsrc_roi = src_roi; + gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi - if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; - cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit); - }; + if(j == 0) + { + cout << "no roi:"; + } + else + { + cout << "\nwith roi:"; + }; + cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit); + }; #endif } @@ -1636,55 +1827,69 @@ struct meanShiftProc : meanShiftTestBase {}; TEST_P(meanShiftProc, Mat) { -#ifndef PRINT_KERNEL_RUN_TIME - double t1=0; - double t2=0; - for(int k=0;k<2;k++) - { - double totalgputick=0; - double totalgputick_kernel=0; - for(int j = 0; j < LOOP_TIMES+1; j ++) - { - Has_roi(k); +#ifndef PRINT_KERNEL_RUN_TIME + double t1 = 0; + double t2 = 0; + for(int k = 0; k < 2; k++) + { + double totalgputick = 0; + double totalgputick_kernel = 0; + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { + Has_roi(k); - t1 = (double)cvGetTickCount();//gpu start1 + t1 = (double)cvGetTickCount();//gpu start1 - gsrc_roi = src_roi; - gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi - gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows)); + gsrc_roi = src_roi; + gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi + gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows)); - t2=(double)cvGetTickCount();//kernel - cv::ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit); - t2 = (double)cvGetTickCount() - t2;//kernel + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit); + t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_gdstCoor; - gdstCoor.download(cpu_gdstCoor);//download + cv::Mat cpu_gdstCoor; + gdstCoor.download(cpu_gdstCoor);//download 
- t1 = (double)cvGetTickCount() - t1;//gpu end1 + t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; + if(j == 0) + continue; - totalgputick=t1+totalgputick; - totalgputick_kernel=t2+totalgputick_kernel; + totalgputick = t1 + totalgputick; + totalgputick_kernel = t2 + totalgputick_kernel; - } - if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - } + } + if(k == 0) + { + cout << "no roi\n"; + } + else + { + cout << "with roi\n"; + }; + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } #else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); + for(int j = LOOPROISTART; j < LOOPROIEND; j ++) + { + Has_roi(j); - gsrc_roi = src_roi; - gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi - gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows)); + gsrc_roi = src_roi; + gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi + gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows)); - if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; - cv::ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit); - }; + if(j == 0) + { + cout << "no roi:"; + } + else + { + cout << "\nwith roi:"; + }; + cv::ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit); + }; #endif } @@ -1692,15 +1897,15 @@ TEST_P(meanShiftProc, Mat) /////////////////////////////////////////////////////////////////////////////////////////// //hist -void calcHistGold(const cv::Mat& src, cv::Mat& hist) +void calcHistGold(const cv::Mat &src, 
cv::Mat &hist) { hist.create(1, 256, CV_32SC1); hist.setTo(cv::Scalar::all(0)); - int* hist_row = hist.ptr(); + int *hist_row = hist.ptr(); for (int y = 0; y < src.rows; ++y) { - const uchar* src_row = src.ptr(y); + const uchar *src_row = src.ptr(y); for (int x = 0; x < src.cols; ++x) ++hist_row[src_row[x]]; @@ -1723,23 +1928,23 @@ PARAM_TEST_CASE(histTestBase, MatType, MatType) cv::Mat src_roi; //ocl dst mat, dst_hist and gdst_hist don't have roi cv::ocl::oclMat gdst_hist; - + //ocl mat with roi cv::ocl::oclMat gsrc_roi; -// std::vector oclinfo; + // std::vector oclinfo; virtual void SetUp() { type_src = GET_PARAM(0); - + cv::RNG &rng = TS::ptr()->get_rng(); cv::Size size = cv::Size(MWIDTH, MHEIGHT); src = randomMat(rng, size, type_src, 0, 256, false); -// int devnums = getDevice(oclinfo); -// CV_Assert(devnums > 0); + // int devnums = getDevice(oclinfo); + // CV_Assert(devnums > 0); //if you want to use undefault device, set it here //setDevice(oclinfo[0]); } @@ -1749,11 +1954,12 @@ PARAM_TEST_CASE(histTestBase, MatType, MatType) if(b) { //randomize ROI - roicols = src.cols-1; - roirows = src.rows-1; + roicols = src.cols - 1; + roirows = src.rows - 1; srcx = 1; srcy = 1; - }else + } + else { roicols = src.cols; roirows = src.rows; @@ -1769,59 +1975,73 @@ struct calcHist : histTestBase {}; TEST_P(calcHist, Mat) { -#ifndef PRINT_KERNEL_RUN_TIME - double t0=0; - double t1=0; - double t2=0; - for(int k=0;k<2;k++) - { - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - for(int j = 0; j < LOOP_TIMES+1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - calcHistGold(src_roi, dst_hist); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - - gsrc_roi = src_roi; - - t2=(double)cvGetTickCount();//kernel - cv::ocl::calcHist(gsrc_roi, gdst_hist); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_hist; - gdst_hist.download(cpu_hist);//download - - t1 = 
(double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalcputick=t0+totalcputick; - totalgputick=t1+totalgputick; - totalgputick_kernel=t2+totalgputick_kernel; - - } - if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; - cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - } +#ifndef PRINT_KERNEL_RUN_TIME + double t0 = 0; + double t1 = 0; + double t2 = 0; + for(int k = 0; k < 2; k++) + { + double totalcputick = 0; + double totalgputick = 0; + double totalgputick_kernel = 0; + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { + Has_roi(k); + + t0 = (double)cvGetTickCount();//cpu start + calcHistGold(src_roi, dst_hist); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 + + gsrc_roi = src_roi; + + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::calcHist(gsrc_roi, gdst_hist); + t2 = (double)cvGetTickCount() - t2;//kernel + + cv::Mat cpu_hist; + gdst_hist.download(cpu_hist);//download + + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalcputick = t0 + totalcputick; + totalgputick = t1 + totalgputick; + totalgputick_kernel = t2 + totalgputick_kernel; + + } + if(k == 0) + { + cout << "no roi\n"; + } + else + { + cout << "with roi\n"; + }; + cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + } #else - for(int j = 0; j < 2; j ++) - { - Has_roi(j); - - gsrc_roi = src_roi; - - if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; - cv::ocl::calcHist(gsrc_roi, gdst_hist); - }; + for(int j = 0; j < 2; j ++) + { + Has_roi(j); + + gsrc_roi = src_roi; + + if(j == 0) + { + cout << "no roi:"; + } + else + { + cout << "\nwith roi:"; + }; + cv::ocl::calcHist(gsrc_roi, gdst_hist); + }; #endif } @@ -1829,103 +2049,103 @@ TEST_P(calcHist, Mat) //************test******************* INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine( - ONE_TYPE(CV_8UC1), - NULL_TYPE, - ONE_TYPE(CV_8UC1), - NULL_TYPE, - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter - -//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine( -// ONE_TYPE(CV_8UC1), -// NULL_TYPE, -// ONE_TYPE(CV_8UC1), -// NULL_TYPE, -// NULL_TYPE, -// Values(false))); // Values(false) is the reserved parameter -// -// + ONE_TYPE(CV_8UC1), + NULL_TYPE, + ONE_TYPE(CV_8UC1), + NULL_TYPE, + NULL_TYPE, + Values(false))); // Values(false) is the reserved parameter + +INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine( + Values(CV_8UC1, CV_8UC3), + NULL_TYPE, + Values(CV_8UC1, CV_8UC3), + NULL_TYPE, + NULL_TYPE, + Values(false))); // Values(false) is the reserved parameter + + INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine( - Values(CV_8UC1, CV_8UC4/*, CV_32SC1*/), - NULL_TYPE, - Values(CV_8UC1,CV_8UC4/*,CV_32SC1*/), - NULL_TYPE, - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter + Values(CV_8UC1, CV_8UC4/*, CV_32SC1*/), + NULL_TYPE, + Values(CV_8UC1, CV_8UC4/*,CV_32SC1*/), + NULL_TYPE, + NULL_TYPE, + Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerMinEigenVal, Combine( - Values(CV_8UC1,CV_32FC1), - NULL_TYPE, - ONE_TYPE(CV_32FC1), - NULL_TYPE, - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter + Values(CV_8UC1, 
CV_32FC1), + NULL_TYPE, + ONE_TYPE(CV_32FC1), + NULL_TYPE, + NULL_TYPE, + Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerHarris, Combine( - Values(CV_8UC1,CV_32FC1), - NULL_TYPE, - ONE_TYPE(CV_32FC1), - NULL_TYPE, - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter + Values(CV_8UC1, CV_32FC1), + NULL_TYPE, + ONE_TYPE(CV_32FC1), + NULL_TYPE, + NULL_TYPE, + Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(ImgprocTestBase, integral, Combine( - ONE_TYPE(CV_8UC1), - NULL_TYPE, - ONE_TYPE(CV_32SC1), - ONE_TYPE(CV_32FC1), - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter + ONE_TYPE(CV_8UC1), + NULL_TYPE, + ONE_TYPE(CV_32SC1), + ONE_TYPE(CV_32FC1), + NULL_TYPE, + Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Imgproc, WarpAffine, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR, - (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP), - (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP)))); + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR, + (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP), + (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP)))); INSTANTIATE_TEST_CASE_P(Imgproc, WarpPerspective, Combine - (Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR, - (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP), - (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP)))); + (Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR, + 
(MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP), + (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP)))); INSTANTIATE_TEST_CASE_P(Imgproc, Resize, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(cv::Size()), - Values(0.5/*, 1.5, 2*/), Values(0.5/*, 1.5, 2*/), Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR))); + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(cv::Size()), + Values(0.5/*, 1.5, 2*/), Values(0.5/*, 1.5, 2*/), Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR))); INSTANTIATE_TEST_CASE_P(Imgproc, Threshold, Combine( - Values(CV_8UC1, CV_32FC1), Values(ThreshOp(cv::THRESH_BINARY), - ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC), - ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV)))); + Values(CV_8UC1, CV_32FC1), Values(ThreshOp(cv::THRESH_BINARY), + ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC), + ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV)))); INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftFiltering, Combine( - ONE_TYPE(CV_8UC4), - ONE_TYPE(CV_16SC2),//it is no use in meanShiftFiltering - Values(5), - Values(6), - Values(cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1)) - )); + ONE_TYPE(CV_8UC4), + ONE_TYPE(CV_16SC2),//it is no use in meanShiftFiltering + Values(5), + Values(6), + Values(cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1)) + )); INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftProc, Combine( - ONE_TYPE(CV_8UC4), - ONE_TYPE(CV_16SC2), - Values(5), - Values(6), - Values(cv::TermCriteria(cv::TermCriteria::COUNT+cv::TermCriteria::EPS, 5, 1)) - )); + ONE_TYPE(CV_8UC4), + ONE_TYPE(CV_16SC2), + Values(5), + Values(6), + Values(cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1)) + )); INSTANTIATE_TEST_CASE_P(Imgproc, Remap, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(CV_32FC1, CV_16SC2, 
CV_32FC2),Values(-1,CV_32FC1), - Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR), - Values((int)cv::BORDER_CONSTANT))); + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(CV_32FC1, CV_16SC2, CV_32FC2), Values(-1, CV_32FC1), + Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR), + Values((int)cv::BORDER_CONSTANT))); INSTANTIATE_TEST_CASE_P(histTestBase, calcHist, Combine( - ONE_TYPE(CV_8UC1), - ONE_TYPE(CV_32SC1) //no use -)); + ONE_TYPE(CV_8UC1), + ONE_TYPE(CV_32SC1) //no use + )); #endif // HAVE_OPENCL diff --git a/modules/ocl/perf/perf_match_template.cpp b/modules/ocl/perf/perf_match_template.cpp index 1e6b0f7..febea8b 100644 --- a/modules/ocl/perf/perf_match_template.cpp +++ b/modules/ocl/perf/perf_match_template.cpp @@ -87,76 +87,76 @@ IMPLEMENT_PARAM_CLASS(Channels, int) IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size); -const char* TEMPLATE_METHOD_NAMES[6] = {"TM_SQDIFF", "TM_SQDIFF_NORMED", "TM_CCORR", "TM_CCORR_NORMED", "TM_CCOEFF", "TM_CCOEFF_NORMED"}; +const char *TEMPLATE_METHOD_NAMES[6] = {"TM_SQDIFF", "TM_SQDIFF_NORMED", "TM_CCORR", "TM_CCORR_NORMED", "TM_CCOEFF", "TM_CCOEFF_NORMED"}; PARAM_TEST_CASE(MatchTemplate, cv::Size, TemplateSize, Channels, TemplateMethod) { - cv::Size size; - cv::Size templ_size; - int cn; - int method; - //vector oclinfo; - - virtual void SetUp() - { - size = GET_PARAM(0); - templ_size = GET_PARAM(1); - cn = GET_PARAM(2); - method = GET_PARAM(3); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - } + cv::Size size; + cv::Size templ_size; + int cn; + int method; + //vector oclinfo; + + virtual void SetUp() + { + size = GET_PARAM(0); + templ_size = GET_PARAM(1); + cn = GET_PARAM(2); + method = GET_PARAM(3); + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums > 0); + } }; struct MatchTemplate8U : MatchTemplate {}; TEST_P(MatchTemplate8U, Performance) { - std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl; - std::cout << "Image Size: (" << size.width << ", " << size.height << 
")"<< std::endl; - std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl; - std::cout << "Channels: " << cn << std::endl; + std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl; + std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl; + std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl; + std::cout << "Channels: " << cn << std::endl; - cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn)); - cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn)); - cv::Mat dst_gold; - cv::ocl::oclMat dst; + cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn)); + cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn)); + cv::Mat dst_gold; + cv::ocl::oclMat dst; - - double totalgputick=0; - double totalgputick_kernel=0; - double t1=0; - double t2=0; - for(int j = 0; j < LOOP_TIMES+1; j ++) - { + double totalgputick = 0; + double totalgputick_kernel = 0; - t1 = (double)cvGetTickCount();//gpu start1 + double t1 = 0; + double t2 = 0; + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { + + t1 = (double)cvGetTickCount();//gpu start1 cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);//upload - cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload + cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload - t2=(double)cvGetTickCount();//kernel - cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method); - t2 = (double)cvGetTickCount() - t2;//kernel + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method); + t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - dst.download (cpu_dst);//download + cv::Mat cpu_dst; + dst.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 + t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; + if(j == 0) + continue; - totalgputick=t1+totalgputick; - totalgputick_kernel=t2+totalgputick_kernel; + 
totalgputick = t1 + totalgputick; + totalgputick_kernel = t2 + totalgputick_kernel; - } + } - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; } @@ -165,68 +165,68 @@ TEST_P(MatchTemplate8U, Performance) struct MatchTemplate32F : MatchTemplate {}; TEST_P(MatchTemplate32F, Performance) { - std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl; - std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl; - std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl; - std::cout << "Channels: " << cn << std::endl; - cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn)); - cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn)); + std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl; + std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl; + std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl; + std::cout << "Channels: " << cn << std::endl; + cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn)); + cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn)); - cv::Mat dst_gold; - cv::ocl::oclMat dst; + cv::Mat dst_gold; + cv::ocl::oclMat dst; - double totalgputick=0; - double totalgputick_kernel=0; + double totalgputick = 0; + double totalgputick_kernel = 0; - double t1=0; - double t2=0; - for(int j = 0; j < LOOP_TIMES; j ++) - { + double t1 = 0; + double t2 = 0; + for(int j = 0; j < LOOP_TIMES; 
j ++) + { - t1 = (double)cvGetTickCount();//gpu start1 + t1 = (double)cvGetTickCount();//gpu start1 cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);//upload - cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload + cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload + + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method); + t2 = (double)cvGetTickCount() - t2;//kernel - t2=(double)cvGetTickCount();//kernel - cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method); - t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + dst.download (cpu_dst);//download - cv::Mat cpu_dst; - dst.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 - t1 = (double)cvGetTickCount() - t1;//gpu end1 + totalgputick = t1 + totalgputick; - totalgputick=t1+totalgputick; - - totalgputick_kernel=t2+totalgputick_kernel; + totalgputick_kernel = t2 + totalgputick_kernel; - } + } - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; } -INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U, - testing::Combine( - testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT),cv::Size(1800, 1500)), - testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), - testing::Values(Channels(1), Channels(4)/*, Channels(3)*/), - ALL_TEMPLATE_METHODS - ) -); +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U, + testing::Combine( + testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT), cv::Size(1800, 1500)), + testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), + testing::Values(Channels(1), Channels(4)/*, Channels(3)*/), + ALL_TEMPLATE_METHODS + ) + ); INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine( - testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT),cv::Size(1800, 1500)), - testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), - testing::Values(Channels(1), Channels(4) /*, Channels(3)*/), - testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR)))); + testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT), cv::Size(1800, 1500)), + testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), + testing::Values(Channels(1), Channels(4) /*, Channels(3)*/), + testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR)))); #endif //HAVE_OPENCL \ No newline at end of file diff --git a/modules/ocl/perf/perf_matrix_operation.cpp b/modules/ocl/perf/perf_matrix_operation.cpp index 434a62f..f4af91d 100644 --- a/modules/ocl/perf/perf_matrix_operation.cpp +++ b/modules/ocl/perf/perf_matrix_operation.cpp @@ -54,139 +54,155 @@ using namespace cv::ocl; ////////////////////////////////converto///////////////////////////////////////////////// PARAM_TEST_CASE(ConvertToTestBase, 
MatType, MatType) { - int type; - int dst_type; - - //src mat - cv::Mat mat; - cv::Mat dst; - - // set up roi - int roicols; - int roirows; - int srcx; - int srcy; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat_roi; - cv::Mat dst_roi; - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat; - cv::ocl::oclMat gdst; - - virtual void SetUp() - { - type = GET_PARAM(0); - dst_type = GET_PARAM(1); - - cv::RNG& rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); - - mat = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat.cols-1; //start - roirows = mat.rows-1; - srcx = 1; - srcy = 1; - dstx = 1; - dsty =1; - }else - { - roicols = mat.cols; - roirows = mat.rows; - srcx = 0; - srcy = 0; - dstx = 0; - dsty = 0; - }; - - mat_roi = mat(Rect(srcx,srcy,roicols,roirows)); - dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); - - //gdst_whole = dst; - //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); - - //gmat = mat_roi; - } + int type; + int dst_type; + + //src mat + cv::Mat mat; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int srcx; + int srcy; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat_roi; + cv::Mat dst_roi; + //std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + dst_type = GET_PARAM(1); + + cv::RNG &rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + mat = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + 
//int devnums = getDevice(oclinfo); + //CV_Assert(devnums > 0); + ////if you want to use undefault device, set it here + ////setDevice(oclinfo[0]); + //setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + roicols = mat.cols - 1; //start + roirows = mat.rows - 1; + srcx = 1; + srcy = 1; + dstx = 1; + dsty = 1; + } + else + { + roicols = mat.cols; + roirows = mat.rows; + srcx = 0; + srcy = 0; + dstx = 0; + dsty = 0; + }; + + mat_roi = mat(Rect(srcx, srcy, roicols, roirows)); + dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); + + //gdst_whole = dst; + //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + //gmat = mat_roi; + } }; -struct ConvertTo :ConvertToTestBase {}; - -TEST_P(ConvertTo, Accuracy) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;k oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat; - cv::ocl::oclMat gdst; - cv::ocl::oclMat gmask; - - virtual void SetUp() - { - type = GET_PARAM(0); - - cv::RNG& rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); - - mat = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - mask = randomMat(rng, size, CV_8UC1, 0, 2, false); - - cv::threshold(mask, mask, 0.5, 255., CV_8UC1); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat.cols-1; //start - roirows = mat.rows-1; - srcx = 1; - srcy = 1; - dstx = 1; - dsty =1; - maskx = 1; - masky = 1; - }else - { - roicols = mat.cols; - roirows = mat.rows; - srcx = 0; - srcy = 0; - dstx = 0; - dsty = 0; - maskx = 0; - 
masky = 0; - }; - - mat_roi = mat(Rect(srcx,srcy,roicols,roirows)); - mask_roi = mask(Rect(maskx,masky,roicols,roirows)); - dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); - - //gdst_whole = dst; - //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); - - //gmat = mat_roi; - //gmask = mask_roi; - } + int type; + + cv::Mat mat; + cv::Mat mask; + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int srcx; + int srcy; + int dstx; + int dsty; + int maskx; + int masky; + + //src mat with roi + cv::Mat mat_roi; + cv::Mat mask_roi; + cv::Mat dst_roi; + //std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat; + cv::ocl::oclMat gdst; + cv::ocl::oclMat gmask; + + virtual void SetUp() + { + type = GET_PARAM(0); + + cv::RNG &rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + mat = randomMat(rng, size, type, 5, 16, false); + dst = randomMat(rng, size, type, 5, 16, false); + mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + + cv::threshold(mask, mask, 0.5, 255., CV_8UC1); + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums > 0); + ////if you want to use undefault device, set it here + ////setDevice(oclinfo[0]); + //setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + roicols = mat.cols - 1; //start + roirows = mat.rows - 1; + srcx = 1; + srcy = 1; + dstx = 1; + dsty = 1; + maskx = 1; + masky = 1; + } + else + { + roicols = mat.cols; + roirows = mat.rows; + srcx = 0; + srcy = 0; + dstx = 0; + dsty = 0; + maskx = 0; + masky = 0; + }; + + mat_roi = mat(Rect(srcx, srcy, roicols, roirows)); + mask_roi = mask(Rect(maskx, masky, roicols, roirows)); + dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); + + //gdst_whole = dst; + //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); + + //gmat = mat_roi; + //gmask = mask_roi; + } }; -struct CopyTo :CopyToTestBase {}; - -TEST_P(CopyTo, Without_mask) -{ 
-#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;k oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gmat_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat; - cv::ocl::oclMat gmask; - - virtual void SetUp() - { - type = GET_PARAM(0); - - cv::RNG& rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); - - mat = randomMat(rng, size, type, 5, 16, false); - mask = randomMat(rng, size, CV_8UC1, 0, 2, false); - - cv::threshold(mask, mask, 0.5, 255., CV_8UC1); - val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat.cols-1; //start - roirows = mat.rows-1; - srcx = 1; - srcy = 1; - maskx = 1; - masky = 1; - }else - { - roicols = mat.cols; - roirows = mat.rows; - srcx = 0; - srcy = 0; - maskx = 0; - masky = 0; - }; - - mat_roi = mat(Rect(srcx,srcy,roicols,roirows)); - mask_roi = mask(Rect(maskx,masky,roicols,roirows)); - - //gmat_whole = mat; - //gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows)); - - //gmask = mask_roi; - } + int type; + cv::Scalar val; + + cv::Mat mat; + cv::Mat mask; + + // set up roi + int roicols; + int roirows; + int srcx; + int srcy; + int maskx; + int masky; + + //src mat with roi + cv::Mat mat_roi; + cv::Mat mask_roi; + //std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gmat_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat; + cv::ocl::oclMat gmask; + + virtual void SetUp() + { + type = GET_PARAM(0); + + cv::RNG &rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + mat = randomMat(rng, size, type, 5, 16, false); 
+ mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + + cv::threshold(mask, mask, 0.5, 255., CV_8UC1); + val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums > 0); + ////if you want to use undefault device, set it here + ////setDevice(oclinfo[0]); + //setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + roicols = mat.cols - 1; //start + roirows = mat.rows - 1; + srcx = 1; + srcy = 1; + maskx = 1; + masky = 1; + } + else + { + roicols = mat.cols; + roirows = mat.rows; + srcx = 0; + srcy = 0; + maskx = 0; + masky = 0; + }; + + mat_roi = mat(Rect(srcx, srcy, roicols, roirows)); + mask_roi = mask(Rect(maskx, masky, roicols, roirows)); + + //gmat_whole = mat; + //gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows)); + + //gmask = mask_roi; + } }; -struct SetTo :SetToTestBase {}; - -TEST_P(SetTo, Without_mask) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;kget_rng(); + cv::Size size(MWIDTH, MHEIGHT); + mat = randomMat(rng, size, type, 5, 16, false); + } +}; +TEST_P(DataTransfer, perf) +{ + double totaluploadtick = 0; + double totaldownloadtick = 0; + double totaltick = 0; + double t0 = 0; + double t1 = 0; + cv::Mat cpu_dst; + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { + t0 = (double)cvGetTickCount(); + gmat_whole.upload(mat);//upload + t0 = (double)cvGetTickCount() - t0; + + t1 = (double)cvGetTickCount(); + gmat_whole.download(cpu_dst);//download + t1 = (double)cvGetTickCount() - t1; + + if(j == 0) + continue; + totaluploadtick = t0 + totaluploadtick; + totaldownloadtick = t1 + totaldownloadtick; + } + EXPECT_MAT_SIMILAR(mat, cpu_dst, 0.0); + totaltick = totaluploadtick + totaldownloadtick; + cout << "average upload time is " << 
totaluploadtick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average download time is " << totaldownloadtick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average data transfer time is " << totaltick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; +} +//**********test************ INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4))); + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4))); INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter + Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter +INSTANTIATE_TEST_CASE_P(MatrixOperation, DataTransfer, Combine( + Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), + Values(false))); // Values(false) is the reserved parameter #endif diff --git a/modules/ocl/perf/perf_pyrdown.cpp b/modules/ocl/perf/perf_pyrdown.cpp index 5d92a21..2cea4df 100644 --- a/modules/ocl/perf/perf_pyrdown.cpp +++ b/modules/ocl/perf/perf_pyrdown.cpp @@ -56,28 +56,28 @@ using namespace std; PARAM_TEST_CASE(PyrDown, MatType, int) { - int type; - int channels; - //src mat - cv::Mat mat1; - cv::Mat dst; - - //std::vector oclinfo; - //ocl dst mat for testing - - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - - virtual void SetUp() - { - type = GET_PARAM(0); - channels = GET_PARAM(1); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - } - - + int 
type; + int channels; + //src mat + cv::Mat mat1; + cv::Mat dst; + + //std::vector oclinfo; + //ocl dst mat for testing + + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gdst; + + + virtual void SetUp() + { + type = GET_PARAM(0); + channels = GET_PARAM(1); + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums > 0); + } + + }; #define VARNAME(A) string(#A); @@ -85,48 +85,48 @@ PARAM_TEST_CASE(PyrDown, MatType, int) ////////////////////////////////PyrDown///////////////////////////////////////////////// TEST_P(PyrDown, Mat) { - cv::Size size(MWIDTH, MHEIGHT); - cv::RNG &rng = TS::ptr()->get_rng(); - mat1 = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false); - - - cv::ocl::oclMat gdst; - double totalgputick = 0; - double totalgputick_kernel = 0; - - double t1 = 0; - double t2 = 0; - - for (int j = 0; j < LOOP_TIMES + 1; j ++) - { - - t1 = (double)cvGetTickCount();//gpu start1 - - cv::ocl::oclMat gmat1(mat1); - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::pyrDown(gmat1, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - gdst.download(cpu_dst); - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if (j == 0) - { - continue; - } - - totalgputick = t1 + totalgputick; - - totalgputick_kernel = t2 + totalgputick_kernel; - - } - - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - + cv::Size size(MWIDTH, MHEIGHT); + cv::RNG &rng = TS::ptr()->get_rng(); + mat1 = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false); + + + cv::ocl::oclMat gdst; + double totalgputick = 0; + double totalgputick_kernel = 0; + + double t1 = 0; + double t2 = 0; + + for (int j = 0; j < LOOP_TIMES + 1; j ++) + { + + t1 = (double)cvGetTickCount();//gpu start1 + + cv::ocl::oclMat gmat1(mat1); + + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::pyrDown(gmat1, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + + cv::Mat cpu_dst; + gdst.download(cpu_dst); + + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if (j == 0) + { + continue; + } + + totalgputick = t1 + totalgputick; + + totalgputick_kernel = t2 + totalgputick_kernel; + + } + + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + } //********test**************** diff --git a/modules/ocl/perf/perf_pyrup.cpp b/modules/ocl/perf/perf_pyrup.cpp index 5cefba7..a023353 100644 --- a/modules/ocl/perf/perf_pyrup.cpp +++ b/modules/ocl/perf/perf_pyrup.cpp @@ -56,64 +56,64 @@ using namespace std; PARAM_TEST_CASE(PyrUp, MatType, int) { - int type; - int channels; - //std::vector oclinfo; - - virtual void SetUp() - { - type = GET_PARAM(0); - channels = GET_PARAM(1); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - } + int type; + int channels; + //std::vector oclinfo; + + virtual void SetUp() + { + type = GET_PARAM(0); + channels = GET_PARAM(1); + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums > 0); + } }; TEST_P(PyrUp, Performance) { - cv::Size size(MWIDTH, MHEIGHT); - cv::Mat src = randomMat(size, CV_MAKETYPE(type, channels)); - cv::Mat dst_gold; - cv::ocl::oclMat dst; - - - double totalgputick = 0; - double totalgputick_kernel = 0; - - double t1 = 0; - double t2 = 0; - - for (int j = 0; j < LOOP_TIMES + 1; j ++) - { - t1 = (double)cvGetTickCount();//gpu start1 - - cv::ocl::oclMat srcMat = cv::ocl::oclMat(src);//upload - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::pyrUp(srcMat, dst); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - dst.download(cpu_dst); //download - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if (j == 0) - { - continue; - } - - totalgputick = t1 + totalgputick; - - totalgputick_kernel = t2 + totalgputick_kernel; - - } - - - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; - - + cv::Size size(MWIDTH, MHEIGHT); + cv::Mat src = randomMat(size, CV_MAKETYPE(type, channels)); + cv::Mat dst_gold; + cv::ocl::oclMat dst; + + + double totalgputick = 0; + double totalgputick_kernel = 0; + + double t1 = 0; + double t2 = 0; + + for (int j = 0; j < LOOP_TIMES + 1; j ++) + { + t1 = (double)cvGetTickCount();//gpu start1 + + cv::ocl::oclMat srcMat = cv::ocl::oclMat(src);//upload + + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::pyrUp(srcMat, dst); + t2 = (double)cvGetTickCount() - t2;//kernel + + cv::Mat cpu_dst; + dst.download(cpu_dst); //download + + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if (j == 0) + { + continue; + } + + totalgputick = t1 + totalgputick; + + totalgputick_kernel = t2 + totalgputick_kernel; + + } + + + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + + } INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, Combine( diff --git a/modules/ocl/perf/perf_split_merge.cpp b/modules/ocl/perf/perf_split_merge.cpp index 9826efc..5502d7f 100644 --- a/modules/ocl/perf/perf_split_merge.cpp +++ b/modules/ocl/perf/perf_split_merge.cpp @@ -53,403 +53,435 @@ using namespace std; using namespace cv::ocl; PARAM_TEST_CASE(MergeTestBase, MatType, int) { - int type; - int channels; - - //src mat - cv::Mat mat1; - cv::Mat mat2; - cv::Mat mat3; - cv::Mat mat4; - - //dst mat - cv::Mat dst; - - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int src2x; - int src2y; - int src3x; - int src3y; - int src4x; - int src4y; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat mat2_roi; - cv::Mat mat3_roi; - cv::Mat mat4_roi; - - //dst mat with roi - cv::Mat dst_roi; - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gmat2; - cv::ocl::oclMat gmat3; - cv::ocl::oclMat gmat4; - cv::ocl::oclMat gdst; - - virtual void SetUp() - { - type = GET_PARAM(0); - channels = GET_PARAM(1); - - cv::RNG& rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); - - mat1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - mat2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - mat3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - mat4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - dst = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); - } - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat1.cols-1; //start - roirows = mat1.rows-1; - src1x = 1; - src1y = 1; - src2x = 1; - src2y = 1; - src3x = 1; - src3y = 1; - 
src4x = 1; - src4y = 1; - dstx = 1; - dsty =1; - - }else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - src2x = 0; - src2y = 0; - src3x = 0; - src3y = 0; - src4x = 0; - src4y = 0; - dstx = 0; - dsty = 0; - }; - - mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows)); - mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows)); - mat3_roi = mat3(Rect(src3x,src3y,roicols,roirows)); - mat4_roi = mat4(Rect(src4x,src4y,roicols,roirows)); - - - dst_roi = dst(Rect(dstx,dsty,roicols,roirows)); - } + int type; + int channels; + + //src mat + cv::Mat mat1; + cv::Mat mat2; + cv::Mat mat3; + cv::Mat mat4; + + //dst mat + cv::Mat dst; + + // set up roi + int roicols; + int roirows; + int src1x; + int src1y; + int src2x; + int src2y; + int src3x; + int src3y; + int src4x; + int src4y; + int dstx; + int dsty; + + //src mat with roi + cv::Mat mat1_roi; + cv::Mat mat2_roi; + cv::Mat mat3_roi; + cv::Mat mat4_roi; + + //dst mat with roi + cv::Mat dst_roi; + //std::vector oclinfo; + //ocl dst mat for testing + cv::ocl::oclMat gdst_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat1; + cv::ocl::oclMat gmat2; + cv::ocl::oclMat gmat3; + cv::ocl::oclMat gmat4; + cv::ocl::oclMat gdst; + + virtual void SetUp() + { + type = GET_PARAM(0); + channels = GET_PARAM(1); + + cv::RNG &rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + mat1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); + mat2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); + mat3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); + mat4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); + dst = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false); + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums > 0); + ////if you want to use undefault device, set it here + ////setDevice(oclinfo[0]); + //setBinpath(CLBINPATH); + } + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + roicols = mat1.cols 
- 1; //start + roirows = mat1.rows - 1; + src1x = 1; + src1y = 1; + src2x = 1; + src2y = 1; + src3x = 1; + src3y = 1; + src4x = 1; + src4y = 1; + dstx = 1; + dsty = 1; + + } + else + { + roicols = mat1.cols; + roirows = mat1.rows; + src1x = 0; + src1y = 0; + src2x = 0; + src2y = 0; + src3x = 0; + src3y = 0; + src4x = 0; + src4y = 0; + dstx = 0; + dsty = 0; + }; + + mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); + mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); + mat3_roi = mat3(Rect(src3x, src3y, roicols, roirows)); + mat4_roi = mat4(Rect(src4x, src4y, roicols, roirows)); + + + dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); + } }; struct Merge : MergeTestBase {}; -TEST_P(Merge, Accuracy) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;k dev_src; - dev_src.push_back(mat1_roi); - dev_src.push_back(mat2_roi); - dev_src.push_back(mat3_roi); - dev_src.push_back(mat4_roi); - t0 = (double)cvGetTickCount();//cpu start - cv::merge(dev_src, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 ] - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmat3 = mat3_roi; - gmat4 = mat4_roi; - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); - std::vector dev_gsrc; - dev_gsrc.push_back(gmat1); - dev_gsrc.push_back(gmat2); - dev_gsrc.push_back(gmat3); - dev_gsrc.push_back(gmat4); - t2=(double)cvGetTickCount();//kernel - cv::ocl::merge(dev_gsrc, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick=t1+totalgputick; - totalcputick=t0+totalcputick; - totalgputick_kernel=t2+totalgputick_kernel; - - } - if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";}; - cout << "average cpu runtime is " << 
totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - } +TEST_P(Merge, Accuracy) +{ +#ifndef PRINT_KERNEL_RUN_TIME + double totalcputick = 0; + double totalgputick = 0; + double totalgputick_kernel = 0; + double t0 = 0; + double t1 = 0; + double t2 = 0; + for(int k = LOOPROISTART; k < LOOPROIEND; k++) + { + totalcputick = 0; + totalgputick = 0; + totalgputick_kernel = 0; + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { + Has_roi(k); + std::vector dev_src; + dev_src.push_back(mat1_roi); + dev_src.push_back(mat2_roi); + dev_src.push_back(mat3_roi); + dev_src.push_back(mat4_roi); + t0 = (double)cvGetTickCount();//cpu start + cv::merge(dev_src, dst_roi); + t0 = (double)cvGetTickCount() - t0;//cpu end + + t1 = (double)cvGetTickCount();//gpu start1 ] + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gmat3 = mat3_roi; + gmat4 = mat4_roi; + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + std::vector dev_gsrc; + dev_gsrc.push_back(gmat1); + dev_gsrc.push_back(gmat2); + dev_gsrc.push_back(gmat3); + dev_gsrc.push_back(gmat4); + t2 = (double)cvGetTickCount(); //kernel + cv::ocl::merge(dev_gsrc, gdst); + t2 = (double)cvGetTickCount() - t2;//kernel + cv::Mat cpu_dst; + gdst_whole.download (cpu_dst);//download + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick = t1 + totalgputick; + totalcputick = t0 + totalcputick; + totalgputick_kernel = t2 + totalgputick_kernel; + + } + if(k == 0) + { + cout << "no roi\n"; + } + else + { + cout << "with roi\n"; + }; + cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) 
<< "ms" << endl; + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } #else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmat3 = mat3_roi; - gmat4 = mat4_roi; - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); - std::vector dev_gsrc; - dev_gsrc.push_back(gmat1); - dev_gsrc.push_back(gmat2); - dev_gsrc.push_back(gmat3); - dev_gsrc.push_back(gmat4); - - if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; - cv::ocl::merge(dev_gsrc, gdst); - }; + for(int j = LOOPROISTART; j < LOOPROIEND; j ++) + { + Has_roi(j); + gmat1 = mat1_roi; + gmat2 = mat2_roi; + gmat3 = mat3_roi; + gmat4 = mat4_roi; + gdst_whole = dst; + gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + std::vector dev_gsrc; + dev_gsrc.push_back(gmat1); + dev_gsrc.push_back(gmat2); + dev_gsrc.push_back(gmat3); + dev_gsrc.push_back(gmat4); + + if(j == 0) + { + cout << "no roi:"; + } + else + { + cout << "\nwith roi:"; + }; + cv::ocl::merge(dev_gsrc, gdst); + }; #endif } PARAM_TEST_CASE(SplitTestBase, MatType, int) { - int type; - int channels; - - //src mat - cv::Mat mat; - - //dstmat - cv::Mat dst1; - cv::Mat dst2; - cv::Mat dst3; - cv::Mat dst4; - - // set up roi - int roicols; - int roirows; - int srcx; - int srcy; - int dst1x; - int dst1y; - int dst2x; - int dst2y; - int dst3x; - int dst3y; - int dst4x; - int dst4y; - - //src mat with roi - cv::Mat mat_roi; - - //dst mat with roi - cv::Mat dst1_roi; - cv::Mat dst2_roi; - cv::Mat dst3_roi; - cv::Mat dst4_roi; - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst1_whole; - cv::ocl::oclMat gdst2_whole; - cv::ocl::oclMat gdst3_whole; - cv::ocl::oclMat gdst4_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat; - 
cv::ocl::oclMat gdst1; - cv::ocl::oclMat gdst2; - cv::ocl::oclMat gdst3; - cv::ocl::oclMat gdst4; - - virtual void SetUp() - { - type = GET_PARAM(0); - channels = GET_PARAM(1); - - cv::RNG& rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); - - mat = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false); - dst1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - dst2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - dst3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - dst4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat.cols-1; //start - roirows = mat.rows-1; - srcx = 1; - srcx = 1; - dst1x = 1; - dst1y =1; - dst2x = 1; - dst2y =1; - dst3x = 1; - dst3y =1; - dst4x = 1; - dst4y =1; - }else - { - roicols = mat.cols; - roirows = mat.rows; - srcx = 0; - srcy = 0; - dst1x = 0; - dst1y = 0; - dst2x = 0; - dst2y =0; - dst3x = 0; - dst3y =0; - dst4x = 0; - dst4y =0; - }; - - mat_roi = mat(Rect(srcx,srcy,roicols,roirows)); - - dst1_roi = dst1(Rect(dst1x,dst1y,roicols,roirows)); - dst2_roi = dst2(Rect(dst2x,dst2y,roicols,roirows)); - dst3_roi = dst3(Rect(dst3x,dst3y,roicols,roirows)); - dst4_roi = dst4(Rect(dst4x,dst4y,roicols,roirows)); - } + int type; + int channels; + + //src mat + cv::Mat mat; + + //dstmat + cv::Mat dst1; + cv::Mat dst2; + cv::Mat dst3; + cv::Mat dst4; + + // set up roi + int roicols; + int roirows; + int srcx; + int srcy; + int dst1x; + int dst1y; + int dst2x; + int dst2y; + int dst3x; + int dst3y; + int dst4x; + int dst4y; + + //src mat with roi + cv::Mat mat_roi; + + //dst mat with roi + cv::Mat dst1_roi; + cv::Mat dst2_roi; + cv::Mat dst3_roi; + cv::Mat dst4_roi; + //std::vector oclinfo; + 
//ocl dst mat for testing + cv::ocl::oclMat gdst1_whole; + cv::ocl::oclMat gdst2_whole; + cv::ocl::oclMat gdst3_whole; + cv::ocl::oclMat gdst4_whole; + + //ocl mat with roi + cv::ocl::oclMat gmat; + cv::ocl::oclMat gdst1; + cv::ocl::oclMat gdst2; + cv::ocl::oclMat gdst3; + cv::ocl::oclMat gdst4; + + virtual void SetUp() + { + type = GET_PARAM(0); + channels = GET_PARAM(1); + + cv::RNG &rng = TS::ptr()->get_rng(); + cv::Size size(MWIDTH, MHEIGHT); + + mat = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false); + dst1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); + dst2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); + dst3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); + dst4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums > 0); + ////if you want to use undefault device, set it here + ////setDevice(oclinfo[0]); + //setBinpath(CLBINPATH); + } + + void Has_roi(int b) + { + //cv::RNG& rng = TS::ptr()->get_rng(); + if(b) + { + //randomize ROI + roicols = mat.cols - 1; //start + roirows = mat.rows - 1; + srcx = 1; + srcx = 1; + dst1x = 1; + dst1y = 1; + dst2x = 1; + dst2y = 1; + dst3x = 1; + dst3y = 1; + dst4x = 1; + dst4y = 1; + } + else + { + roicols = mat.cols; + roirows = mat.rows; + srcx = 0; + srcy = 0; + dst1x = 0; + dst1y = 0; + dst2x = 0; + dst2y = 0; + dst3x = 0; + dst3y = 0; + dst4x = 0; + dst4y = 0; + }; + + mat_roi = mat(Rect(srcx, srcy, roicols, roirows)); + + dst1_roi = dst1(Rect(dst1x, dst1y, roicols, roirows)); + dst2_roi = dst2(Rect(dst2x, dst2y, roicols, roirows)); + dst3_roi = dst3(Rect(dst3x, dst3y, roicols, roirows)); + dst4_roi = dst4(Rect(dst4x, dst4y, roicols, roirows)); + } }; -struct Split :SplitTestBase {}; - -TEST_P(Split, Accuracy) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int k=LOOPROISTART;k 
-#ifdef HAVE_OPENCL - -using namespace cv; -using namespace cv::ocl; -using namespace cvtest; -using namespace testing; +#ifdef HAVE_OPENCL + +using namespace cv; +using namespace cv::ocl; +using namespace cvtest; +using namespace testing; using namespace std; #define FILTER_IMAGE "../../../samples/gpu/road.png" - -TEST(SURF, Performance) -{ - cv::Mat img = readImage(FILTER_IMAGE,cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(img.empty()); - + +TEST(SURF, Performance) +{ + cv::Mat img = readImage(FILTER_IMAGE, cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(img.empty()); + ocl::SURF_OCL d_surf; ocl::oclMat d_keypoints; ocl::oclMat d_descriptors; - - double totalgputick=0; - double totalgputick_kernel=0; - - double t1=0; - double t2=0; - for(int j = 0; j < LOOP_TIMES+1; j ++) - { - t1 = (double)cvGetTickCount();//gpu start1 - - ocl::oclMat d_src(img);//upload - - t2=(double)cvGetTickCount();//kernel - d_surf(d_src, ocl::oclMat(), d_keypoints, d_descriptors); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_kp, cpu_dp; - d_keypoints.download (cpu_kp);//download - d_descriptors.download (cpu_dp);//download - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick=t1+totalgputick; - - totalgputick_kernel=t2+totalgputick_kernel; - - } - - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) 
<< "ms" << endl; - - -} + + double totalgputick = 0; + double totalgputick_kernel = 0; + + double t1 = 0; + double t2 = 0; + for(int j = 0; j < LOOP_TIMES + 1; j ++) + { + t1 = (double)cvGetTickCount();//gpu start1 + + ocl::oclMat d_src(img);//upload + + t2 = (double)cvGetTickCount(); //kernel + d_surf(d_src, ocl::oclMat(), d_keypoints, d_descriptors); + t2 = (double)cvGetTickCount() - t2;//kernel + + cv::Mat cpu_kp, cpu_dp; + d_keypoints.download (cpu_kp);//download + d_descriptors.download (cpu_dp);//download + + t1 = (double)cvGetTickCount() - t1;//gpu end1 + + if(j == 0) + continue; + + totalgputick = t1 + totalgputick; + + totalgputick_kernel = t2 + totalgputick_kernel; + + } + + cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + + +} #endif //Have opencl \ No newline at end of file diff --git a/modules/ocl/perf/precomp.cpp b/modules/ocl/perf/precomp.cpp index f505dac..7d28700 100644 --- a/modules/ocl/perf/precomp.cpp +++ b/modules/ocl/perf/precomp.cpp @@ -42,4 +42,3 @@ #include "precomp.hpp" - \ No newline at end of file diff --git a/modules/ocl/perf/utility.cpp b/modules/ocl/perf/utility.cpp index 417f72f..b7fbe4f 100644 --- a/modules/ocl/perf/utility.cpp +++ b/modules/ocl/perf/utility.cpp @@ -75,13 +75,13 @@ using namespace cvtest; int randomInt(int minVal, int maxVal) { - RNG& rng = TS::ptr()->get_rng(); + RNG &rng = TS::ptr()->get_rng(); return rng.uniform(minVal, maxVal); } double randomDouble(double minVal, double maxVal) { - RNG& rng = TS::ptr()->get_rng(); + RNG &rng = TS::ptr()->get_rng(); return rng.uniform(minVal, maxVal); } @@ -170,7 +170,7 @@ const vector& devices() vector devices(FeatureSet feature) { const vector& d = devices(); - + vector devs_filtered; if (TargetArchs::builtWith(feature)) @@ -207,19 +207,19 @@ vector 
types(int depth_start, int depth_end, int cn_start, int cn_end) return v; } -const vector& all_types() +const vector &all_types() { static vector v = types(CV_8U, CV_64F, 1, 4); return v; } -Mat readImage(const string& fileName, int flags) +Mat readImage(const string &fileName, int flags) { return imread(string(cvtest::TS::ptr()->get_data_path()) + fileName, flags); } -Mat readImageType(const string& fname, int type) +Mat readImageType(const string &fname, int type) { Mat src = readImage(fname, CV_MAT_CN(type) == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR); if (CV_MAT_CN(type) == 4) @@ -232,17 +232,17 @@ Mat readImageType(const string& fname, int type) return src; } -double checkNorm(const Mat& m) +double checkNorm(const Mat &m) { return norm(m, NORM_INF); } -double checkNorm(const Mat& m1, const Mat& m2) +double checkNorm(const Mat &m1, const Mat &m2) { return norm(m1, m2, NORM_INF); } -double checkSimilarity(const Mat& m1, const Mat& m2) +double checkSimilarity(const Mat &m1, const Mat &m2) { Mat diff; matchTemplate(m1, m2, diff, CV_TM_CCORR_NORMED); @@ -256,7 +256,7 @@ void cv::ocl::PrintTo(const DeviceInfo& info, ostream* os) } */ -void PrintTo(const Inverse& inverse, std::ostream* os) +void PrintTo(const Inverse &inverse, std::ostream *os) { if (inverse) (*os) << "inverse"; diff --git a/modules/ocl/perf/utility.hpp b/modules/ocl/perf/utility.hpp index 8c14544..ef9638f 100644 --- a/modules/ocl/perf/utility.hpp +++ b/modules/ocl/perf/utility.hpp @@ -56,7 +56,7 @@ int randomInt(int minVal, int maxVal); double randomDouble(double minVal, double maxVal); //std::string generateVarList(int first,...); -std::string generateVarList(int& p1,int& p2); +std::string generateVarList(int &p1, int &p2); cv::Size randomSize(int minVal, int maxVal); cv::Scalar randomScalar(double minVal, double maxVal); cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal = 255.0); @@ -72,12 +72,12 @@ void showDiff(cv::InputArray gold, cv::InputArray actual, double eps); 
//std::vector devices(cv::gpu::FeatureSet feature); //! read image from testdata folder. -cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR); -cv::Mat readImageType(const std::string& fname, int type); +cv::Mat readImage(const std::string &fileName, int flags = cv::IMREAD_COLOR); +cv::Mat readImageType(const std::string &fname, int type); -double checkNorm(const cv::Mat& m); -double checkNorm(const cv::Mat& m1, const cv::Mat& m2); -double checkSimilarity(const cv::Mat& m1, const cv::Mat& m2); +double checkNorm(const cv::Mat &m); +double checkNorm(const cv::Mat &m1, const cv::Mat &m2); +double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2); #define EXPECT_MAT_NORM(mat, eps) \ { \ @@ -105,9 +105,9 @@ double checkSimilarity(const cv::Mat& m1, const cv::Mat& m2); EXPECT_LE(checkSimilarity(cv::Mat(mat1), cv::Mat(mat2)), eps); \ } -namespace cv -{ - namespace ocl +namespace cv +{ + namespace ocl { // void PrintTo(const DeviceInfo& info, std::ostream* os); } @@ -120,31 +120,34 @@ using perf::MatType; std::vector types(int depth_start, int depth_end, int cn_start, int cn_end); //! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4). 
-const std::vector& all_types(); +const std::vector &all_types(); class Inverse { - public: - inline Inverse(bool val = false) : val_(val) {} +public: + inline Inverse(bool val = false) : val_(val) {} - inline operator bool() const { return val_; } + inline operator bool() const + { + return val_; + } - private: - bool val_; +private: + bool val_; }; -void PrintTo(const Inverse& useRoi, std::ostream* os); +void PrintTo(const Inverse &useRoi, std::ostream *os); CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE) CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX) - enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1}; +enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1}; CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y) CV_ENUM(ReduceOp, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN) - CV_FLAGS(GemmFlags, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T); +CV_FLAGS(GemmFlags, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T); CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT) diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index ef0a571..dadf57c 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -305,9 +305,9 @@ inline int divUp(int total, int grain) template void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString, void *_scalar) { - if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F) + if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) { - CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n"); + CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; } @@ -319,7 +319,7 @@ void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string CV_Assert(src1.depth() != CV_8S); Context *clCxt = src1.clCxt; - int channels = dst.channels(); + 
int channels = dst.oclchannels(); int depth = dst.depth(); int vector_lengths[4][7] = {{4, 0, 4, 4, 1, 1, 1}, @@ -328,13 +328,13 @@ void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string {4, 0, 4, 4, 1, 1, 1} }; - size_t vector_length = vector_lengths[channels-1][depth]; + size_t vector_length = vector_lengths[channels - 1][depth]; int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1); int cols = divUp(dst.cols * channels + offset_cols, vector_length); size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(dst.rows, localThreads[1]) * localThreads[1], + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(dst.rows, localThreads[1]) *localThreads[1], 1 }; @@ -352,11 +352,11 @@ void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); - + T scalar; if(_scalar != NULL) { double scalar1 = *((double *)_scalar); - T scalar = (T)scalar1; + scalar = (T)scalar1; args.push_back( make_pair( sizeof(T), (void *)&scalar )); } @@ -368,9 +368,9 @@ void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string } void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString) { - if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F) + if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) { - CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n"); + CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; } @@ -384,7 +384,7 @@ void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const o CV_Assert(mask.type() == CV_8U); Context *clCxt = src1.clCxt; - 
int channels = dst.channels(); + int channels = dst.oclchannels(); int depth = dst.depth(); int vector_lengths[4][7] = {{4, 4, 2, 2, 1, 1, 1}, @@ -393,13 +393,13 @@ void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const o {1, 1, 1, 1, 1, 1, 1} }; - size_t vector_length = vector_lengths[channels-1][depth]; + size_t vector_length = vector_lengths[channels - 1][depth]; int offset_cols = ((dst.offset % dst.step) / dst.elemSize()) & (vector_length - 1); int cols = divUp(dst.cols + offset_cols, vector_length); size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(dst.rows, localThreads[1]) * localThreads[1], + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(dst.rows, localThreads[1]) *localThreads[1], 1 }; @@ -445,36 +445,33 @@ typedef void (*MulDivFunc)(const oclMat &src1, const oclMat &src2, oclMat &dst, void cv::ocl::multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, double scalar) { - static MulDivFunc tab[] = - { - arithmetic_run, 0, arithmetic_run, arithmetic_run, - arithmetic_run, arithmetic_run, arithmetic_run, - }; - - tab[src1.depth()](src1, src2, dst, "arithm_mul", &arithm_mul, (void *)(&scalar)); + if((src1.clCxt -> impl -> double_support != 0) && (src1.depth() == CV_64F)) + arithmetic_run(src1, src2, dst, "arithm_mul", &arithm_mul, (void *)(&scalar)); + else + arithmetic_run(src1, src2, dst, "arithm_mul", &arithm_mul, (void *)(&scalar)); } void cv::ocl::divide(const oclMat &src1, const oclMat &src2, oclMat &dst, double scalar) { - if(src1.clCxt -> impl -> double_support !=0) + if(src1.clCxt -> impl -> double_support != 0) arithmetic_run(src1, src2, dst, "arithm_div", &arithm_div, (void *)(&scalar)); else arithmetic_run(src1, src2, dst, "arithm_div", &arithm_div, (void *)(&scalar)); } - template +template void arithmetic_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, string 
kernelName, const char **kernelString, int isMatSubScalar) { - if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F) + if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) { - CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n"); + CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; } dst.create(src1.size(), src1.type()); CV_Assert(src1.cols == dst.cols && src1.rows == dst.rows && - src1.type() == dst.type()); + src1.type() == dst.type()); //CV_Assert(src1.depth() != CV_8S); @@ -482,12 +479,12 @@ void arithmetic_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst, CV_Assert(mask.type() == CV_8U && src1.rows == mask.rows && src1.cols == mask.cols); Context *clCxt = src1.clCxt; - int channels = dst.channels(); + int channels = dst.oclchannels(); int depth = dst.depth(); WT s[4] = { saturate_cast(src2.val[0]), saturate_cast(src2.val[1]), - saturate_cast(src2.val[2]), saturate_cast(src2.val[3]) - }; + saturate_cast(src2.val[2]), saturate_cast(src2.val[3]) + }; int vector_lengths[4][7] = {{4, 0, 2, 2, 1, 1, 1}, {2, 0, 1, 1, 1, 1, 1}, @@ -495,15 +492,15 @@ void arithmetic_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst, {1, 0, 1, 1, 1, 1, 1} }; - size_t vector_length = vector_lengths[channels-1][depth]; + size_t vector_length = vector_lengths[channels - 1][depth]; int offset_cols = ((dst.offset % dst.step) / dst.elemSize()) & (vector_length - 1); int cols = divUp(dst.cols + offset_cols, vector_length); size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(dst.rows, localThreads[1]) * localThreads[1], - 1 - }; + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(dst.rows, localThreads[1]) *localThreads[1], + 1 + }; int dst_step1 = dst.cols * dst.elemSize(); vector > args; @@ -535,9 +532,9 @@ void arithmetic_scalar_run(const oclMat &src1, const Scalar &src2, oclMat 
&dst, void arithmetic_scalar_run(const oclMat &src, oclMat &dst, string kernelName, const char **kernelString, double scalar) { - if(src.clCxt -> impl -> double_support ==0 && src.type() == CV_64F) + if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) { - CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n"); + CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; } @@ -548,7 +545,7 @@ void arithmetic_scalar_run(const oclMat &src, oclMat &dst, string kernelName, co CV_Assert(src.depth() != CV_8S); Context *clCxt = src.clCxt; - int channels = dst.channels(); + int channels = dst.oclchannels(); int depth = dst.depth(); int vector_lengths[4][7] = {{4, 0, 4, 4, 1, 1, 1}, @@ -557,15 +554,15 @@ void arithmetic_scalar_run(const oclMat &src, oclMat &dst, string kernelName, co {4, 0, 4, 4, 1, 1, 1} }; - size_t vector_length = vector_lengths[channels-1][depth]; + size_t vector_length = vector_lengths[channels - 1][depth]; int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1); int cols = divUp(dst.cols * channels + offset_cols, vector_length); size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(dst.rows, localThreads[1]) * localThreads[1], - 1 - }; + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(dst.rows, localThreads[1]) *localThreads[1], + 1 + }; int dst_step1 = dst.cols * dst.elemSize(); vector > args; @@ -579,7 +576,7 @@ void arithmetic_scalar_run(const oclMat &src, oclMat &dst, string kernelName, co args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); - if(src.clCxt -> impl -> double_support !=0) + if(src.clCxt -> impl -> double_support != 0) args.push_back( make_pair( sizeof(cl_double), (void *)&scalar )); else { @@ -638,9 +635,9 @@ void cv::ocl::subtract(const Scalar &src2, const oclMat &src1, oclMat &dst, 
cons } void cv::ocl::divide(double scalar, const oclMat &src, oclMat &dst) { - if(src.clCxt -> impl -> double_support ==0) + if(src.clCxt -> impl -> double_support == 0) { - CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n"); + CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; } @@ -666,7 +663,7 @@ void cv::ocl::absdiff(const oclMat &src1, const Scalar &src2, oclMat &dst) void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString) { dst.create(src1.size(), CV_8UC1); - CV_Assert(src1.channels() == 1); + CV_Assert(src1.oclchannels() == 1); CV_Assert(src1.type() == src2.type()); Context *clCxt = src1.clCxt; int depth = src1.depth(); @@ -675,10 +672,10 @@ void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string ker int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1); int cols = divUp(dst.cols + offset_cols, vector_length); size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(dst.rows, localThreads[1]) * localThreads[1], - 1 - }; + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(dst.rows, localThreads[1]) *localThreads[1], + 1 + }; int dst_step1 = dst.cols * dst.elemSize(); vector > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); @@ -698,7 +695,7 @@ void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string ker void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int cmpOp) { - if(src1.clCxt -> impl -> double_support ==0 && src1.type()==CV_64F) + if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) { cout << "Selected device do not support double" << endl; return; @@ -752,7 +749,7 @@ void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int vlen , int gr int cols = all_cols - invalid_cols , elemnum = cols * src.rows;; int offset 
= src.offset / (vlen * src.elemSize1()); int repeat_s = src.offset / src.elemSize1() - offset * vlen; - int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.channels(); + int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.oclchannels(); char build_options[512]; CV_Assert(type == 0 || type == 1 || type == 2); sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d -D FUNC_TYPE_%d", src.depth(), repeat_s, repeat_e, type); @@ -764,33 +761,33 @@ void arithmetic_sum_buffer_run(const oclMat &src, cl_mem &dst, int vlen , int gr args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst )); size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1}; - if(src.channels() != 3) + if(src.oclchannels() != 3) openCLExecuteKernel(src.clCxt, &arithm_sum, "arithm_op_sum", gt, lt, args, -1, -1, build_options); else openCLExecuteKernel(src.clCxt, &arithm_sum_3, "arithm_op_sum_3", gt, lt, args, -1, -1, build_options); } template -Scalar arithmetic_sum(const oclMat &src) +Scalar arithmetic_sum(const oclMat &src, int type = 0) { size_t groupnum = src.clCxt->impl->maxComputeUnits; CV_Assert(groupnum != 0); - int vlen = src.channels() == 3 ? 12 : 8, dbsize = groupnum * vlen, status; + int vlen = src.oclchannels() == 3 ? 
12 : 8, dbsize = groupnum * vlen, status; Context *clCxt = src.clCxt; T *p = new T[dbsize]; - cl_mem dstBuffer = openCLCreateBuffer(clCxt,CL_MEM_WRITE_ONLY,dbsize*sizeof(T)); + cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(T)); Scalar s; s.val[0] = 0.0; s.val[1] = 0.0; s.val[2] = 0.0; s.val[3] = 0.0; - arithmetic_sum_buffer_run(src, dstBuffer, vlen, groupnum); + arithmetic_sum_buffer_run(src, dstBuffer, vlen, groupnum, type); memset(p, 0, dbsize * sizeof(T)); - openCLReadBuffer(clCxt,dstBuffer,(void *)p,dbsize*sizeof(T)); + openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(T)); for(int i = 0; i < dbsize;) { - for(int j = 0; j < src.channels(); j++, i++) + for(int j = 0; j < src.oclchannels(); j++, i++) s.val[j] += p[i]; } delete[] p; @@ -798,12 +795,12 @@ Scalar arithmetic_sum(const oclMat &src) return s; } -typedef Scalar (*sumFunc)(const oclMat &src); +typedef Scalar (*sumFunc)(const oclMat &src, int type); Scalar cv::ocl::sum(const oclMat &src) { - if(src.clCxt->impl->double_support==0 && src.depth()==CV_64F) + if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F) { - CV_Error(CV_GpuNotSupported,"select device don't support double"); + CV_Error(CV_GpuNotSupported, "select device don't support double"); } static sumFunc functab[2] = { @@ -813,7 +810,25 @@ Scalar cv::ocl::sum(const oclMat &src) sumFunc func; func = functab[src.clCxt->impl->double_support]; - return func(src); + return func(src, 0); +} + + +Scalar cv::ocl::sqrSum(const oclMat &src) +{ + if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F) + { + CV_Error(CV_GpuNotSupported, "select device don't support double"); + } + static sumFunc functab[2] = + { + arithmetic_sum, + arithmetic_sum + }; + + sumFunc func; + func = functab[src.clCxt->impl->double_support]; + return func(src, 2); } ////////////////////////////////////////////////////////////////////////////// //////////////////////////////// meanStdDev 
////////////////////////////////// @@ -822,7 +837,7 @@ void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev) { CV_Assert(src.depth() <= CV_32S); cv::Size sz(1, 1); - int channels = src.channels(); + int channels = src.oclchannels(); Mat m1(sz, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(0)), m2(sz, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(0)); oclMat dst1(m1), dst2(m2); @@ -851,7 +866,7 @@ void arithmetic_minMax_run(const oclMat &src, const oclMat &mask, cl_mem &dst, i int cols = all_cols - invalid_cols , elemnum = cols * src.rows;; int offset = src.offset / (vlen * src.elemSize1()); int repeat_s = src.offset / src.elemSize1() - offset * vlen; - int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.channels(); + int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.oclchannels(); char build_options[50]; sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d", src.depth(), repeat_s, repeat_e); args.push_back( make_pair( sizeof(cl_int) , (void *)&cols )); @@ -883,7 +898,7 @@ void arithmetic_minMax_mask_run(const oclMat &src, const oclMat &mask, cl_mem &d vector > args; size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1}; char build_options[50]; - if(src.channels() == 1) + if(src.oclchannels() == 1) { int cols = (src.cols - 1) / vlen + 1; int invalid_cols = src.step / (vlen * src.elemSize1()) - cols; @@ -917,7 +932,7 @@ template void arithmetic_minMax(const oclMat &src, double *minVal, int vlen = 8; int dbsize = groupnum * 2 * vlen * sizeof(T) , status; Context *clCxt = src.clCxt; - cl_mem dstBuffer = openCLCreateBuffer(clCxt,CL_MEM_WRITE_ONLY,dbsize); + cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize); *minVal = std::numeric_limits::max() , *maxVal = -std::numeric_limits::max(); if (mask.empty()) { @@ -929,7 +944,7 @@ template void arithmetic_minMax(const oclMat &src, double *minVal, } T *p = new T[groupnum * vlen * 2]; memset(p, 0, 
dbsize); - openCLReadBuffer(clCxt,dstBuffer,(void *)p,dbsize); + openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize); for(int i = 0; i < vlen * groupnum; i++) { *minVal = *minVal < p[i] ? *minVal : p[i]; @@ -945,10 +960,10 @@ template void arithmetic_minMax(const oclMat &src, double *minVal, typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask); void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask) { - CV_Assert(src.channels() == 1); - if(src.clCxt->impl->double_support==0 && src.depth()==CV_64F) + CV_Assert(src.oclchannels() == 1); + if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F) { - CV_Error(CV_GpuNotSupported,"select device don't support double"); + CV_Error(CV_GpuNotSupported, "select device don't support double"); } static minMaxFunc functab[8] = { @@ -979,7 +994,7 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType) bool isRelative = (normType & NORM_RELATIVE) != 0; normType &= 7; CV_Assert(src1.depth() <= CV_32S && src1.type() == src2.type() && ( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2)); - int channels = src1.channels(), i = 0, *p; + int channels = src1.oclchannels(), i = 0, *p; double r = 0; oclMat gm1(src1.size(), src1.type()); int min_int = (normType == NORM_INF ? 
CL_INT_MIN : 0); @@ -1030,9 +1045,9 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType) ////////////////////////////////////////////////////////////////////////////// void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kernelName) { - if(src.clCxt -> impl -> double_support ==0 && src.type() == CV_64F) + if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) { - CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n"); + CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; } @@ -1041,7 +1056,7 @@ void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kernelName) CV_Assert(src.type() == dst.type()); Context *clCxt = src.clCxt; - int channels = dst.channels(); + int channels = dst.oclchannels(); int depth = dst.depth(); int vector_lengths[4][7] = {{4, 4, 4, 4, 1, 1, 1}, @@ -1050,15 +1065,15 @@ void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kernelName) {4, 4, 4, 4, 1, 1, 1} }; - size_t vector_length = vector_lengths[channels-1][depth]; + size_t vector_length = vector_lengths[channels - 1][depth]; int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1); int cols = divUp(dst.cols * channels + offset_cols, vector_length); int rows = divUp(dst.rows, 2); size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(rows, localThreads[1]) * localThreads[1], + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(rows, localThreads[1]) *localThreads[1], 1 }; @@ -1079,9 +1094,9 @@ void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kernelName) } void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kernelName, bool isVertical) { - if(src.clCxt -> impl -> double_support ==0 && src.type() == CV_64F) + if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) { - 
CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n"); + CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; } @@ -1089,7 +1104,7 @@ void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kernelName, CV_Assert(src.type() == dst.type()); Context *clCxt = src.clCxt; - int channels = dst.channels(); + int channels = dst.oclchannels(); int depth = dst.depth(); int vector_lengths[4][7] = {{1, 1, 1, 1, 1, 1, 1}, @@ -1098,15 +1113,15 @@ void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kernelName, {1, 1, 1, 1, 1, 1, 1} }; - size_t vector_length = vector_lengths[channels-1][depth]; + size_t vector_length = vector_lengths[channels - 1][depth]; int offset_cols = ((dst.offset % dst.step) / dst.elemSize()) & (vector_length - 1); int cols = divUp(dst.cols + offset_cols, vector_length); cols = isVertical ? cols : divUp(cols, 2); int rows = isVertical ? divUp(dst.rows, 2) : dst.rows; size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(rows, localThreads[1]) * localThreads[1], + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(rows, localThreads[1]) *localThreads[1], 1 }; @@ -1130,7 +1145,7 @@ void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kernelName, const char **kernelString = isVertical ? 
&arithm_flip_rc : &arithm_flip; - openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, src.channels(), depth); + openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, src.oclchannels(), depth); } void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode) { @@ -1151,21 +1166,21 @@ void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode) void arithmetic_lut_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName) { Context *clCxt = src1.clCxt; - int channels = src1.channels(); + int channels = src1.oclchannels(); int rows = src1.rows; int cols = src1.cols; //int step = src1.step; - int src_step = src1.step/ src1.elemSize(); - int dst_step = dst.step/ dst.elemSize(); + int src_step = src1.step / src1.elemSize(); + int dst_step = dst.step / dst.elemSize(); int whole_rows = src1.wholerows; int whole_cols = src1.wholecols; - int src_offset = src1.offset/ src1.elemSize(); - int dst_offset = dst.offset/ dst.elemSize(); - int lut_offset = src2.offset/ src2.elemSize(); + int src_offset = src1.offset / src1.elemSize(); + int dst_offset = dst.offset / dst.elemSize(); + int lut_offset = src2.offset / src2.elemSize(); int left_col = 0, right_col = 0; size_t localSize[] = {16, 16, 1}; //cl_kernel kernel = openCLGetKernelFromSource(clCxt,&arithm_LUT,kernelName); - size_t globalSize[] = {(cols + localSize[0] - 1) / localSize[0]*localSize[0], (rows + localSize[1] - 1) / localSize[1]*localSize[1], 1}; + size_t globalSize[] = {(cols + localSize[0] - 1) / localSize[0] *localSize[0], (rows + localSize[1] - 1) / localSize[1] *localSize[1], 1}; if(channels == 1 && cols > 6) { left_col = 4 - (dst_offset & 3); @@ -1187,7 +1202,7 @@ void arithmetic_lut_run(const oclMat &src1, const oclMat &src2, oclMat &dst, str CV_Assert(clCxt == dst.clCxt); CV_Assert(src1.cols == dst.cols); CV_Assert(src1.rows == dst.rows); - CV_Assert(src1.channels() == dst.channels()); + CV_Assert(src1.oclchannels() == 
dst.oclchannels()); // CV_Assert(src1.step == dst.step); vector > args; @@ -1206,7 +1221,7 @@ void arithmetic_lut_run(const oclMat &src1, const oclMat &src2, oclMat &dst, str args.push_back( make_pair( sizeof(cl_int), (void *)&lut_offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&src_step )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step )); - openCLExecuteKernel(clCxt, &arithm_LUT, kernelName, globalSize, localSize, args, src1.channels(), src1.depth()); + openCLExecuteKernel(clCxt, &arithm_LUT, kernelName, globalSize, localSize, args, src1.oclchannels(), src1.depth()); } if(channels == 1 && (left_col != 0 || right_col != 0)) { @@ -1231,7 +1246,7 @@ void arithmetic_lut_run(const oclMat &src1, const oclMat &src2, oclMat &dst, str args.push_back( make_pair( sizeof(cl_int), (void *)&lut_offset )); args.push_back( make_pair( sizeof(cl_int), (void *)&src_step )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step )); - openCLExecuteKernel(clCxt, &arithm_LUT, "LUT2", globalSize, localSize, args, src1.channels(), src1.depth()); + openCLExecuteKernel(clCxt, &arithm_LUT, "LUT2", globalSize, localSize, args, src1.oclchannels(), src1.depth()); } } @@ -1239,7 +1254,7 @@ void cv::ocl::LUT(const oclMat &src, const oclMat &lut, oclMat &dst) { int cn = src.channels(); CV_Assert(src.depth() == CV_8U); - CV_Assert((lut.channels() == 1 || lut.channels() == cn) && lut.rows == 1 && lut.cols == 256); + CV_Assert((lut.oclchannels() == 1 || lut.oclchannels() == cn) && lut.rows == 1 && lut.cols == 256); dst.create(src.size(), CV_MAKETYPE(lut.depth(), cn)); //oclMat _lut(lut); string kernelName = "LUT"; @@ -1259,17 +1274,17 @@ void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernelName, c CV_Assert( src.type() == CV_32F || src.type() == CV_64F); Context *clCxt = src.clCxt; - if(clCxt -> impl -> double_support ==0 && src.type() == CV_64F) + if(clCxt -> impl -> double_support == 0 && src.type() == CV_64F) { - 
CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n"); + CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; } - //int channels = dst.channels(); + //int channels = dst.oclchannels(); int depth = dst.depth(); size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(dst.cols, localThreads[0]) * localThreads[0], - divUp(dst.rows, localThreads[1]) * localThreads[1], + size_t globalThreads[3] = { divUp(dst.cols, localThreads[0]) *localThreads[0], + divUp(dst.rows, localThreads[1]) *localThreads[1], 1 }; @@ -1300,14 +1315,14 @@ void cv::ocl::log(const oclMat &src, oclMat &dst) ////////////////////////////////////////////////////////////////////////////// void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName) { - if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F) + if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) { - CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n"); + CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; } Context *clCxt = src1.clCxt; - int channels = dst.channels(); + int channels = dst.oclchannels(); int depth = dst.depth(); size_t vector_length = 1; @@ -1316,8 +1331,8 @@ void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclM int rows = dst.rows; size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(rows, localThreads[1]) * localThreads[1], + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(rows, localThreads[1]) *localThreads[1], 1 }; @@ -1348,9 +1363,9 @@ void cv::ocl::magnitude(const oclMat &src1, const oclMat &src2, oclMat &dst) void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString) { - if(src1.clCxt -> impl -> double_support ==0 && src1.type() 
== CV_64F) + if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) { - CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n"); + CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; } @@ -1358,7 +1373,7 @@ void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, s CV_Assert(src1.type() == src2.type() && src1.type() == dst.type()); Context *clCxt = src1.clCxt; - int channels = dst.channels(); + int channels = dst.oclchannels(); int depth = dst.depth(); size_t vector_length = 1; @@ -1367,8 +1382,8 @@ void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, s int rows = dst.rows; size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(rows, localThreads[1]) * localThreads[1], + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(rows, localThreads[1]) *localThreads[1], 1 }; @@ -1412,22 +1427,22 @@ void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle , bool angle void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, oclMat &dst_mag, oclMat &dst_cart, string kernelName, bool angleInDegrees) { - if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F) + if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) { - CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n"); + CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; } Context *clCxt = src1.clCxt; - int channels = src1.channels(); + int channels = src1.oclchannels(); int depth = src1.depth(); int cols = src1.cols * channels; int rows = src1.rows; size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(rows, localThreads[1]) * localThreads[1], + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(rows, 
localThreads[1]) *localThreads[1], 1 }; @@ -1467,22 +1482,22 @@ void cv::ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &mag, oclMat void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees, string kernelName) { - if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F) + if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) { - CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n"); + CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; } Context *clCxt = src2.clCxt; - int channels = src2.channels(); + int channels = src2.oclchannels(); int depth = src2.depth(); int cols = src2.cols * channels; int rows = src2.rows; size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(rows, localThreads[1]) * localThreads[1], + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(rows, localThreads[1]) *localThreads[1], 1 }; @@ -1558,7 +1573,7 @@ void arithmetic_minMaxLoc_mask_run(const oclMat &src, const oclMat &mask, cl_mem vector > args; size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1}; char build_options[50]; - if(src.channels() == 1) + if(src.oclchannels() == 1) { int cols = (src.cols - 1) / vlen + 1; int invalid_cols = src.step / (vlen * src.elemSize1()) - cols; @@ -1585,15 +1600,15 @@ void arithmetic_minMaxLoc_mask_run(const oclMat &src, const oclMat &mask, cl_mem } template void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal, - Point *minLoc, Point *maxLoc, const oclMat &mask) + Point *minLoc, Point *maxLoc, const oclMat &mask) { - CV_Assert(src.channels() == 1); - size_t groupnum = src.clCxt->impl->maxComputeUnits; + CV_Assert(src.oclchannels() == 1); + size_t groupnum = src.clCxt->impl->maxComputeUnits; CV_Assert(groupnum != 0); int minloc = -1 , maxloc = -1; int vlen = 4, dbsize = groupnum * vlen 
* 4 * sizeof(T) , status; Context *clCxt = src.clCxt; - cl_mem dstBuffer = openCLCreateBuffer(clCxt,CL_MEM_WRITE_ONLY,dbsize); + cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize); *minVal = std::numeric_limits::max() , *maxVal = -std::numeric_limits::max(); if (mask.empty()) { @@ -1605,16 +1620,16 @@ void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal, } T *p = new T[groupnum * vlen * 4]; memset(p, 0, dbsize); - openCLReadBuffer(clCxt,dstBuffer,(void *)p,dbsize); + openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize); for(int i = 0; i < vlen * groupnum; i++) { - *minVal = (*minVal < p[i] || p[i + 2 * vlen *groupnum] == -1) ? *minVal : p[i]; - minloc = (*minVal < p[i] || p[i + 2 * vlen *groupnum] == -1) ? minloc : p[i + 2 * vlen * groupnum]; + *minVal = (*minVal < p[i] || p[i + 2 * vlen * groupnum] == -1) ? *minVal : p[i]; + minloc = (*minVal < p[i] || p[i + 2 * vlen * groupnum] == -1) ? minloc : p[i + 2 * vlen * groupnum]; } for(int i = vlen * groupnum; i < 2 * vlen * groupnum; i++) { - *maxVal = (*maxVal > p[i] || p[i + 2 * vlen *groupnum] == -1) ? *maxVal : p[i]; - maxloc = (*maxVal > p[i] || p[i + 2 * vlen *groupnum] == -1) ? maxloc : p[i + 2 * vlen * groupnum]; + *maxVal = (*maxVal > p[i] || p[i + 2 * vlen * groupnum] == -1) ? *maxVal : p[i]; + maxloc = (*maxVal > p[i] || p[i + 2 * vlen * groupnum] == -1) ? 
maxloc : p[i + 2 * vlen * groupnum]; } int pre_rows = src.offset / src.step; @@ -1645,13 +1660,13 @@ void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal, } typedef void (*minMaxLocFunc)(const oclMat &src, double *minVal, double *maxVal, - Point *minLoc, Point *maxLoc, const oclMat &mask); + Point *minLoc, Point *maxLoc, const oclMat &mask); void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal, Point *minLoc, Point *maxLoc, const oclMat &mask) { - if(src.clCxt->impl->double_support==0 && src.depth()==CV_64F) + if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F) { - CV_Error(CV_GpuNotSupported,"select device don't support double"); + CV_Error(CV_GpuNotSupported, "select device don't support double"); } static minMaxLocFunc functab[2] = { @@ -1661,7 +1676,7 @@ void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal, minMaxLocFunc func; func = functab[src.clCxt->impl->double_support]; - func(src,minVal,maxVal,minLoc,maxLoc,mask); + func(src, minVal, maxVal, minLoc, maxLoc, mask); } ////////////////////////////////////////////////////////////////////////////// @@ -1677,7 +1692,7 @@ void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int vlen , int int cols = all_cols - invalid_cols , elemnum = cols * src.rows;; int offset = src.offset / (vlen * src.elemSize1()); int repeat_s = src.offset / src.elemSize1() - offset * vlen; - int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.channels(); + int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.oclchannels(); char build_options[50]; sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d", src.depth(), repeat_s, repeat_e); @@ -1696,9 +1711,9 @@ void arithmetic_countNonZero_run(const oclMat &src, cl_mem &dst, int vlen , int int cv::ocl::countNonZero(const oclMat &src) { size_t groupnum = src.clCxt->impl->maxComputeUnits; - if(src.clCxt->impl->double_support == 0 
&& src.depth()==CV_64F) + if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F) { - CV_Error(CV_GpuNotSupported,"select device don't support double"); + CV_Error(CV_GpuNotSupported, "select device don't support double"); } CV_Assert(groupnum != 0); groupnum = groupnum * 2; @@ -1707,11 +1722,11 @@ int cv::ocl::countNonZero(const oclMat &src) Context *clCxt = src.clCxt; string kernelName = "arithm_op_nonzero"; int *p = new int[dbsize], nonzero = 0; - cl_mem dstBuffer = openCLCreateBuffer(clCxt,CL_MEM_WRITE_ONLY,dbsize*sizeof(int)); + cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(int)); arithmetic_countNonZero_run(src, dstBuffer, vlen, groupnum, kernelName); memset(p, 0, dbsize * sizeof(int)); - openCLReadBuffer(clCxt,dstBuffer,(void *)p,dbsize*sizeof(int)); + openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(int)); for(int i = 0; i < dbsize; i++) { nonzero += p[i]; @@ -1730,7 +1745,7 @@ void bitwise_run(const oclMat &src1, oclMat &dst, string kernelName, const char Context *clCxt = src1.clCxt; - int channels = dst.channels(); + int channels = dst.oclchannels(); int depth = dst.depth(); int vector_lengths[4][7] = {{4, 4, 4, 4, 1, 1, 1}, @@ -1739,13 +1754,13 @@ void bitwise_run(const oclMat &src1, oclMat &dst, string kernelName, const char {4, 4, 4, 4, 1, 1, 1} }; - size_t vector_length = vector_lengths[channels-1][depth]; + size_t vector_length = vector_lengths[channels - 1][depth]; int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1); int cols = divUp(dst.cols * channels + offset_cols, vector_length); size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(dst.rows, localThreads[1]) * localThreads[1], + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(dst.rows, localThreads[1]) *localThreads[1], 1 }; @@ -1775,7 +1790,7 @@ void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, 
string ker CV_Assert(src1.type() == src2.type() && src1.type() == dst.type()); Context *clCxt = src1.clCxt; - int channels = dst.channels(); + int channels = dst.oclchannels(); int depth = dst.depth(); int vector_lengths[4][7] = {{4, 4, 4, 4, 1, 1, 1}, @@ -1784,13 +1799,13 @@ void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string ker {4, 4, 4, 4, 1, 1, 1} }; - size_t vector_length = vector_lengths[channels-1][depth]; + size_t vector_length = vector_lengths[channels - 1][depth]; int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1); int cols = divUp(dst.cols * channels + offset_cols, vector_length); size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(dst.rows, localThreads[1]) * localThreads[1], + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(dst.rows, localThreads[1]) *localThreads[1], 1 }; @@ -1833,7 +1848,7 @@ void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclM CV_Assert(mask.type() == CV_8U); Context *clCxt = src1.clCxt; - int channels = dst.channels(); + int channels = dst.oclchannels(); int depth = dst.depth(); int vector_lengths[4][7] = {{4, 4, 2, 2, 1, 1, 1}, @@ -1842,13 +1857,13 @@ void bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclM {1, 1, 1, 1, 1, 1, 1} }; - size_t vector_length = vector_lengths[channels-1][depth]; + size_t vector_length = vector_lengths[channels - 1][depth]; int offset_cols = ((dst.offset % dst.step) / dst.elemSize()) & (vector_length - 1); int cols = divUp(dst.cols + offset_cols, vector_length); size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(dst.rows, localThreads[1]) * localThreads[1], + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(dst.rows, localThreads[1]) *localThreads[1], 1 }; @@ -1874,7 +1889,7 @@ void 
bitwise_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclM } -template +template void bitwise_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar) { dst.create(src1.size(), src1.type()); @@ -1887,7 +1902,7 @@ void bitwise_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst, con CV_Assert(mask.type() == CV_8U && src1.rows == mask.rows && src1.cols == mask.cols); Context *clCxt = src1.clCxt; - int channels = dst.channels(); + int channels = dst.oclchannels(); int depth = dst.depth(); WT s[4] = { saturate_cast(src2.val[0]), saturate_cast(src2.val[1]), @@ -1900,13 +1915,13 @@ void bitwise_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst, con {1, 1, 1, 1, 1, 1, 1} }; - size_t vector_length = vector_lengths[channels-1][depth]; + size_t vector_length = vector_lengths[channels - 1][depth]; int offset_cols = ((dst.offset % dst.step) / dst.elemSize()) & (vector_length - 1); int cols = divUp(dst.cols + offset_cols, vector_length); size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(dst.rows, localThreads[1]) * localThreads[1], + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(dst.rows, localThreads[1]) *localThreads[1], 1 }; @@ -1957,13 +1972,13 @@ void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst, const o 0 #else - bitwise_scalar_run, - bitwise_scalar_run, - bitwise_scalar_run, - bitwise_scalar_run, - bitwise_scalar_run, - bitwise_scalar_run, - bitwise_scalar_run, + bitwise_scalar_run, + bitwise_scalar_run, + bitwise_scalar_run, + bitwise_scalar_run, + bitwise_scalar_run, + bitwise_scalar_run, + bitwise_scalar_run, 0 #endif }; @@ -1979,7 +1994,7 @@ void bitwise_scalar(const oclMat &src1, const Scalar &src2, oclMat &dst, const o void cv::ocl::bitwise_not(const oclMat &src, oclMat &dst) { - if(src.clCxt -> impl 
-> double_support ==0 && src.type()==CV_64F) + if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) { cout << "Selected device do not support double" << endl; return; @@ -1992,7 +2007,7 @@ void cv::ocl::bitwise_not(const oclMat &src, oclMat &dst) void cv::ocl::bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask) { // dst.create(src1.size(),src1.type()); - if(src1.clCxt -> impl -> double_support ==0 && src1.type()==CV_64F) + if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) { cout << "Selected device do not support double" << endl; return; @@ -2008,7 +2023,7 @@ void cv::ocl::bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, co void cv::ocl::bitwise_or(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask) { - if(src1.clCxt -> impl -> double_support ==0 && src1.type()==CV_64F) + if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) { cout << "Selected device do not support double" << endl; return; @@ -2023,7 +2038,7 @@ void cv::ocl::bitwise_or(const oclMat &src1, const Scalar &src2, oclMat &dst, co void cv::ocl::bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask) { // dst.create(src1.size(),src1.type()); - if(src1.clCxt -> impl -> double_support ==0 && src1.type()==CV_64F) + if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) { cout << "Selected device do not support double" << endl; return; @@ -2040,7 +2055,7 @@ void cv::ocl::bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, c void cv::ocl::bitwise_and(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask) { - if(src1.clCxt -> impl -> double_support ==0 && src1.type()==CV_64F) + if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) { cout << "Selected device do not support double" << endl; return; @@ -2054,7 +2069,7 @@ void cv::ocl::bitwise_and(const oclMat &src1, const Scalar &src2, oclMat &dst, c 
void cv::ocl::bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask) { - if(src1.clCxt -> impl -> double_support ==0 && src1.type()==CV_64F) + if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) { cout << "Selected device do not support double" << endl; return; @@ -2073,7 +2088,7 @@ void cv::ocl::bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, c void cv::ocl::bitwise_xor(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask) { - if(src1.clCxt -> impl -> double_support ==0 && src1.type()==CV_64F) + if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) { cout << "Selected device do not support double" << endl; return; @@ -2120,16 +2135,16 @@ cv::ocl::oclMat cv::ocl::operator ^ (const oclMat &src1, const oclMat &src2) #define BLOCK_ROWS (256/TILE_DIM) void transpose_run(const oclMat &src, oclMat &dst, string kernelName) { - if(src.clCxt -> impl -> double_support ==0 && src.type() == CV_64F) + if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) { - CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n"); + CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; } CV_Assert(src.cols == dst.rows && src.rows == dst.cols); Context *clCxt = src.clCxt; - int channels = src.channels(); + int channels = src.oclchannels(); int depth = src.depth(); int vector_lengths[4][7] = {{1, 0, 0, 0, 1, 1, 0}, @@ -2138,13 +2153,13 @@ void transpose_run(const oclMat &src, oclMat &dst, string kernelName) {1, 1, 0, 0, 0, 0, 0} }; - size_t vector_length = vector_lengths[channels-1][depth]; + size_t vector_length = vector_lengths[channels - 1][depth]; int offset_cols = ((dst.offset % dst.step) / dst.elemSize()) & (vector_length - 1); int cols = divUp(src.cols + offset_cols, vector_length); size_t localThreads[3] = { TILE_DIM, BLOCK_ROWS, 1 }; - size_t globalThreads[3] = { divUp(cols, TILE_DIM) * localThreads[0], - divUp(src.rows, 
TILE_DIM) * localThreads[1], + size_t globalThreads[3] = { divUp(cols, TILE_DIM) *localThreads[0], + divUp(src.rows, TILE_DIM) *localThreads[1], 1 }; @@ -2163,7 +2178,7 @@ void transpose_run(const oclMat &src, oclMat &dst, string kernelName) void cv::ocl::transpose(const oclMat &src, oclMat &dst) { - CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_8SC4 || + CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3 || src.type() == CV_8UC4 || src.type() == CV_8SC3 || src.type() == CV_8SC4 || src.type() == CV_16UC2 || src.type() == CV_16SC2 || src.type() == CV_32SC1 || src.type() == CV_32FC1); stringstream idxstr; @@ -2186,7 +2201,7 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2, CV_Assert(src1.type() == src2.type() && src1.type() == dst.type()); Context *clCxt = src1.clCxt; - int channels = dst.channels(); + int channels = dst.oclchannels(); int depth = dst.depth(); @@ -2197,15 +2212,15 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2, }; - size_t vector_length = vector_lengths[channels-1][depth]; + size_t vector_length = vector_lengths[channels - 1][depth]; int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1); int cols = divUp(dst.cols * channels + offset_cols, vector_length); size_t localThreads[3] = { 256, 1, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(dst.rows, localThreads[1]) * localThreads[1], - 1 - }; + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(dst.rows, localThreads[1]) *localThreads[1], + 1 + }; int dst_step1 = dst.cols * dst.elemSize(); vector > args; @@ -2224,11 +2239,11 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2, } else { - float alpha_f=alpha,beta_f=beta,gama_f=gama; + float alpha_f = alpha, beta_f = beta, gama_f = gama; args.push_back( make_pair( sizeof(cl_float), (void *)&alpha_f )); args.push_back( make_pair( 
sizeof(cl_float), (void *)&beta_f )); args.push_back( make_pair( sizeof(cl_float), (void *)&gama_f )); - } + } args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step )); @@ -2243,13 +2258,13 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2, void cv::ocl::magnitudeSqr(const oclMat &src1, const oclMat &src2, oclMat &dst) { CV_Assert(src1.type() == src2.type() && src1.size() == src2.size() && - (src1.depth() == CV_32F )); + (src1.depth() == CV_32F )); dst.create(src1.size(), src1.type()); Context *clCxt = src1.clCxt; - int channels = dst.channels(); + int channels = dst.oclchannels(); int depth = dst.depth(); @@ -2260,15 +2275,15 @@ void cv::ocl::magnitudeSqr(const oclMat &src1, const oclMat &src2, oclMat &dst) }; - size_t vector_length = vector_lengths[channels-1][depth]; + size_t vector_length = vector_lengths[channels - 1][depth]; int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1); int cols = divUp(dst.cols * channels + offset_cols, vector_length); size_t localThreads[3] = { 256, 1, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(dst.rows, localThreads[1]) * localThreads[1], - 1 - }; + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(dst.rows, localThreads[1]) *localThreads[1], + 1 + }; int dst_step1 = dst.cols * dst.elemSize(); vector > args; @@ -2297,7 +2312,7 @@ void cv::ocl::magnitudeSqr(const oclMat &src1, oclMat &dst) Context *clCxt = src1.clCxt; - int channels = dst.channels(); + int channels = dst.oclchannels(); int depth = dst.depth(); @@ -2308,15 +2323,15 @@ void cv::ocl::magnitudeSqr(const oclMat &src1, oclMat &dst) }; - size_t vector_length = vector_lengths[channels-1][depth]; + size_t vector_length = vector_lengths[channels - 1][depth]; int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1); int cols = divUp(dst.cols * channels + 
offset_cols, vector_length); size_t localThreads[3] = { 256, 1, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(dst.rows, localThreads[1]) * localThreads[1], - 1 - }; + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(dst.rows, localThreads[1]) *localThreads[1], + 1 + }; int dst_step1 = dst.cols * dst.elemSize(); vector > args; @@ -2339,7 +2354,7 @@ void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string kernel CV_Assert(src1.type() == dst.type()); Context *clCxt = src1.clCxt; - int channels = dst.channels(); + int channels = dst.oclchannels(); int depth = dst.depth(); size_t vector_length = 1; @@ -2348,10 +2363,10 @@ void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string kernel int rows = dst.rows; size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(rows, localThreads[1]) * localThreads[1], - 1 - }; + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(rows, localThreads[1]) *localThreads[1], + 1 + }; int dst_step1 = dst.cols * dst.elemSize(); vector > args; @@ -2364,19 +2379,19 @@ void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string kernel args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); - if(src1.clCxt -> impl -> double_support ==0) + if(src1.clCxt -> impl -> double_support == 0) { - float pf = p; - args.push_back( make_pair( sizeof(cl_float), (void *)&pf )); + float pf = p; + args.push_back( make_pair( sizeof(cl_float), (void *)&pf )); } else - args.push_back( make_pair( sizeof(cl_double), (void *)&p )); + args.push_back( make_pair( sizeof(cl_double), (void *)&p )); openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); } void 
cv::ocl::pow(const oclMat &x, double p, oclMat &y) { - if(x.clCxt -> impl -> double_support ==0 && x.type()==CV_64F) + if(x.clCxt -> impl -> double_support == 0 && x.type() == CV_64F) { cout << "Selected device do not support double" << endl; return; diff --git a/modules/ocl/src/blend.cpp b/modules/ocl/src/blend.cpp index 73c1e26..40db57e 100644 --- a/modules/ocl/src/blend.cpp +++ b/modules/ocl/src/blend.cpp @@ -51,48 +51,51 @@ using namespace cv::ocl; using namespace std; #if !defined (HAVE_OPENCL) -void cv::ocl::blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2, - oclMat& result){throw_nogpu();} +void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, + oclMat &result) +{ + throw_nogpu(); +} #else -namespace cv +namespace cv { - namespace ocl - { + namespace ocl + { ////////////////////////////////////OpenCL kernel strings////////////////////////// extern const char *blend_linear; - } + } } -void cv::ocl::blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2, - oclMat& result) +void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, + oclMat &result) { - cv::ocl::Context *ctx = img1.clCxt; - assert(ctx == img2.clCxt && ctx == weights1.clCxt && ctx == weights2.clCxt); - int channels = img1.channels(); - int depth = img1.depth(); - int rows = img1.rows; - int cols = img1.cols; - int istep = img1.step1(); - int wstep = weights1.step1(); - size_t globalSize[] = {cols * channels, rows, 1}; - size_t localSize[] = {16, 16, 1}; + cv::ocl::Context *ctx = img1.clCxt; + assert(ctx == img2.clCxt && ctx == weights1.clCxt && ctx == weights2.clCxt); + int channels = img1.oclchannels(); + int depth = img1.depth(); + int rows = img1.rows; + int cols = img1.cols; + int istep = img1.step1(); + int wstep = weights1.step1(); + size_t globalSize[] = {cols * channels, rows, 1}; + 
size_t localSize[] = {16, 16, 1}; - vector< pair > args; + vector< pair > args; - if(globalSize[0]!=0) - { - args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&img1.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&img2.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&istep )); - args.push_back( make_pair( sizeof(cl_int), (void *)&wstep )); - std::string kernelName = "BlendLinear"; + if(globalSize[0] != 0) + { + args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&img1.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&img2.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&istep )); + args.push_back( make_pair( sizeof(cl_int), (void *)&wstep )); + std::string kernelName = "BlendLinear"; - openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth); - } + openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth); + } } #endif \ No newline at end of file diff --git a/modules/ocl/src/brute_force_matcher.cpp b/modules/ocl/src/brute_force_matcher.cpp index 1716f85..0103d27 100644 --- a/modules/ocl/src/brute_force_matcher.cpp +++ b/modules/ocl/src/brute_force_matcher.cpp @@ -52,213 +52,309 @@ using namespace cv::ocl; using namespace std; #if !defined (HAVE_OPENCL) 
-cv::ocl::BruteForceMatcher_OCL_base::BruteForceMatcher_OCL_base(DistType) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::add(const vector&) { throw_nogpu(); } -const vector& cv::ocl::BruteForceMatcher_OCL_base::getTrainDescriptors() const { throw_nogpu(); return trainDescCollection; } -void cv::ocl::BruteForceMatcher_OCL_base::clear() { throw_nogpu(); } -bool cv::ocl::BruteForceMatcher_OCL_base::empty() const { throw_nogpu(); return true; } -bool cv::ocl::BruteForceMatcher_OCL_base::isMaskSupported() const { throw_nogpu(); return true; } -void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat&, const oclMat&, oclMat&, oclMat&, const oclMat&) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat&, const oclMat&, vector&) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat&, const Mat&, vector&) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat&, const oclMat&, vector&, const oclMat&) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::makeGpuCollection(oclMat&, oclMat&, const vector&) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat&, const oclMat&) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat&, const oclMat&, const oclMat&, vector&) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat&, const Mat&, const Mat&, vector&) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat&, vector&, const vector&) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat&, int, const oclMat&) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::knnMatchDownload(const oclMat&, const oclMat&, vector< vector >&, bool) { throw_nogpu(); } -void 
cv::ocl::BruteForceMatcher_OCL_base::knnMatchConvert(const Mat&, const Mat&, vector< vector >&, bool) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat&, const oclMat&, vector< vector >&, int, const oclMat&, bool) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat&, const oclMat&) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Download(const oclMat&, const oclMat&, const oclMat&, vector< vector >&, bool) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Convert(const Mat&, const Mat&, const Mat&, vector< vector >&, bool) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat&, vector< vector >&, int, const vector&, bool) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat&, float, const oclMat&) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat&, const oclMat&, const oclMat&, vector< vector >&, bool) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat&, const Mat&, const Mat&, vector< vector >&, bool) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat&, const oclMat&, vector< vector >&, float, const oclMat&, bool) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat&, oclMat&, oclMat&, oclMat&, oclMat&, float, const vector&) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat&, const oclMat&, const oclMat&, const oclMat&, vector< vector >&, bool) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat&, const Mat&, const Mat&, const Mat&, vector< vector >&, bool) { throw_nogpu(); } -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat&, 
vector< vector >&, float, const vector&, bool) { throw_nogpu(); } +cv::ocl::BruteForceMatcher_OCL_base::BruteForceMatcher_OCL_base(DistType) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::add(const vector &) +{ + throw_nogpu(); +} +const vector &cv::ocl::BruteForceMatcher_OCL_base::getTrainDescriptors() const +{ + throw_nogpu(); + return trainDescCollection; +} +void cv::ocl::BruteForceMatcher_OCL_base::clear() +{ + throw_nogpu(); +} +bool cv::ocl::BruteForceMatcher_OCL_base::empty() const +{ + throw_nogpu(); + return true; +} +bool cv::ocl::BruteForceMatcher_OCL_base::isMaskSupported() const +{ + throw_nogpu(); + return true; +} +void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat &, const oclMat &, oclMat &, oclMat &, const oclMat &) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat &, const oclMat &, vector &) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &, const Mat &, vector &) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &, const oclMat &, vector &, const oclMat &) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::makeGpuCollection(oclMat &, oclMat &, const vector &) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &, const oclMat &) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat &, const oclMat &, const oclMat &, vector &) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &, const Mat &, const Mat &, vector &) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &, vector &, const vector &) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &, int, const oclMat &) +{ + throw_nogpu(); +} +void 
cv::ocl::BruteForceMatcher_OCL_base::knnMatchDownload(const oclMat &, const oclMat &, vector< vector > &, bool) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::knnMatchConvert(const Mat &, const Mat &, vector< vector > &, bool) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &, const oclMat &, vector< vector > &, int, const oclMat &, bool) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &, const oclMat &) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Download(const oclMat &, const oclMat &, const oclMat &, vector< vector > &, bool) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Convert(const Mat &, const Mat &, const Mat &, vector< vector > &, bool) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &, vector< vector > &, int, const vector &, bool) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &, float, const oclMat &) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat &, const oclMat &, const oclMat &, vector< vector > &, bool) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat &, const Mat &, const Mat &, vector< vector > &, bool) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &, const oclMat &, vector< vector > &, float, const oclMat &, bool) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat &, oclMat &, oclMat &, oclMat &, oclMat &, float, const vector &) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat &, const oclMat &, const oclMat &, const oclMat &, vector< vector > &, bool) +{ + 
throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat &, const Mat &, const Mat &, const Mat &, vector< vector > &, bool) +{ + throw_nogpu(); +} +void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &, vector< vector > &, float, const vector &, bool) +{ + throw_nogpu(); +} #else /* !defined (HAVE_OPENCL) */ using namespace std; -namespace cv +namespace cv { - namespace ocl - { + namespace ocl + { ////////////////////////////////////OpenCL kernel strings////////////////////////// extern const char *brute_force_match; - } + } } -template -void matchUnrolledCached(const oclMat& query, const oclMat& train, const oclMat& mask, - const oclMat& trainIdx, const oclMat& distance, int distType) +template < int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/ > +void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &mask, + const oclMat &trainIdx, const oclMat &distance, int distType) { - cv::ocl::Context *ctx = query.clCxt; - size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1}; - size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1}; - const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= 2 * BLOCK_SIZE ? 
MAX_DESC_LEN : 2 * BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); - int block_size = BLOCK_SIZE; - int m_size = MAX_DESC_LEN; - vector< pair > args; - - if(globalSize[0] != 0) - { - args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data )); - args.push_back( make_pair( smemSize, (void *)NULL)); - args.push_back( make_pair( sizeof(cl_int), (void *)&block_size )); - args.push_back( make_pair( sizeof(cl_int), (void *)&m_size )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&distType )); - - std::string kernelName = "BruteForceMatch_UnrollMatch"; - - openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1); - } -} - -template -void matchUnrolledCached(const oclMat query, const oclMat* trains, int n, const oclMat mask, - const oclMat& bestTrainIdx, const oclMat& bestImgIdx, const oclMat& bestDistance, int distType) + cv::ocl::Context *ctx = query.clCxt; + size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1}; + size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1}; + const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= 2 * BLOCK_SIZE ? 
MAX_DESC_LEN : 2 * BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); + int block_size = BLOCK_SIZE; + int m_size = MAX_DESC_LEN; + vector< pair > args; + + if(globalSize[0] != 0) + { + args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data )); + args.push_back( make_pair( smemSize, (void *)NULL)); + args.push_back( make_pair( sizeof(cl_int), (void *)&block_size )); + args.push_back( make_pair( sizeof(cl_int), (void *)&m_size )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&distType )); + + std::string kernelName = "BruteForceMatch_UnrollMatch"; + + openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1); + } +} + +template < int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/ > +void matchUnrolledCached(const oclMat query, const oclMat *trains, int n, const oclMat mask, + const oclMat &bestTrainIdx, const oclMat &bestImgIdx, const oclMat &bestDistance, int distType) { } -template -void match(const oclMat& query, const oclMat& train, const oclMat& mask, - const oclMat& trainIdx, const oclMat& distance, int distType) +template < int BLOCK_SIZE, typename T/*, typename Mask*/ > +void match(const oclMat &query, const oclMat &train, const oclMat &mask, + const oclMat &trainIdx, const oclMat &distance, int distType) { - cv::ocl::Context *ctx = query.clCxt; - size_t globalSize[] = 
{(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1}; - size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1}; - const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); - int block_size = BLOCK_SIZE; - vector< pair > args; + cv::ocl::Context *ctx = query.clCxt; + size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1}; + size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1}; + const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); + int block_size = BLOCK_SIZE; + vector< pair > args; - if(globalSize[0] != 0) - { - args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data )); - args.push_back( make_pair( smemSize, (void *)NULL)); - args.push_back( make_pair( sizeof(cl_int), (void *)&block_size )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&distType )); + if(globalSize[0] != 0) + { + args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data )); + args.push_back( make_pair( smemSize, (void *)NULL)); + args.push_back( make_pair( sizeof(cl_int), (void *)&block_size )); + 
args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&distType )); - std::string kernelName = "BruteForceMatch_Match"; + std::string kernelName = "BruteForceMatch_Match"; - openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1); - } + openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1); + } } -template -void match(const oclMat query, const oclMat* trains, int n, const oclMat mask, - const oclMat &bestTrainIdx, const oclMat& bestImgIdx, const oclMat& bestDistance, int distType) +template < int BLOCK_SIZE, typename T/*, typename Mask*/ > +void match(const oclMat query, const oclMat *trains, int n, const oclMat mask, + const oclMat &bestTrainIdx, const oclMat &bestImgIdx, const oclMat &bestDistance, int distType) { } //radius_matchUnrolledCached -template -void matchUnrolledCached(const oclMat& query, const oclMat& train, float maxDistance, const oclMat& mask, - const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches, int distType) -{ - cv::ocl::Context *ctx = query.clCxt; - size_t globalSize[] = {(train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, (query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, 1}; - size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1}; - const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); - int block_size = BLOCK_SIZE; - int m_size = MAX_DESC_LEN; - vector< pair > args; - - if(globalSize[0] != 0) - { - args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data )); - args.push_back( make_pair( sizeof(cl_float), (void 
*)&maxDistance )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&nMatches.data )); - args.push_back( make_pair( smemSize, (void *)NULL)); - args.push_back( make_pair( sizeof(cl_int), (void *)&block_size )); - args.push_back( make_pair( sizeof(cl_int), (void *)&m_size )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&trainIdx.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&distType )); - - std::string kernelName = "BruteForceMatch_RadiusUnrollMatch"; - - openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1); - } +template < int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/ > +void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &mask, + const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType) +{ + cv::ocl::Context *ctx = query.clCxt; + size_t globalSize[] = {(train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, (query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, 1}; + size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1}; + const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); + int block_size = BLOCK_SIZE; + int m_size = MAX_DESC_LEN; + vector< pair > args; + + if(globalSize[0] != 0) + { + args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data )); + args.push_back( 
make_pair( sizeof(cl_mem), (void *)&train.data )); + args.push_back( make_pair( sizeof(cl_float), (void *)&maxDistance )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&nMatches.data )); + args.push_back( make_pair( smemSize, (void *)NULL)); + args.push_back( make_pair( sizeof(cl_int), (void *)&block_size )); + args.push_back( make_pair( sizeof(cl_int), (void *)&m_size )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&trainIdx.step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&distType )); + + std::string kernelName = "BruteForceMatch_RadiusUnrollMatch"; + + openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1); + } } //radius_match -template -void radius_match(const oclMat& query, const oclMat& train, float maxDistance, const oclMat& mask, - const oclMat& trainIdx, const oclMat& distance,const oclMat& nMatches, int distType) -{ - cv::ocl::Context *ctx = query.clCxt; - size_t globalSize[] = {(train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, (query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, 1}; - size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1}; - const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); - int block_size = BLOCK_SIZE; - vector< pair > args; - - if(globalSize[0] != 0) - { - args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data )); - 
args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data )); - args.push_back( make_pair( sizeof(cl_float), (void *)&maxDistance )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&nMatches.data )); - args.push_back( make_pair( smemSize, (void *)NULL)); - args.push_back( make_pair( sizeof(cl_int), (void *)&block_size )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&trainIdx.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&distType )); - - std::string kernelName = "BruteForceMatch_RadiusMatch"; - - openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1); - //float *dis = (float *)clEnqueueMapBuffer(ctx->impl->clCmdQueue, (cl_mem)distance.data, CL_TRUE, CL_MAP_READ, 0, 8, 0, NULL, NULL, NULL); - //printf("%f, %f\n", dis[0], dis[1]); - } +template < int BLOCK_SIZE, typename T/*, typename Mask*/ > +void radius_match(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &mask, + const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType) +{ + cv::ocl::Context *ctx = query.clCxt; + size_t globalSize[] = {(train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, (query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, 1}; + size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1}; + const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * 
sizeof(int); + int block_size = BLOCK_SIZE; + vector< pair > args; + + if(globalSize[0] != 0) + { + args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data )); + args.push_back( make_pair( sizeof(cl_float), (void *)&maxDistance )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&nMatches.data )); + args.push_back( make_pair( smemSize, (void *)NULL)); + args.push_back( make_pair( sizeof(cl_int), (void *)&block_size )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&trainIdx.step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&distType )); + + std::string kernelName = "BruteForceMatch_RadiusMatch"; + + openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1); + //float *dis = (float *)clEnqueueMapBuffer(ctx->impl->clCmdQueue, (cl_mem)distance.data, CL_TRUE, CL_MAP_READ, 0, 8, 0, NULL, NULL, NULL); + //printf("%f, %f\n", dis[0], dis[1]); + } } // with mask -template < typename T/*, typename Mask*/> -void matchDispatcher(const oclMat& query, const oclMat& train, const oclMat& mask, - const oclMat& trainIdx, const oclMat& distance, int distType) +template < typename T/*, typename Mask*/ > +void matchDispatcher(const oclMat &query, const oclMat &train, const oclMat &mask, + const oclMat &trainIdx, const oclMat &distance, 
int distType) { if (query.cols <= 64) { @@ -273,11 +369,11 @@ void matchDispatcher(const oclMat& query, const oclMat& train, const oclMat& mas matchUnrolled<16, 256, Dist>(query, train, mask, trainIdx, distance, stream); } else if (query.cols <= 512) - { + { matchUnrolled<16, 512, Dist>(query, train, mask, trainIdx, distance, stream); } else if (query.cols <= 1024) - { + { matchUnrolled<16, 1024, Dist>(query, train, mask, trainIdx, distance, stream); }*/ else @@ -287,11 +383,11 @@ void matchDispatcher(const oclMat& query, const oclMat& train, const oclMat& mas } // without mask -template -void matchDispatcher(const oclMat& query, const oclMat& train, const oclMat& trainIdx, const oclMat& distance, int distType) +template < typename T/*, typename Mask*/ > +void matchDispatcher(const oclMat &query, const oclMat &train, const oclMat &trainIdx, const oclMat &distance, int distType) { - oclMat mask; - if (query.cols <= 64) + oclMat mask; + if (query.cols <= 64) { matchUnrolledCached<16, 64, T>(query, train, mask, trainIdx, distance, distType); } @@ -304,11 +400,11 @@ void matchDispatcher(const oclMat& query, const oclMat& train, const oclMat& tra matchUnrolled<16, 256, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance); } else if (query.cols <= 512) - { + { matchUnrolled<16, 512, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance); } else if (query.cols <= 1024) - { + { matchUnrolled<16, 1024, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance); }*/ else @@ -317,9 +413,9 @@ void matchDispatcher(const oclMat& query, const oclMat& train, const oclMat& tra } } -template -void matchDispatcher(const oclMat& query, const oclMat* trains, int n, const oclMat& mask, - const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, int distType) +template < typename T/*, typename Mask*/ > +void matchDispatcher(const oclMat &query, const oclMat *trains, int n, const oclMat &mask, + const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, int 
distType) { if (query.cols <= 64) { @@ -334,11 +430,11 @@ void matchDispatcher(const oclMat& query, const oclMat* trains, int n, const ocl matchUnrolled<16, 256, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream); } else if (query.cols <= 512) - { + { matchUnrolled<16, 512, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream); } else if (query.cols <= 1024) - { + { matchUnrolled<16, 1024, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream); }*/ else @@ -347,11 +443,11 @@ void matchDispatcher(const oclMat& query, const oclMat* trains, int n, const ocl } } -template -void matchDispatcher(const oclMat& query, const oclMat* trains, int n, const oclMat& trainIdx, - const oclMat& imgIdx, const oclMat& distance, int distType) +template < typename T/*, typename Mask*/ > +void matchDispatcher(const oclMat &query, const oclMat *trains, int n, const oclMat &trainIdx, + const oclMat &imgIdx, const oclMat &distance, int distType) { - oclMat mask; + oclMat mask; if (query.cols <= 64) { matchUnrolledCached<16, 64, T>(query, trains, n, mask, trainIdx, imgIdx, distance, distType); @@ -365,11 +461,11 @@ void matchDispatcher(const oclMat& query, const oclMat* trains, int n, const ocl matchUnrolled<16, 256, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream); } else if (query.cols <= 512) - { + { matchUnrolled<16, 512, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream); } else if (query.cols <= 1024) - { + { matchUnrolled<16, 1024, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream); }*/ else @@ -380,9 +476,9 @@ void matchDispatcher(const oclMat& query, const oclMat* trains, int n, const ocl //radius matchDispatcher // with mask -template < typename T/*, typename Mask*/> -void matchDispatcher(const oclMat& query, const oclMat& train, float maxDistance, const oclMat& mask, - const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches, int distType) +template < typename T/*, typename Mask*/ > 
+void matchDispatcher(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &mask, + const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType) { if (query.cols <= 64) { @@ -411,12 +507,12 @@ void matchDispatcher(const oclMat& query, const oclMat& train, float maxDistance } // without mask -template -void matchDispatcher(const oclMat& query, const oclMat& train, float maxDistance, const oclMat& trainIdx, - const oclMat& distance, const oclMat& nMatches, int distType) +template < typename T/*, typename Mask*/ > +void matchDispatcher(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &trainIdx, + const oclMat &distance, const oclMat &nMatches, int distType) { - oclMat mask; - if (query.cols <= 64) + oclMat mask; + if (query.cols <= 64) { matchUnrolledCached<16, 64, T>(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType); } @@ -442,9 +538,9 @@ void matchDispatcher(const oclMat& query, const oclMat& train, float maxDistance } } -template < typename T/*, typename Mask*/> -void matchDispatcher(const oclMat& query, const oclMat& train, int n, float maxDistance, const oclMat& mask, - const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches, int distType) +template < typename T/*, typename Mask*/ > +void matchDispatcher(const oclMat &query, const oclMat &train, int n, float maxDistance, const oclMat &mask, + const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType) { if (query.cols <= 64) { @@ -473,12 +569,12 @@ void matchDispatcher(const oclMat& query, const oclMat& train, int n, float maxD } // without mask -template -void matchDispatcher(const oclMat& query, const oclMat& train, int n, float maxDistance, const oclMat& trainIdx, - const oclMat& distance, const oclMat& nMatches, int distType) +template < typename T/*, typename Mask*/ > +void matchDispatcher(const oclMat &query, const oclMat &train, int n, float maxDistance, const oclMat 
&trainIdx, + const oclMat &distance, const oclMat &nMatches, int distType) { - oclMat mask; - if (query.cols <= 64) + oclMat mask; + if (query.cols <= 64) { matchUnrolledCached<16, 64, T>(query, train, n, maxDistance, mask, trainIdx, distance, nMatches, distType); } @@ -505,143 +601,143 @@ void matchDispatcher(const oclMat& query, const oclMat& train, int n, float maxD } //knn match Dispatcher -template -void knn_matchUnrolledCached(const oclMat& query, const oclMat& train, const oclMat& mask, - const oclMat& trainIdx, const oclMat& distance, int distType) -{ - cv::ocl::Context *ctx = query.clCxt; - size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1}; - size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1}; - const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= BLOCK_SIZE ? MAX_DESC_LEN : BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); - int block_size = BLOCK_SIZE; - int m_size = MAX_DESC_LEN; - vector< pair > args; - - if(globalSize[0] != 0) - { - args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data )); - args.push_back( make_pair( smemSize, (void *)NULL)); - args.push_back( make_pair( sizeof(cl_int), (void *)&block_size )); - args.push_back( make_pair( sizeof(cl_int), (void *)&m_size )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&distType )); - - std::string kernelName = 
"BruteForceMatch_knnUnrollMatch"; - - openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1); - } -} - -template -void knn_match(const oclMat& query, const oclMat& train, const oclMat& mask, - const oclMat& trainIdx, const oclMat& distance, int distType) -{ - cv::ocl::Context *ctx = query.clCxt; - size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1}; - size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1}; - const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); - int block_size = BLOCK_SIZE; - vector< pair > args; - - if(globalSize[0] != 0) - { - args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data )); - args.push_back( make_pair( smemSize, (void *)NULL)); - args.push_back( make_pair( sizeof(cl_int), (void *)&block_size )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&distType )); - - std::string kernelName = "BruteForceMatch_knnMatch"; - - openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1); - } -} - -template -void calcDistanceUnrolled(const oclMat& query, const oclMat& train, const oclMat& mask, const oclMat& allDist, int distType) -{ - cv::ocl::Context *ctx = query.clCxt; - size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1}; - size_t localSize[] = 
{BLOCK_SIZE, BLOCK_SIZE, 1}; - const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); - int block_size = BLOCK_SIZE; - int m_size = MAX_DESC_LEN; - vector< pair > args; - - if(globalSize[0] != 0) - { - args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&allDist.data )); - args.push_back( make_pair( smemSize, (void *)NULL)); - args.push_back( make_pair( sizeof(cl_int), (void *)&block_size )); - args.push_back( make_pair( sizeof(cl_int), (void *)&m_size )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&distType )); - - std::string kernelName = "BruteForceMatch_calcDistanceUnrolled"; - - openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1); - } -} - -template -void calcDistance(const oclMat& query, const oclMat& train, const oclMat& mask, const oclMat& allDist, int distType) +template < int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/ > +void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &mask, + const oclMat &trainIdx, const oclMat &distance, int distType) +{ + cv::ocl::Context *ctx = query.clCxt; + size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1}; + size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1}; + const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= BLOCK_SIZE ? 
MAX_DESC_LEN : BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); + int block_size = BLOCK_SIZE; + int m_size = MAX_DESC_LEN; + vector< pair > args; + + if(globalSize[0] != 0) + { + args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data )); + args.push_back( make_pair( smemSize, (void *)NULL)); + args.push_back( make_pair( sizeof(cl_int), (void *)&block_size )); + args.push_back( make_pair( sizeof(cl_int), (void *)&m_size )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&distType )); + + std::string kernelName = "BruteForceMatch_knnUnrollMatch"; + + openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1); + } +} + +template < int BLOCK_SIZE, typename T/*, typename Mask*/ > +void knn_match(const oclMat &query, const oclMat &train, const oclMat &mask, + const oclMat &trainIdx, const oclMat &distance, int distType) +{ + cv::ocl::Context *ctx = query.clCxt; + size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1}; + size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1}; + const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); + int block_size = BLOCK_SIZE; + vector< pair > args; + + if(globalSize[0] != 0) + { + args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data )); + 
args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data )); + args.push_back( make_pair( smemSize, (void *)NULL)); + args.push_back( make_pair( sizeof(cl_int), (void *)&block_size )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&distType )); + + std::string kernelName = "BruteForceMatch_knnMatch"; + + openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1); + } +} + +template < int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/ > +void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat &mask, const oclMat &allDist, int distType) +{ + cv::ocl::Context *ctx = query.clCxt; + size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1}; + size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1}; + const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); + int block_size = BLOCK_SIZE; + int m_size = MAX_DESC_LEN; + vector< pair > args; + + if(globalSize[0] != 0) + { + args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&allDist.data )); + args.push_back( make_pair( smemSize, (void *)NULL)); + args.push_back( make_pair( sizeof(cl_int), (void *)&block_size )); + args.push_back( make_pair( sizeof(cl_int), (void *)&m_size )); + args.push_back( make_pair( 
sizeof(cl_int), (void *)&query.rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&distType )); + + std::string kernelName = "BruteForceMatch_calcDistanceUnrolled"; + + openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1); + } +} + +template < int BLOCK_SIZE, typename T/*, typename Mask*/ > +void calcDistance(const oclMat &query, const oclMat &train, const oclMat &mask, const oclMat &allDist, int distType) { cv::ocl::Context *ctx = query.clCxt; - size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1}; - size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1}; - const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); - int block_size = BLOCK_SIZE; - vector< pair > args; - - if(globalSize[0] != 0) - { - args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&allDist.data )); - args.push_back( make_pair( smemSize, (void *)NULL)); - args.push_back( make_pair( sizeof(cl_int), (void *)&block_size )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&query.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&distType )); - - std::string kernelName = "BruteForceMatch_calcDistance"; - - openCLExecuteKernel(ctx, 
&brute_force_match, kernelName, globalSize, localSize, args, -1, -1); - } + size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1}; + size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1}; + const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int); + int block_size = BLOCK_SIZE; + vector< pair > args; + + if(globalSize[0] != 0) + { + args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&allDist.data )); + args.push_back( make_pair( smemSize, (void *)NULL)); + args.push_back( make_pair( sizeof(cl_int), (void *)&block_size )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&query.step )); + args.push_back( make_pair( sizeof(cl_int), (void *)&distType )); + + std::string kernelName = "BruteForceMatch_calcDistance"; + + openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1); + } } /////////////////////////////////////////////////////////////////////////////// // Calc Distance dispatcher -template -void calcDistanceDispatcher(const oclMat& query, const oclMat& train, const oclMat& mask, - const oclMat& allDist, int distType) +template < typename T/*, typename Mask*/ > +void calcDistanceDispatcher(const oclMat &query, const oclMat &train, const oclMat &mask, + const oclMat &allDist, int distType) { if (query.cols <= 64) { @@ -669,9 +765,9 @@ void calcDistanceDispatcher(const oclMat& query, const oclMat& train, const oclM } } -template -void match2Dispatcher(const oclMat& query, const oclMat& train, const 
oclMat& mask, - const oclMat& trainIdx, const oclMat& distance, int distType) +template < typename T/*, typename Mask*/ > +void match2Dispatcher(const oclMat &query, const oclMat &train, const oclMat &mask, + const oclMat &trainIdx, const oclMat &distance, int distType) { if (query.cols <= 64) { @@ -686,11 +782,11 @@ void match2Dispatcher(const oclMat& query, const oclMat& train, const oclMat& ma matchUnrolled<16, 256, Dist>(query, train, mask, static_cast< DevMem2D_ >(trainIdx), static_cast< DevMem2D_ > (distance), stream); } else if (query.cols <= 512) - { + { matchUnrolled<16, 512, Dist>(query, train, mask, static_cast< DevMem2D_ >(trainIdx), static_cast< DevMem2D_ > (distance), stream); } else if (query.cols <= 1024) - { + { matchUnrolled<16, 1024, Dist>(query, train, mask, static_cast< DevMem2D_ >(trainIdx), static_cast< DevMem2D_ > (distance), stream); }*/ else @@ -700,40 +796,40 @@ void match2Dispatcher(const oclMat& query, const oclMat& train, const oclMat& ma } template -void findKnnMatch(int k, const oclMat& trainIdx, const oclMat& distance, const oclMat& allDist, int distType) +void findKnnMatch(int k, const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist, int distType) { - cv::ocl::Context *ctx = trainIdx.clCxt; - size_t globalSize[] = {trainIdx.rows * BLOCK_SIZE, 1, 1}; - size_t localSize[] = {BLOCK_SIZE, 1, 1}; - int block_size = BLOCK_SIZE; - std::string kernelName = "BruteForceMatch_findBestMatch"; + cv::ocl::Context *ctx = trainIdx.clCxt; + size_t globalSize[] = {trainIdx.rows * BLOCK_SIZE, 1, 1}; + size_t localSize[] = {BLOCK_SIZE, 1, 1}; + int block_size = BLOCK_SIZE; + std::string kernelName = "BruteForceMatch_findBestMatch"; for (int i = 0; i < k; ++i) - { - vector< pair > args; + { + vector< pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&allDist.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data )); - 
args.push_back( make_pair( sizeof(cl_mem), (void *)&i)); - args.push_back( make_pair( sizeof(cl_int), (void *)&block_size )); - //args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows )); - //args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols )); - //args.push_back( make_pair( sizeof(cl_int), (void *)&query.step )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&allDist.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&i)); + args.push_back( make_pair( sizeof(cl_int), (void *)&block_size )); + //args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows )); + //args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols )); + //args.push_back( make_pair( sizeof(cl_int), (void *)&query.step )); - openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1); + openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1); } } -void findKnnMatchDispatcher(int k, const oclMat& trainIdx, const oclMat& distance, const oclMat& allDist, int distType) +void findKnnMatchDispatcher(int k, const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist, int distType) { findKnnMatch<256>(k, trainIdx, distance, allDist, distType); } //with mask -template -void kmatchDispatcher(const oclMat& query, const oclMat& train, int k, const oclMat& mask, - const oclMat& trainIdx, const oclMat& distance, const oclMat& allDist, int distType) +template < typename T/*, typename Mask*/ > +void kmatchDispatcher(const oclMat &query, const oclMat &train, int k, const oclMat &mask, + const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist, int distType) { if (k == 2) { @@ -747,11 +843,11 @@ void kmatchDispatcher(const oclMat& query, const oclMat& train, int k, const ocl } //without mask -template -void kmatchDispatcher(const 
oclMat& query, const oclMat& train, int k, - const oclMat& trainIdx, const oclMat& distance, const oclMat& allDist, int distType) +template < typename T/*, typename Mask*/ > +void kmatchDispatcher(const oclMat &query, const oclMat &train, int k, + const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist, int distType) { - oclMat mask; + oclMat mask; if (k == 2) { match2Dispatcher(query, train, mask, trainIdx, distance, distType); @@ -765,103 +861,103 @@ void kmatchDispatcher(const oclMat& query, const oclMat& train, int k, -template -void ocl_matchL1_gpu(const oclMat& query, const oclMat& train, const oclMat& mask, - const oclMat& trainIdx, const oclMat& distance) +template +void ocl_matchL1_gpu(const oclMat &query, const oclMat &train, const oclMat &mask, + const oclMat &trainIdx, const oclMat &distance) { - int distType = 0; - if (mask.data) - { - matchDispatcher(query, train, mask, trainIdx, distance, distType); - } - else - { - matchDispatcher< T >(query, train, trainIdx, distance, distType); - } + int distType = 0; + if (mask.data) + { + matchDispatcher(query, train, mask, trainIdx, distance, distType); + } + else + { + matchDispatcher< T >(query, train, trainIdx, distance, distType); + } } -template -void ocl_matchL1_gpu(const oclMat& query, const oclMat& trains, const oclMat& masks, - const oclMat& trainIdx, const oclMat &imgIdx, const oclMat& distance) +template +void ocl_matchL1_gpu(const oclMat &query, const oclMat &trains, const oclMat &masks, + const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance) { - int distType = 0; + int distType = 0; - if (masks.data) - { - matchDispatcher(query, (const oclMat *)trains.ptr(), trains.cols, masks, trainIdx, imgIdx, distance, distType); - } - else - { - matchDispatcher(query, (const oclMat *)trains.ptr(), trains.cols, trainIdx, imgIdx, distance, distType); - } + if (masks.data) + { + matchDispatcher(query, (const oclMat *)trains.ptr(), trains.cols, masks, trainIdx, imgIdx, distance, 
distType); + } + else + { + matchDispatcher(query, (const oclMat *)trains.ptr(), trains.cols, trainIdx, imgIdx, distance, distType); + } } -template -void ocl_matchL2_gpu(const oclMat& query, const oclMat& train, const oclMat& mask, - const oclMat& trainIdx, const oclMat& distance) +template +void ocl_matchL2_gpu(const oclMat &query, const oclMat &train, const oclMat &mask, + const oclMat &trainIdx, const oclMat &distance) { - int distType = 1; - if (mask.data) - { - matchDispatcher(query, train, mask, trainIdx, distance, distType); - } - else - { - matchDispatcher(query, train, trainIdx, distance, distType); - } + int distType = 1; + if (mask.data) + { + matchDispatcher(query, train, mask, trainIdx, distance, distType); + } + else + { + matchDispatcher(query, train, trainIdx, distance, distType); + } } -template -void ocl_matchL2_gpu(const oclMat& query, const oclMat& trains, const oclMat& masks, - const oclMat& trainIdx, const oclMat &imgIdx, const oclMat& distance) +template +void ocl_matchL2_gpu(const oclMat &query, const oclMat &trains, const oclMat &masks, + const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance) { - int distType = 1; - if (masks.data) - { - matchDispatcher(query, (const oclMat *)trains.ptr(), trains.cols, masks, trainIdx, imgIdx, distance, distType); - } - else - { - matchDispatcher(query, (const oclMat *)trains.ptr(), trains.cols, trainIdx, imgIdx, distance, distType); - } + int distType = 1; + if (masks.data) + { + matchDispatcher(query, (const oclMat *)trains.ptr(), trains.cols, masks, trainIdx, imgIdx, distance, distType); + } + else + { + matchDispatcher(query, (const oclMat *)trains.ptr(), trains.cols, trainIdx, imgIdx, distance, distType); + } } -template -void ocl_matchHamming_gpu(const oclMat& query, const oclMat& train, const oclMat& mask, - const oclMat& trainIdx, const oclMat& distance) +template +void ocl_matchHamming_gpu(const oclMat &query, const oclMat &train, const oclMat &mask, + const oclMat &trainIdx, const 
oclMat &distance) { - int distType = 2; - if (mask.data) - { - matchDispatcher(query, train, mask, trainIdx, distance, distType); - } - else - { - matchDispatcher< T >(query, train, trainIdx, distance, distType); - } + int distType = 2; + if (mask.data) + { + matchDispatcher(query, train, mask, trainIdx, distance, distType); + } + else + { + matchDispatcher< T >(query, train, trainIdx, distance, distType); + } } -template -void ocl_matchHamming_gpu(const oclMat& query, const oclMat& trains, const oclMat& masks, - const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance) +template +void ocl_matchHamming_gpu(const oclMat &query, const oclMat &trains, const oclMat &masks, + const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance) { - int distType = 2; - if (masks.data) - { - matchDispatcher(query, (const oclMat *)trains.ptr(), trains.cols, masks, trainIdx, imgIdx, distance, distType); - } - else - { - matchDispatcher(query, (const oclMat *)trains.ptr(), trains.cols, trainIdx, imgIdx, distance, distType); - } + int distType = 2; + if (masks.data) + { + matchDispatcher(query, (const oclMat *)trains.ptr(), trains.cols, masks, trainIdx, imgIdx, distance, distType); + } + else + { + matchDispatcher(query, (const oclMat *)trains.ptr(), trains.cols, trainIdx, imgIdx, distance, distType); + } } // knn caller -template -void ocl_matchL1_gpu(const oclMat& query, const oclMat& train, int k, const oclMat& mask, - const oclMat& trainIdx, const oclMat& distance, const oclMat& allDist) +template +void ocl_matchL1_gpu(const oclMat &query, const oclMat &train, int k, const oclMat &mask, + const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist) { - int distType = 0; + int distType = 0; if (mask.data) kmatchDispatcher(query, train, k, mask, trainIdx, distance, allDist, distType); @@ -869,11 +965,11 @@ void ocl_matchL1_gpu(const oclMat& query, const oclMat& train, int k, const oclM kmatchDispatcher(query, train, k, trainIdx, distance, allDist, 
distType); } -template -void ocl_matchL2_gpu(const oclMat& query, const oclMat& train, int k, const oclMat& mask, - const oclMat& trainIdx, const oclMat& distance, const oclMat& allDist) +template +void ocl_matchL2_gpu(const oclMat &query, const oclMat &train, int k, const oclMat &mask, + const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist) { - int distType = 1; + int distType = 1; if (mask.data) kmatchDispatcher(query, train, k, mask, trainIdx, distance, allDist, distType); @@ -881,92 +977,92 @@ void ocl_matchL2_gpu(const oclMat& query, const oclMat& train, int k, const oclM kmatchDispatcher(query, train, k, trainIdx, distance, allDist, distType); } -template -void ocl_matchHamming_gpu(const oclMat& query, const oclMat& train, int k, const oclMat& mask, - const oclMat& trainIdx, const oclMat& distance, const oclMat& allDist) +template +void ocl_matchHamming_gpu(const oclMat &query, const oclMat &train, int k, const oclMat &mask, + const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist) { - int distType = 2; + int distType = 2; - if (mask.data) - kmatchDispatcher(query, train, k, mask, trainIdx, distance, allDist, distType); - else - kmatchDispatcher(query, train, k, trainIdx, distance, allDist, distType); + if (mask.data) + kmatchDispatcher(query, train, k, mask, trainIdx, distance, allDist, distType); + else + kmatchDispatcher(query, train, k, trainIdx, distance, allDist, distType); } //radius caller -template -void ocl_matchL1_gpu(const oclMat& query, const oclMat& train, float maxDistance, const oclMat& mask, - const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches) +template +void ocl_matchL1_gpu(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &mask, + const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches) { - int distType = 0; + int distType = 0; - if (mask.data) - matchDispatcher(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType); - else - 
matchDispatcher(query, train, maxDistance, trainIdx, distance, nMatches, distType); + if (mask.data) + matchDispatcher(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType); + else + matchDispatcher(query, train, maxDistance, trainIdx, distance, nMatches, distType); } -template -void ocl_matchL2_gpu(const oclMat& query, const oclMat& train, float maxDistance, const oclMat& mask, - const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches) +template +void ocl_matchL2_gpu(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &mask, + const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches) { - int distType = 1; + int distType = 1; - if (mask.data) - matchDispatcher(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType); - else - matchDispatcher(query, train, maxDistance, trainIdx, distance, nMatches, distType); + if (mask.data) + matchDispatcher(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType); + else + matchDispatcher(query, train, maxDistance, trainIdx, distance, nMatches, distType); } -template -void ocl_matchHamming_gpu(const oclMat& query, const oclMat& train, float maxDistance, const oclMat& mask, - const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches) +template +void ocl_matchHamming_gpu(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &mask, + const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches) { - int distType = 2; + int distType = 2; - if (mask.data) - matchDispatcher(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType); - else - matchDispatcher(query, train, maxDistance, trainIdx, distance, nMatches, distType); + if (mask.data) + matchDispatcher(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType); + else + matchDispatcher(query, train, maxDistance, trainIdx, distance, nMatches, distType); } 
cv::ocl::BruteForceMatcher_OCL_base::BruteForceMatcher_OCL_base(DistType distType_) : distType(distType_) { } -void cv::ocl::BruteForceMatcher_OCL_base::add(const vector& descCollection) +void cv::ocl::BruteForceMatcher_OCL_base::add(const vector &descCollection) { - trainDescCollection.insert(trainDescCollection.end(), descCollection.begin(), descCollection.end()); + trainDescCollection.insert(trainDescCollection.end(), descCollection.begin(), descCollection.end()); } -const vector& cv::ocl::BruteForceMatcher_OCL_base::getTrainDescriptors() const -{ - return trainDescCollection; +const vector &cv::ocl::BruteForceMatcher_OCL_base::getTrainDescriptors() const +{ + return trainDescCollection; } -void cv::ocl::BruteForceMatcher_OCL_base::clear() +void cv::ocl::BruteForceMatcher_OCL_base::clear() { - trainDescCollection.clear(); + trainDescCollection.clear(); } -bool cv::ocl::BruteForceMatcher_OCL_base::empty() const -{ - return trainDescCollection.empty(); +bool cv::ocl::BruteForceMatcher_OCL_base::empty() const +{ + return trainDescCollection.empty(); } -bool cv::ocl::BruteForceMatcher_OCL_base::isMaskSupported() const -{ - return true; +bool cv::ocl::BruteForceMatcher_OCL_base::isMaskSupported() const +{ + return true; } -void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat& query, const oclMat& train, - oclMat& trainIdx, oclMat& distance, const oclMat& mask) -{ - if (query.empty() || train.empty()) +void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat &query, const oclMat &train, + oclMat &trainIdx, oclMat &distance, const oclMat &mask) +{ + if (query.empty() || train.empty()) return; - typedef void (*caller_t)(const oclMat& query, const oclMat& train, const oclMat& mask, - const oclMat& trainIdx, const oclMat& distance); + typedef void (*caller_t)(const oclMat & query, const oclMat & train, const oclMat & mask, + const oclMat & trainIdx, const oclMat & distance); static const caller_t callers[3][6] = { @@ -991,27 +1087,27 @@ void 
cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat& query, const CV_Assert(train.cols == query.cols && train.type() == query.type()); const int nQuery = query.rows; - trainIdx.create(1, nQuery, CV_32S); - distance.create(1, nQuery, CV_32F); + trainIdx.create(1, nQuery, CV_32S); + distance.create(1, nQuery, CV_32F); - caller_t func = callers[distType][query.depth()]; - func(query, train, mask, trainIdx, distance); + caller_t func = callers[distType][query.depth()]; + func(query, train, mask, trainIdx, distance); } -void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat& trainIdx, const oclMat& distance, vector&matches) -{ - if (trainIdx.empty() || distance.empty()) +void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat &trainIdx, const oclMat &distance, vector &matches) +{ + if (trainIdx.empty() || distance.empty()) return; - + Mat trainIdxCPU(trainIdx); Mat distanceCPU(distance); matchConvert(trainIdxCPU, distanceCPU, matches); } -void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat& trainIdx, const Mat& distance, vector&matches) -{ - if (trainIdx.empty() || distance.empty()) +void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &trainIdx, const Mat &distance, vector &matches) +{ + if (trainIdx.empty() || distance.empty()) return; CV_Assert(trainIdx.type() == CV_32SC1); @@ -1022,8 +1118,8 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat& trainIdx, cons matches.clear(); matches.reserve(nQuery); - const int* trainIdx_ptr = trainIdx.ptr(); - const float* distance_ptr = distance.ptr(); + const int *trainIdx_ptr = trainIdx.ptr(); + const float *distance_ptr = distance.ptr(); for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++distance_ptr) { int trainIdx = *trainIdx_ptr; @@ -1039,24 +1135,24 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat& trainIdx, cons } } -void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat& query, const oclMat& train, 
vector& matches, const oclMat& mask) +void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &query, const oclMat &train, vector &matches, const oclMat &mask) { - oclMat trainIdx, distance; + oclMat trainIdx, distance; matchSingle(query, train, trainIdx, distance, mask); matchDownload(trainIdx, distance, matches); } -void cv::ocl::BruteForceMatcher_OCL_base::makeGpuCollection(oclMat& trainCollection, oclMat& maskCollection, const vector& masks) -{ +void cv::ocl::BruteForceMatcher_OCL_base::makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const vector &masks) +{ - if (empty()) + if (empty()) return; if (masks.empty()) { Mat trainCollectionCPU(1, static_cast(trainDescCollection.size()), CV_8UC(sizeof(oclMat))); - oclMat* trainCollectionCPU_ptr = trainCollectionCPU.ptr(); + oclMat *trainCollectionCPU_ptr = trainCollectionCPU.ptr(); for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr) *trainCollectionCPU_ptr = trainDescCollection[i]; @@ -1071,13 +1167,13 @@ void cv::ocl::BruteForceMatcher_OCL_base::makeGpuCollection(oclMat& trainCollect Mat trainCollectionCPU(1, static_cast(trainDescCollection.size()), CV_8UC(sizeof(oclMat))); Mat maskCollectionCPU(1, static_cast(trainDescCollection.size()), CV_8UC(sizeof(oclMat))); - oclMat* trainCollectionCPU_ptr = trainCollectionCPU.ptr(); - oclMat* maskCollectionCPU_ptr = maskCollectionCPU.ptr(); + oclMat *trainCollectionCPU_ptr = trainCollectionCPU.ptr(); + oclMat *maskCollectionCPU_ptr = maskCollectionCPU.ptr(); for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr) { - const oclMat& train = trainDescCollection[i]; - const oclMat& mask = masks[i]; + const oclMat &train = trainDescCollection[i]; + const oclMat &mask = masks[i]; CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.cols == train.rows)); @@ -1090,14 +1186,14 @@ void cv::ocl::BruteForceMatcher_OCL_base::makeGpuCollection(oclMat& 
trainCollect } } -void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat& query, const oclMat& trainCollection, oclMat& trainIdx, - oclMat& imgIdx, oclMat& distance, const oclMat& masks) -{ - if (query.empty() || trainCollection.empty()) +void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, const oclMat &trainCollection, oclMat &trainIdx, + oclMat &imgIdx, oclMat &distance, const oclMat &masks) +{ + if (query.empty() || trainCollection.empty()) return; - typedef void (*caller_t)(const oclMat& query, const oclMat& trains, const oclMat& masks, - const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance); + typedef void (*caller_t)(const oclMat & query, const oclMat & trains, const oclMat & masks, + const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance); static const caller_t callers[3][6] = { @@ -1121,10 +1217,10 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat& query, c CV_Assert(query.channels() == 1 && query.depth() < CV_64F); const int nQuery = query.rows; - - trainIdx.create(1, nQuery, CV_32S); - imgIdx.create(1, nQuery, CV_32S); - distance.create(1, nQuery, CV_32F); + + trainIdx.create(1, nQuery, CV_32S); + imgIdx.create(1, nQuery, CV_32S); + distance.create(1, nQuery, CV_32F); caller_t func = callers[distType][query.depth()]; CV_Assert(func != 0); @@ -1132,9 +1228,9 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat& query, c func(query, trainCollection, masks, trainIdx, imgIdx, distance); } -void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, vector& matches) +void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, vector &matches) { - if (trainIdx.empty() || imgIdx.empty() || distance.empty()) + if (trainIdx.empty() || imgIdx.empty() || distance.empty()) return; Mat trainIdxCPU(trainIdx); @@ -1144,9 
+1240,9 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat& trainIdx, matchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, matches); } -void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, vector& matches) -{ - if (trainIdx.empty() || imgIdx.empty() || distance.empty()) +void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, vector &matches) +{ + if (trainIdx.empty() || imgIdx.empty() || distance.empty()) return; CV_Assert(trainIdx.type() == CV_32SC1); @@ -1158,9 +1254,9 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat& trainIdx, cons matches.clear(); matches.reserve(nQuery); - const int* trainIdx_ptr = trainIdx.ptr(); - const int* imgIdx_ptr = imgIdx.ptr(); - const float* distance_ptr = distance.ptr(); + const int *trainIdx_ptr = trainIdx.ptr(); + const int *imgIdx_ptr = imgIdx.ptr(); + const float *distance_ptr = distance.ptr(); for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr) { int trainIdx = *trainIdx_ptr; @@ -1178,9 +1274,9 @@ void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat& trainIdx, cons } } -void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat& query, vector& matches, const vector& masks) -{ - oclMat trainCollection; +void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &query, vector &matches, const vector &masks) +{ + oclMat trainCollection; oclMat maskCollection; makeGpuCollection(trainCollection, maskCollection, masks); @@ -1192,14 +1288,14 @@ void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat& query, vector >& matches, bool compactResult) +void cv::ocl::BruteForceMatcher_OCL_base::knnMatchDownload(const oclMat &trainIdx, const oclMat &distance, vector< vector > &matches, bool compactResult) { - if (trainIdx.empty() || distance.empty()) + if (trainIdx.empty() || distance.empty()) return; Mat 
trainIdxCPU(trainIdx); @@ -1257,9 +1353,9 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatchDownload(const oclMat& trainId knnMatchConvert(trainIdxCPU, distanceCPU, matches, compactResult); } -void cv::ocl::BruteForceMatcher_OCL_base::knnMatchConvert(const Mat& trainIdx, const Mat& distance, vector< vector >& matches, bool compactResult) -{ - if (trainIdx.empty() || distance.empty()) +void cv::ocl::BruteForceMatcher_OCL_base::knnMatchConvert(const Mat &trainIdx, const Mat &distance, vector< vector > &matches, bool compactResult) +{ + if (trainIdx.empty() || distance.empty()) return; CV_Assert(trainIdx.type() == CV_32SC2 || trainIdx.type() == CV_32SC1); @@ -1268,18 +1364,18 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatchConvert(const Mat& trainIdx, c CV_Assert(trainIdx.isContinuous() && distance.isContinuous()); const int nQuery = trainIdx.type() == CV_32SC2 ? trainIdx.cols : trainIdx.rows; - const int k = trainIdx.type() == CV_32SC2 ? 2 :trainIdx.cols; + const int k = trainIdx.type() == CV_32SC2 ? 
2 : trainIdx.cols; matches.clear(); matches.reserve(nQuery); - const int* trainIdx_ptr = trainIdx.ptr(); - const float* distance_ptr = distance.ptr(); + const int *trainIdx_ptr = trainIdx.ptr(); + const float *distance_ptr = distance.ptr(); for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx) { matches.push_back(vector()); - vector& curMatches = matches.back(); + vector &curMatches = matches.back(); curMatches.reserve(k); for (int i = 0; i < k; ++i, ++trainIdx_ptr, ++distance_ptr) @@ -1301,22 +1397,22 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatchConvert(const Mat& trainIdx, c } } -void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat& query, const oclMat& train, vector< vector >& matches - , int k, const oclMat& mask, bool compactResult) +void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &query, const oclMat &train, vector< vector > &matches + , int k, const oclMat &mask, bool compactResult) { - oclMat trainIdx, distance, allDist; + oclMat trainIdx, distance, allDist; knnMatchSingle(query, train, trainIdx, distance, allDist, k, mask); knnMatchDownload(trainIdx, distance, matches, compactResult); } -void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat& query, const oclMat& trainCollection, - oclMat& trainIdx, oclMat& imgIdx, oclMat& distance, const oclMat& maskCollection) +void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat &query, const oclMat &trainCollection, + oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, const oclMat &maskCollection) { - if (query.empty() || trainCollection.empty()) + if (query.empty() || trainCollection.empty()) return; - typedef void (*caller_t)(const oclMat& query, const oclMat& trains, const oclMat& masks, - const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance); + typedef void (*caller_t)(const oclMat & query, const oclMat & trains, const oclMat & masks, + const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance); #if 0 static 
const caller_t callers[3][6] = { @@ -1341,9 +1437,9 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat& quer const int nQuery = query.rows; - trainIdx.create(1, nQuery, CV_32SC2); - imgIdx.create(1, nQuery, CV_32SC2); - distance.create(1, nQuery, CV_32SC2); + trainIdx.create(1, nQuery, CV_32SC2); + imgIdx.create(1, nQuery, CV_32SC2); + distance.create(1, nQuery, CV_32SC2); trainIdx.setTo(Scalar::all(-1)); @@ -1353,10 +1449,10 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat& quer //func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, cc, StreamAccessor::getStream(stream)); } -void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Download(const oclMat& trainIdx, const oclMat& imgIdx, - const oclMat& distance, vector< vector >& matches, bool compactResult) +void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, + const oclMat &distance, vector< vector > &matches, bool compactResult) { - if (trainIdx.empty() || imgIdx.empty() || distance.empty()) + if (trainIdx.empty() || imgIdx.empty() || distance.empty()) return; Mat trainIdxCPU(trainIdx); @@ -1366,10 +1462,10 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Download(const oclMat& trainI knnMatch2Convert(trainIdxCPU, imgIdxCPU, distanceCPU, matches, compactResult); } -void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, - vector< vector >& matches, bool compactResult) +void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, + vector< vector > &matches, bool compactResult) { - if (trainIdx.empty() || imgIdx.empty() || distance.empty()) + if (trainIdx.empty() || imgIdx.empty() || distance.empty()) return; CV_Assert(trainIdx.type() == CV_32SC2); @@ -1381,14 +1477,14 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Convert(const Mat& trainIdx, matches.clear(); 
matches.reserve(nQuery); - const int* trainIdx_ptr = trainIdx.ptr(); - const int* imgIdx_ptr = imgIdx.ptr(); - const float* distance_ptr = distance.ptr(); + const int *trainIdx_ptr = trainIdx.ptr(); + const int *imgIdx_ptr = imgIdx.ptr(); + const float *distance_ptr = distance.ptr(); for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx) { matches.push_back(vector()); - vector& curMatches = matches.back(); + vector &curMatches = matches.back(); curMatches.reserve(2); for (int i = 0; i < 2; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr) @@ -1417,17 +1513,20 @@ namespace struct ImgIdxSetter { explicit inline ImgIdxSetter(int imgIdx_) : imgIdx(imgIdx_) {} - inline void operator()(DMatch& m) const {m.imgIdx = imgIdx;} + inline void operator()(DMatch &m) const + { + m.imgIdx = imgIdx; + } int imgIdx; }; } -void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat& query, vector< vector >& matches, int k, - const vector& masks, bool compactResult) +void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &query, vector< vector > &matches, int k, + const vector &masks, bool compactResult) { - - if (k == 2) + + if (k == 2) { oclMat trainCollection; oclMat maskCollection; @@ -1457,13 +1556,13 @@ void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat& query, vector< for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx) { - vector& localMatch = curMatches[queryIdx]; - vector& globalMatch = matches[queryIdx]; + vector &localMatch = curMatches[queryIdx]; + vector &globalMatch = matches[queryIdx]; for_each(localMatch.begin(), localMatch.end(), ImgIdxSetter(static_cast(imgIdx))); temp.clear(); - merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), back_inserter(temp)); + merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), back_inserter(temp)); globalMatch.clear(); const size_t count = std::min((size_t)k, temp.size()); @@ -1480,17 +1579,17 @@ void 
cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat& query, vector< } // radiusMatchSingle -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat& query, const oclMat& train, - oclMat& trainIdx, oclMat& distance, oclMat& nMatches, float maxDistance, const oclMat& mask) -{ - if (query.empty() || train.empty()) +void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &query, const oclMat &train, + oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance, const oclMat &mask) +{ + if (query.empty() || train.empty()) return; - typedef void (*caller_t)(const oclMat& query, const oclMat& train, float maxDistance, const oclMat& mask, - const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches); + typedef void (*caller_t)(const oclMat & query, const oclMat & train, float maxDistance, const oclMat & mask, + const oclMat & trainIdx, const oclMat & distance, const oclMat & nMatches); - //#if 0 - static const caller_t callers[3][6] = + //#if 0 + static const caller_t callers[3][6] = { { ocl_matchL1_gpu, 0/*ocl_matchL1_gpu*/, @@ -1508,7 +1607,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat& query, ocl_matchHamming_gpu, 0/*ocl_matchHamming_gpu*/ } }; -//#endif + //#endif const int nQuery = query.rows; const int nTrain = train.rows; @@ -1517,25 +1616,25 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat& query, CV_Assert(train.type() == query.type() && train.cols == query.cols); CV_Assert(trainIdx.empty() || (trainIdx.rows == nQuery && trainIdx.size() == distance.size())); - nMatches.create(1, nQuery, CV_32SC1); + nMatches.create(1, nQuery, CV_32SC1); if (trainIdx.empty()) { - trainIdx.create(nQuery, std::max((nTrain / 100), 10), CV_32SC1); - distance.create(nQuery, std::max((nTrain / 100), 10), CV_32FC1); + trainIdx.create(nQuery, std::max((nTrain / 100), 10), CV_32SC1); + distance.create(nQuery, std::max((nTrain / 100), 10), CV_32FC1); } 
nMatches.setTo(Scalar::all(0)); - caller_t func = callers[distType][query.depth()]; - //CV_Assert(func != 0); - //func(query, train, maxDistance, mask, trainIdx, distance, nMatches, cc, StreamAccessor::getStream(stream)); - func(query, train, maxDistance, mask, trainIdx, distance, nMatches); + caller_t func = callers[distType][query.depth()]; + //CV_Assert(func != 0); + //func(query, train, maxDistance, mask, trainIdx, distance, nMatches, cc, StreamAccessor::getStream(stream)); + func(query, train, maxDistance, mask, trainIdx, distance, nMatches); } -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches, - vector< vector >& matches, bool compactResult) -{ - if (trainIdx.empty() || distance.empty() || nMatches.empty()) +void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, + vector< vector > &matches, bool compactResult) +{ + if (trainIdx.empty() || distance.empty() || nMatches.empty()) return; Mat trainIdxCPU(trainIdx); @@ -1545,10 +1644,10 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat& trai radiusMatchConvert(trainIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult); } -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches, - vector< vector >& matches, bool compactResult) -{ - if (trainIdx.empty() || distance.empty() || nMatches.empty()) +void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches, + vector< vector > &matches, bool compactResult) +{ + if (trainIdx.empty() || distance.empty() || nMatches.empty()) return; CV_Assert(trainIdx.type() == CV_32SC1); @@ -1560,12 +1659,12 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat& trainIdx matches.clear(); matches.reserve(nQuery); - const int* nMatches_ptr = 
nMatches.ptr(); + const int *nMatches_ptr = nMatches.ptr(); for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx) { - const int* trainIdx_ptr = trainIdx.ptr(queryIdx); - const float* distance_ptr = distance.ptr(queryIdx); + const int *trainIdx_ptr = trainIdx.ptr(queryIdx); + const float *distance_ptr = distance.ptr(queryIdx); const int nMatches = std::min(nMatches_ptr[queryIdx], trainIdx.cols); @@ -1577,7 +1676,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat& trainIdx } matches.push_back(vector(nMatches)); - vector& curMatches = matches.back(); + vector &curMatches = matches.back(); for (int i = 0; i < nMatches; ++i, ++trainIdx_ptr, ++distance_ptr) { @@ -1594,22 +1693,22 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat& trainIdx } } -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat& query, const oclMat& train, vector< vector >& matches, - float maxDistance, const oclMat& mask, bool compactResult) -{ - oclMat trainIdx, distance, nMatches; +void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &query, const oclMat &train, vector< vector > &matches, + float maxDistance, const oclMat &mask, bool compactResult) +{ + oclMat trainIdx, distance, nMatches; radiusMatchSingle(query, train, trainIdx, distance, nMatches, maxDistance, mask); radiusMatchDownload(trainIdx, distance, nMatches, matches, compactResult); } -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat& query, oclMat& trainIdx, oclMat& imgIdx, oclMat& distance, - oclMat& nMatches, float maxDistance, const vector& masks) -{ - if (query.empty() || empty()) +void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, + oclMat &nMatches, float maxDistance, const vector &masks) +{ + if (query.empty() || empty()) return; - typedef void (*caller_t)(const oclMat& query, const oclMat* trains, int n, float maxDistance, const oclMat* masks, 
- const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, const oclMat& nMatches); + typedef void (*caller_t)(const oclMat & query, const oclMat * trains, int n, float maxDistance, const oclMat * masks, + const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance, const oclMat & nMatches); #if 0 static const caller_t callers[3][6] = { @@ -1635,12 +1734,12 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat& qu CV_Assert(query.channels() == 1 && query.depth() < CV_64F); CV_Assert(trainIdx.empty() || (trainIdx.rows == nQuery && trainIdx.size() == distance.size() && trainIdx.size() == imgIdx.size())); - nMatches.create(1, nQuery, CV_32SC1); + nMatches.create(1, nQuery, CV_32SC1); if (trainIdx.empty()) { - trainIdx.create(nQuery, std::max((nQuery / 100), 10), CV_32SC1); - imgIdx.create(nQuery, std::max((nQuery / 100), 10), CV_32SC1); - distance.create(nQuery, std::max((nQuery / 100), 10), CV_32FC1); + trainIdx.create(nQuery, std::max((nQuery / 100), 10), CV_32SC1); + imgIdx.create(nQuery, std::max((nQuery / 100), 10), CV_32SC1); + distance.create(nQuery, std::max((nQuery / 100), 10), CV_32FC1); } nMatches.setTo(Scalar::all(0)); @@ -1651,14 +1750,14 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat& qu vector trains_(trainDescCollection.begin(), trainDescCollection.end()); vector masks_(masks.begin(), masks.end()); - /* func(query, &trains_[0], static_cast(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0], - trainIdx, imgIdx, distance, nMatches));*/ + /* func(query, &trains_[0], static_cast(trains_.size()), maxDistance, masks_.size() == 0 ? 
0 : &masks_[0], + trainIdx, imgIdx, distance, nMatches));*/ } -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, - const oclMat& nMatches, vector< vector >& matches, bool compactResult) +void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, + const oclMat &nMatches, vector< vector > &matches, bool compactResult) { - if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty()) + if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty()) return; Mat trainIdxCPU(trainIdx); @@ -1669,10 +1768,10 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat& trai radiusMatchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult); } -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches, - vector< vector >& matches, bool compactResult) -{ - if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty()) +void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches, + vector< vector > &matches, bool compactResult) +{ + if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty()) return; CV_Assert(trainIdx.type() == CV_32SC1); @@ -1685,13 +1784,13 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat& trainIdx matches.clear(); matches.reserve(nQuery); - const int* nMatches_ptr = nMatches.ptr(); + const int *nMatches_ptr = nMatches.ptr(); for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx) { - const int* trainIdx_ptr = trainIdx.ptr(queryIdx); - const int* imgIdx_ptr = imgIdx.ptr(queryIdx); - const float* distance_ptr = distance.ptr(queryIdx); + const int *trainIdx_ptr = trainIdx.ptr(queryIdx); + const int 
*imgIdx_ptr = imgIdx.ptr(queryIdx); + const float *distance_ptr = distance.ptr(queryIdx); const int nMatches = std::min(nMatches_ptr[queryIdx], trainIdx.cols); @@ -1703,7 +1802,7 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat& trainIdx } matches.push_back(vector()); - vector& curMatches = matches.back(); + vector &curMatches = matches.back(); curMatches.reserve(nMatches); for (int i = 0; i < nMatches; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr) @@ -1721,10 +1820,10 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat& trainIdx } } -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat& query, vector< vector >& matches, float maxDistance, - const vector& masks, bool compactResult) +void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &query, vector< vector > &matches, float maxDistance, + const vector &masks, bool compactResult) { - oclMat trainIdx, imgIdx, distance, nMatches; + oclMat trainIdx, imgIdx, distance, nMatches; radiusMatchCollection(query, trainIdx, imgIdx, distance, nMatches, maxDistance, masks); radiusMatchDownload(trainIdx, imgIdx, distance, nMatches, matches, compactResult); } diff --git a/modules/ocl/src/build_warps.cpp b/modules/ocl/src/build_warps.cpp new file mode 100644 index 0000000..a032f67 --- /dev/null +++ b/modules/ocl/src/build_warps.cpp @@ -0,0 +1,280 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. 
+// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "precomp.hpp" + +using namespace cv; +using namespace cv::ocl; +using namespace std; + +#if !defined (HAVE_OPENCL) +void cv::ocl::buildWarpPlaneMaps(Size, Rect, const Mat &, const Mat &, const Mat &, float, oclMat &, oclMat &, Stream &) +{ + throw_nogpu(); +} +void cv::ocl::buildWarpCylindricalMaps(Size, Rect, const Mat &, const Mat &, float, oclMat &, oclMat &, Stream &) +{ + throw_nogpu(); +} +void cv::ocl::buildWarpSphericalMaps(Size, Rect, const Mat &, const Mat &, float, oclMat &, oclMat &, Stream &) +{ + throw_nogpu(); +} +#else + +namespace cv +{ + namespace ocl + { + ///////////////////////////OpenCL kernel strings/////////////////////////// + extern const char *build_warps; + } +} + +////////////////////////////////////////////////////////////////////////////// +// buildWarpPlaneMaps + +void cv::ocl::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, + float scale, oclMat &map_x, oclMat &map_y) +{ + CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F); + CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F); + CV_Assert((T.size() == Size(3, 1) || T.size() == Size(1, 3)) && T.type() == CV_32F && T.isContinuous()); + + Mat K_Rinv = K * R.t(); + CV_Assert(K_Rinv.isContinuous()); + + Mat KRT_mat(1, 12, CV_32FC1); // 9 + 3 + KRT_mat(Range::all(), Range(0, 8)) = K_Rinv.reshape(1, 1); + KRT_mat(Range::all(), Range(9, 11)) = T; + + oclMat KRT_oclMat(KRT_mat); + // transfer K_Rinv and T into a single cl_mem + map_x.create(dst_roi.size(), CV_32F); + map_y.create(dst_roi.size(), CV_32F); + + int tl_u = dst_roi.tl().x; + int tl_v = dst_roi.tl().y; + + Context *clCxt = Context::getContext(); + string kernelName = "buildWarpPlaneMaps"; + vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&map_x.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&map_y.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&KRT_mat.data)); + args.push_back( make_pair( 
sizeof(cl_int), (void *)&tl_u)); + args.push_back( make_pair( sizeof(cl_int), (void *)&tl_v)); + args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&map_y.step)); + args.push_back( make_pair( sizeof(cl_float), (void *)&scale)); + + size_t globalThreads[3] = {map_x.cols, map_x.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1); +} + +////////////////////////////////////////////////////////////////////////////// +// buildWarpCylyndricalMaps + +void cv::ocl::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, + oclMat &map_x, oclMat &map_y) +{ + CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F); + CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F); + + Mat K_Rinv = K * R.t(); + CV_Assert(K_Rinv.isContinuous()); + + oclMat KR_oclMat(K_Rinv.reshape(1, 1)); + + map_x.create(dst_roi.size(), CV_32F); + map_y.create(dst_roi.size(), CV_32F); + + int tl_u = dst_roi.tl().x; + int tl_v = dst_roi.tl().y; + + Context *clCxt = Context::getContext(); + string kernelName = "buildWarpCylindricalMaps"; + vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&map_x.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&map_y.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&KR_oclMat.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&tl_u)); + args.push_back( make_pair( sizeof(cl_int), (void *)&tl_v)); + args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&map_y.step)); + args.push_back( 
make_pair( sizeof(cl_float), (void *)&scale)); + + size_t globalThreads[3] = {map_x.cols, map_x.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1); +} + +////////////////////////////////////////////////////////////////////////////// +// buildWarpSphericalMaps +void cv::ocl::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, + oclMat &map_x, oclMat &map_y) +{ + CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F); + CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F); + + Mat K_Rinv = K * R.t(); + CV_Assert(K_Rinv.isContinuous()); + + oclMat KR_oclMat(K_Rinv.reshape(1, 1)); + // transfer K_Rinv, R_Kinv into a single cl_mem + map_x.create(dst_roi.size(), CV_32F); + map_y.create(dst_roi.size(), CV_32F); + + int tl_u = dst_roi.tl().x; + int tl_v = dst_roi.tl().y; + + Context *clCxt = Context::getContext(); + string kernelName = "buildWarpSphericalMaps"; + vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&map_x.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&map_y.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&KR_oclMat.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&tl_u)); + args.push_back( make_pair( sizeof(cl_int), (void *)&tl_v)); + args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&map_y.step)); + args.push_back( make_pair( sizeof(cl_float), (void *)&scale)); + + size_t globalThreads[3] = {map_x.cols, map_x.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1); +} + + +void cv::ocl::buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, 
oclMat &ymap) +{ + + CV_Assert(M.rows == 2 && M.cols == 3); + + xmap.create(dsize, CV_32FC1); + ymap.create(dsize, CV_32FC1); + + float coeffs[2 * 3]; + Mat coeffsMat(2, 3, CV_32F, (void *)coeffs); + + if (inverse) + M.convertTo(coeffsMat, coeffsMat.type()); + else + { + cv::Mat iM; + invertAffineTransform(M, iM); + iM.convertTo(coeffsMat, coeffsMat.type()); + } + + oclMat coeffsOclMat(coeffsMat.reshape(1, 1)); + + Context *clCxt = Context::getContext(); + string kernelName = "buildWarpAffineMaps"; + vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&coeffsOclMat.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&ymap.step)); + + size_t globalThreads[3] = {xmap.cols, xmap.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void cv::ocl::buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap) +{ + + CV_Assert(M.rows == 3 && M.cols == 3); + + xmap.create(dsize, CV_32FC1); + ymap.create(dsize, CV_32FC1); + + float coeffs[3 * 3]; + Mat coeffsMat(3, 3, CV_32F, (void *)coeffs); + + if (inverse) + M.convertTo(coeffsMat, coeffsMat.type()); + else + { + cv::Mat iM; + invert(M, iM); + iM.convertTo(coeffsMat, coeffsMat.type()); + } + + oclMat coeffsOclMat(coeffsMat.reshape(1, 1)); + + Context *clCxt = Context::getContext(); + string kernelName = "buildWarpPerspectiveMaps"; + vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data)); + args.push_back( make_pair( sizeof(cl_mem), 
(void *)&coeffsOclMat.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&ymap.step)); + + size_t globalThreads[3] = {xmap.cols, xmap.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1); +} + + +#endif // HAVE_OPENCL diff --git a/modules/ocl/src/canny.cpp b/modules/ocl/src/canny.cpp index 59bbf29..2501089 100644 --- a/modules/ocl/src/canny.cpp +++ b/modules/ocl/src/canny.cpp @@ -52,10 +52,22 @@ using namespace cv::ocl; using namespace std; #if !defined (HAVE_OPENCL) -void cv::ocl::Canny(const oclMat& image, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false) { throw_nogpu(); } -void cv::ocl::Canny(const oclMat& image, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false){ throw_nogpu(); } -void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false){ throw_nogpu(); } -void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false){ throw_nogpu(); } +void cv::ocl::Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false) +{ + throw_nogpu(); +} +void cv::ocl::Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false) +{ + throw_nogpu(); +} +void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false) +{ + throw_nogpu(); +} +void cv::ocl::Canny(const oclMat &dx, const 
oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false) +{ + throw_nogpu(); +} #else namespace cv @@ -67,14 +79,14 @@ namespace cv } } -cv::ocl::CannyBuf::CannyBuf(const oclMat& dx_, const oclMat& dy_) : dx(dx_), dy(dy_), counter(NULL) +cv::ocl::CannyBuf::CannyBuf(const oclMat &dx_, const oclMat &dy_) : dx(dx_), dy(dy_), counter(NULL) { CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size()); create(dx_.size(), -1); } -void cv::ocl::CannyBuf::create(const Size& image_size, int apperture_size) +void cv::ocl::CannyBuf::create(const Size &image_size, int apperture_size) { ensureSizeIsEnough(image_size, CV_32SC1, dx); ensureSizeIsEnough(image_size, CV_32SC1, dy); @@ -123,27 +135,31 @@ void cv::ocl::CannyBuf::release() openCLFree(counter); } -namespace cv { namespace ocl { - namespace canny +namespace cv +{ + namespace ocl { - void calcSobelRowPass_gpu(const oclMat& src, oclMat& dx_buf, oclMat& dy_buf, int rows, int cols); + namespace canny + { + void calcSobelRowPass_gpu(const oclMat &src, oclMat &dx_buf, oclMat &dy_buf, int rows, int cols); - void calcMagnitude_gpu(const oclMat& dx_buf, const oclMat& dy_buf, oclMat& dx, oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad); - void calcMagnitude_gpu(const oclMat& dx, const oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad); + void calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat &dx, oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad); + void calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad); - void calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int rows, int cols, float low_thresh, float high_thresh); + void calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh); - void edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, void * counter, int rows, int cols); + void 
edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, int rows, int cols); - void edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, void * counter, int rows, int cols); + void edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols); - void getEdges_gpu(oclMat& map, oclMat& dst, int rows, int cols); + void getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols); + } } -}}// cv::ocl +}// cv::ocl namespace { - void CannyCaller(CannyBuf& buf, oclMat& dst, float low_thresh, float high_thresh) + void CannyCaller(CannyBuf &buf, oclMat &dst, float low_thresh, float high_thresh) { using namespace ::cv::ocl::canny; calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh); @@ -156,13 +172,13 @@ namespace } } -void cv::ocl::Canny(const oclMat& src, oclMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient) +void cv::ocl::Canny(const oclMat &src, oclMat &dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient) { CannyBuf buf(src.size(), apperture_size); Canny(src, buf, dst, low_thresh, high_thresh, apperture_size, L2gradient); } -void cv::ocl::Canny(const oclMat& src, CannyBuf& buf, oclMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient) +void cv::ocl::Canny(const oclMat &src, CannyBuf &buf, oclMat &dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient) { using namespace ::cv::ocl::canny; @@ -192,13 +208,13 @@ void cv::ocl::Canny(const oclMat& src, CannyBuf& buf, oclMat& dst, double low_th } CannyCaller(buf, dst, static_cast(low_thresh), static_cast(high_thresh)); } -void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, oclMat& dst, double low_thresh, double high_thresh, bool L2gradient) +void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, oclMat &dst, double low_thresh, double high_thresh, bool L2gradient) { CannyBuf buf(dx, dy); Canny(dx, dy, 
buf, dst, low_thresh, high_thresh, L2gradient); } -void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& dst, double low_thresh, double high_thresh, bool L2gradient) +void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &dst, double low_thresh, double high_thresh, bool L2gradient) { using namespace ::cv::ocl::canny; @@ -210,7 +226,8 @@ void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& d dst.create(dx.size(), CV_8U); dst.setTo(Scalar::all(0)); - buf.dx = dx; buf.dy = dy; + buf.dx = dx; + buf.dy = dy; buf.create(dx.size(), -1); buf.edgeBuf.setTo(Scalar::all(0)); calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient); @@ -218,7 +235,7 @@ void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& d CannyCaller(buf, dst, static_cast(low_thresh), static_cast(high_thresh)); } -void canny::calcSobelRowPass_gpu(const oclMat& src, oclMat& dx_buf, oclMat& dy_buf, int rows, int cols) +void canny::calcSobelRowPass_gpu(const oclMat &src, oclMat &dx_buf, oclMat &dy_buf, int rows, int cols) { Context *clCxt = src.clCxt; string kernelName = "calcSobelRowPass"; @@ -241,7 +258,7 @@ void canny::calcSobelRowPass_gpu(const oclMat& src, oclMat& dx_buf, oclMat& dy_b openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); } -void canny::calcMagnitude_gpu(const oclMat& dx_buf, const oclMat& dy_buf, oclMat& dx, oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad) +void canny::calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat &dx, oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad) { Context *clCxt = dx_buf.clCxt; string kernelName = "calcMagnitude_buf"; @@ -275,7 +292,7 @@ void canny::calcMagnitude_gpu(const oclMat& dx_buf, const oclMat& dy_buf, oclMat } openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options); } -void 
canny::calcMagnitude_gpu(const oclMat& dx, const oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad) +void canny::calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad) { Context *clCxt = dx.clCxt; string kernelName = "calcMagnitude"; @@ -304,7 +321,7 @@ void canny::calcMagnitude_gpu(const oclMat& dx, const oclMat& dy, oclMat& mag, i openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options); } -void canny::calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int rows, int cols, float low_thresh, float high_thresh) +void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh) { Context *clCxt = dx.clCxt; @@ -335,7 +352,7 @@ void canny::calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int ro openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); } -void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, void * counter, int rows, int cols) +void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, int rows, int cols) { Context *clCxt = map.clCxt; string kernelName = "edgesHysteresisLocal"; @@ -355,7 +372,7 @@ void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, void * counter, i openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); } -void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, void * counter, int rows, int cols) +void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols) { unsigned int count; openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL)); @@ -389,7 +406,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, voi #undef DIVUP } -void 
canny::getEdges_gpu(oclMat& map, oclMat& dst, int rows, int cols) +void canny::getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols) { Context *clCxt = map.clCxt; string kernelName = "getEdges"; diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp index bee370f..67bfeb3 100644 --- a/modules/ocl/src/color.cpp +++ b/modules/ocl/src/color.cpp @@ -81,9 +81,9 @@ namespace void RGB2Gray_caller(const oclMat &src, oclMat &dst, int bidx) { vector > args; - int channels = src.channels(); + int channels = src.oclchannels(); char build_options[50]; - //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.channels(),bidx); + //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx); sprintf(build_options, "-D DEPTH_%d", src.depth()); args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols)); args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows)); @@ -99,7 +99,7 @@ namespace void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn) { Size sz = src.size(); - int scn = src.channels(), depth = src.depth(), bidx; + int scn = src.oclchannels(), depth = src.depth(), bidx; CV_Assert(depth == CV_8U || depth == CV_16U); diff --git a/modules/ocl/src/columnsum.cpp b/modules/ocl/src/columnsum.cpp index c33c9a9..8022190 100644 --- a/modules/ocl/src/columnsum.cpp +++ b/modules/ocl/src/columnsum.cpp @@ -53,41 +53,44 @@ using namespace std; #if !defined(HAVE_OPENCL) -void cv::ocl::columnSum(const oclMat& src,oclMat& dst){ throw_nogpu(); } +void cv::ocl::columnSum(const oclMat &src, oclMat &dst) +{ + throw_nogpu(); +} #else /*!HAVE_OPENCL */ -namespace cv -{ - namespace ocl - { - extern const char* imgproc_columnsum; - } +namespace cv +{ + namespace ocl + { + extern const char *imgproc_columnsum; + } } -void cv::ocl::columnSum(const oclMat& src,oclMat& dst) +void cv::ocl::columnSum(const oclMat &src, oclMat &dst) { - CV_Assert(src.type() == CV_32FC1); + CV_Assert(src.type() == CV_32FC1); + + dst.create(src.size(), 
src.type()); + + Context *clCxt = src.clCxt; - dst.create(src.size(), src.type()); + const std::string kernelName = "columnSum"; - Context *clCxt = src.clCxt; - - const std::string kernelName = "columnSum"; - - std::vector< pair > args; + std::vector< pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step)); - size_t globalThreads[3] = {dst.cols, 1, 1}; - size_t localThreads[3] = {16, 16, 1}; + size_t globalThreads[3] = {dst.cols, 1, 1}; + size_t localThreads[3] = {16, 16, 1}; - openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); + openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); } -#endif \ No newline at end of file +#endif \ No newline at end of file diff --git a/modules/ocl/src/fft.cpp b/modules/ocl/src/fft.cpp index b3eda35..f62541d 100644 --- a/modules/ocl/src/fft.cpp +++ b/modules/ocl/src/fft.cpp @@ -52,43 +52,50 @@ using namespace cv::ocl; using namespace std; #if !defined (HAVE_OPENCL) -void cv::ocl::dft(const oclMat& src, oclMat& dst, int flags) { throw_nogpu(); } +void cv::ocl::dft(const oclMat &src, oclMat &dst, int flags) +{ + throw_nogpu(); +} #else #include -namespace cv{ 
namespace ocl { - enum FftType - { - C2R = 1, // complex to complex - R2C = 2, // real to opencl HERMITIAN_INTERLEAVED - C2C = 3 // opencl HERMITIAN_INTERLEAVED to real - }; - struct FftPlan +namespace cv +{ + namespace ocl { - friend void fft_setup(); - friend void fft_teardown(); - ~FftPlan(); - protected: - FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type); - const Size dft_size; - const int src_step, dst_step; - const int flags; - const FftType type; - clAmdFftPlanHandle plHandle; - static vector planStore; - static bool started; - static clAmdFftSetupData * setupData; - public: - // return a baked plan-> - // if there is one matched plan, return it - // if not, bake a new one, put it into the planStore and return it. - static clAmdFftPlanHandle getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type); - }; -}} + enum FftType + { + C2R = 1, // complex to complex + R2C = 2, // real to opencl HERMITIAN_INTERLEAVED + C2C = 3 // opencl HERMITIAN_INTERLEAVED to real + }; + struct FftPlan + { + friend void fft_setup(); + friend void fft_teardown(); + ~FftPlan(); + protected: + FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type); + const Size dft_size; + const int src_step, dst_step; + const int flags; + const FftType type; + clAmdFftPlanHandle plHandle; + static vector planStore; + static bool started; + static clAmdFftSetupData *setupData; + public: + // return a baked plan-> + // if there is one matched plan, return it + // if not, bake a new one, put it into the planStore and return it. 
+ static clAmdFftPlanHandle getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type); + }; + } +} bool cv::ocl::FftPlan::started = false; -vector cv::ocl::FftPlan::planStore = vector(); -clAmdFftSetupData * cv::ocl::FftPlan::setupData = 0; +vector cv::ocl::FftPlan::planStore = vector(); +clAmdFftSetupData *cv::ocl::FftPlan::setupData = 0; void cv::ocl::fft_setup() { @@ -134,9 +141,9 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla clAmdFftResultLocation place; clAmdFftLayout inLayout; clAmdFftLayout outLayout; - clAmdFftDim dim = is_1d_input||is_row_dft ? CLFFT_1D : CLFFT_2D; + clAmdFftDim dim = is_1d_input || is_row_dft ? CLFFT_1D : CLFFT_2D; - size_t batchSize = is_row_dft?dft_size.height : 1; + size_t batchSize = is_row_dft ? dft_size.height : 1; size_t clLengthsIn[ 3 ] = {1, 1, 1}; size_t clStridesIn[ 3 ] = {1, 1, 1}; size_t clLengthsOut[ 3 ] = {1, 1, 1}; @@ -195,7 +202,7 @@ cv::ocl::FftPlan::~FftPlan() { if(planStore[i]->plHandle == plHandle) { - planStore.erase(planStore.begin()+ i); + planStore.erase(planStore.begin() + i); } } openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) ); @@ -206,15 +213,15 @@ clAmdFftPlanHandle cv::ocl::FftPlan::getPlan(Size _dft_size, int _src_step, int // go through search for(int i = 0; i < planStore.size(); i ++) { - FftPlan * plan = planStore[i]; + FftPlan *plan = planStore[i]; if( - plan->dft_size.width == _dft_size.width && + plan->dft_size.width == _dft_size.width && plan->dft_size.height == _dft_size.height && plan->flags == _flags && plan->src_step == _src_step && plan->dst_step == _dst_step && plan->type == _type - ) + ) { return plan->plHandle; } @@ -225,9 +232,9 @@ clAmdFftPlanHandle cv::ocl::FftPlan::getPlan(Size _dft_size, int _src_step, int return newPlan->plHandle; } -void cv::ocl::dft(const oclMat& src, oclMat& dst, Size dft_size, int flags) +void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) { - if(dft_size == Size(0,0)) + 
if(dft_size == Size(0, 0)) { dft_size = src.size(); } @@ -258,7 +265,7 @@ void cv::ocl::dft(const oclMat& src, oclMat& dst, Size dft_size, int flags) break; case R2C: CV_Assert(!is_row_dft); // this is not supported yet - dst.create(src.rows, src.cols/2 + 1, CV_32FC2); + dst.create(src.rows, src.cols / 2 + 1, CV_32FC2); break; case C2R: CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows); @@ -274,23 +281,23 @@ void cv::ocl::dft(const oclMat& src, oclMat& dst, Size dft_size, int flags) clAmdFftPlanHandle plHandle = FftPlan::getPlan(dft_size, src.step, dst.step, flags, type); //get the buffersize - size_t buffersize=0; + size_t buffersize = 0; openCLSafeCall( clAmdFftGetTmpBufSize(plHandle, &buffersize ) ); - //allocate the intermediate buffer - cl_mem clMedBuffer=NULL; + //allocate the intermediate buffer + cl_mem clMedBuffer = NULL; if (buffersize) { cl_int medstatus; clMedBuffer = clCreateBuffer ( src.clCxt->impl->clContext, CL_MEM_READ_WRITE, buffersize, 0, &medstatus); openCLSafeCall( medstatus ); } - openCLSafeCall( clAmdFftEnqueueTransform( plHandle, - is_inverse?CLFFT_BACKWARD:CLFFT_FORWARD, - 1, - &src.clCxt->impl->clCmdQueue, - 0, NULL, NULL, - (cl_mem*)&src.data, (cl_mem*)&dst.data, clMedBuffer ) ); + openCLSafeCall( clAmdFftEnqueueTransform( plHandle, + is_inverse ? 
CLFFT_BACKWARD : CLFFT_FORWARD, + 1, + &src.clCxt->impl->clCmdQueue, + 0, NULL, NULL, + (cl_mem *)&src.data, (cl_mem *)&dst.data, clMedBuffer ) ); openCLSafeCall( clFinish(src.clCxt->impl->clCmdQueue) ); if(clMedBuffer) { diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp index 19351bf..1a236e4 100644 --- a/modules/ocl/src/filtering.cpp +++ b/modules/ocl/src/filtering.cpp @@ -110,9 +110,9 @@ Ptr cv::ocl::createLinearFilter_GPU(int, int, const Mat &, con } Ptr cv::ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType ) -{ - throw_nogpu(); - return Ptr(0); +{ + throw_nogpu(); + return Ptr(0); } void cv::ocl::boxFilter(const oclMat &, oclMat &, int, Size, Point, int) @@ -244,7 +244,7 @@ namespace class Filter2DEngine_GPU : public FilterEngine_GPU { public: - Filter2DEngine_GPU(const Ptr& filter2D_) : filter2D(filter2D_) {} + Filter2DEngine_GPU(const Ptr &filter2D_) : filter2D(filter2D_) {} virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) { @@ -328,53 +328,53 @@ void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, c CV_Assert(src.clCxt == dst.clCxt); CV_Assert( (src.cols == dst.cols) && (src.rows == dst.rows) ); - CV_Assert( (src.channels() == dst.channels()) ); + CV_Assert( (src.oclchannels() == dst.oclchannels()) ); - int srcStep = src.step1() / src.channels(); - int dstStep = dst.step1() / dst.channels(); + int srcStep = src.step1() / src.oclchannels(); + int dstStep = dst.step1() / dst.oclchannels(); int srcOffset = src.offset / src.elemSize(); int dstOffset = dst.offset / dst.elemSize(); - int srcOffset_x=srcOffset%srcStep; - int srcOffset_y=srcOffset/srcStep; + int srcOffset_x = srcOffset % srcStep; + int srcOffset_y = srcOffset / srcStep; Context *clCxt = src.clCxt; - string kernelName; + string kernelName; size_t localThreads[3] = {16, 16, 1}; - size_t globalThreads[3] = {(src.cols + localThreads[0]) / localThreads[0] * localThreads[0], 
(src.rows + localThreads[1]) / localThreads[1] * localThreads[1], 1}; - - if(src.type()==CV_8UC1) - { - kernelName = "morph_C1_D0"; - globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0]) / localThreads[0] * localThreads[0]; - CV_Assert( localThreads[0]*localThreads[1]*8 >= (localThreads[0]*4+ksize.width-1)*(localThreads[1]+ksize.height-1) ); - } - else - { - kernelName = "morph"; - CV_Assert( localThreads[0]*localThreads[1]*2 >= (localThreads[0]+ksize.width-1)*(localThreads[1]+ksize.height-1) ); - } - char s[64]; - switch(src.type()) - { - case CV_8UC1: - sprintf(s, "-D VAL=255"); - break; - case CV_8UC3: - case CV_8UC4: - sprintf(s, "-D VAL=255 -D GENTYPE=uchar4"); - break; - case CV_32FC1: - sprintf(s, "-D VAL=FLT_MAX -D GENTYPE=float"); - break; - case CV_32FC3: - case CV_32FC4: - sprintf(s, "-D VAL=FLT_MAX -D GENTYPE=float4"); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported type"); - } + size_t globalThreads[3] = {(src.cols + localThreads[0]) / localThreads[0] *localThreads[0], (src.rows + localThreads[1]) / localThreads[1] *localThreads[1], 1}; + + if(src.type() == CV_8UC1) + { + kernelName = "morph_C1_D0"; + globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0]) / localThreads[0] * localThreads[0]; + CV_Assert( localThreads[0]*localThreads[1] * 8 >= (localThreads[0] * 4 + ksize.width - 1) * (localThreads[1] + ksize.height - 1) ); + } + else + { + kernelName = "morph"; + CV_Assert( localThreads[0]*localThreads[1] * 2 >= (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1) ); + } + char s[64]; + switch(src.type()) + { + case CV_8UC1: + sprintf(s, "-D VAL=255"); + break; + case CV_8UC3: + case CV_8UC4: + sprintf(s, "-D VAL=255 -D GENTYPE=uchar4"); + break; + case CV_32FC1: + sprintf(s, "-D VAL=FLT_MAX -D GENTYPE=float"); + break; + case CV_32FC3: + case CV_32FC4: + sprintf(s, "-D VAL=FLT_MAX -D GENTYPE=float4"); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported type"); + } char 
compile_option[128]; - sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s", anchor.x, anchor.y, localThreads[0], localThreads[1],s); + sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s", anchor.x, anchor.y, localThreads[0], localThreads[1], s); vector< pair > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); @@ -385,9 +385,9 @@ void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, c args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep)); args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep)); args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_kernel.data)); - args.push_back( make_pair( sizeof(cl_int),(void*)&src.wholecols)); - args.push_back( make_pair( sizeof(cl_int),(void*)&src.wholerows)); - args.push_back( make_pair( sizeof(cl_int),(void*)&dstOffset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholecols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholerows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&dstOffset)); openCLExecuteKernel(clCxt, &filtering_morph, kernelName, globalThreads, localThreads, args, -1, -1, compile_option); } @@ -400,53 +400,53 @@ void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, CV_Assert(src.clCxt == dst.clCxt); CV_Assert( (src.cols == dst.cols) && (src.rows == dst.rows) ); - CV_Assert( (src.channels() == dst.channels()) ); + CV_Assert( (src.oclchannels() == dst.oclchannels()) ); - int srcStep = src.step1() / src.channels(); - int dstStep = dst.step1() / dst.channels(); + int srcStep = src.step1() / src.oclchannels(); + int dstStep = dst.step1() / dst.oclchannels(); int srcOffset = src.offset / src.elemSize(); int dstOffset = dst.offset / dst.elemSize(); - int srcOffset_x=srcOffset%srcStep; - int srcOffset_y=srcOffset/srcStep; + int srcOffset_x = srcOffset % 
srcStep; + int srcOffset_y = srcOffset / srcStep; Context *clCxt = src.clCxt; - string kernelName; + string kernelName; size_t localThreads[3] = {16, 16, 1}; - size_t globalThreads[3] = {(src.cols + localThreads[0]) / localThreads[0] * localThreads[0], (src.rows + localThreads[1]) / localThreads[1] * localThreads[1], 1}; - - if(src.type()==CV_8UC1) - { - kernelName = "morph_C1_D0"; - globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0]) / localThreads[0] * localThreads[0]; - CV_Assert( localThreads[0]*localThreads[1]*8 >= (localThreads[0]*4+ksize.width-1)*(localThreads[1]+ksize.height-1) ); - } - else - { - kernelName = "morph"; - CV_Assert( localThreads[0]*localThreads[1]*2 >= (localThreads[0]+ksize.width-1)*(localThreads[1]+ksize.height-1) ); - } - char s[64]; - switch(src.type()) - { - case CV_8UC1: - sprintf(s, "-D VAL=0"); - break; - case CV_8UC3: - case CV_8UC4: - sprintf(s, "-D VAL=0 -D GENTYPE=uchar4"); - break; - case CV_32FC1: - sprintf(s, "-D VAL=-FLT_MAX -D GENTYPE=float"); - break; - case CV_32FC3: - case CV_32FC4: - sprintf(s, "-D VAL=-FLT_MAX -D GENTYPE=float4"); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported type"); - } + size_t globalThreads[3] = {(src.cols + localThreads[0]) / localThreads[0] *localThreads[0], (src.rows + localThreads[1]) / localThreads[1] *localThreads[1], 1}; + + if(src.type() == CV_8UC1) + { + kernelName = "morph_C1_D0"; + globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0]) / localThreads[0] * localThreads[0]; + CV_Assert( localThreads[0]*localThreads[1] * 8 >= (localThreads[0] * 4 + ksize.width - 1) * (localThreads[1] + ksize.height - 1) ); + } + else + { + kernelName = "morph"; + CV_Assert( localThreads[0]*localThreads[1] * 2 >= (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1) ); + } + char s[64]; + switch(src.type()) + { + case CV_8UC1: + sprintf(s, "-D VAL=0"); + break; + case CV_8UC3: + case CV_8UC4: + sprintf(s, "-D VAL=0 -D GENTYPE=uchar4"); + break; + case 
CV_32FC1: + sprintf(s, "-D VAL=-FLT_MAX -D GENTYPE=float"); + break; + case CV_32FC3: + case CV_32FC4: + sprintf(s, "-D VAL=-FLT_MAX -D GENTYPE=float4"); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported type"); + } char compile_option[128]; - sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s", anchor.x, anchor.y, localThreads[0], localThreads[1],s); + sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s", anchor.x, anchor.y, localThreads[0], localThreads[1], s); vector< pair > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); @@ -457,9 +457,9 @@ void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep)); args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep)); args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_kernel.data)); - args.push_back( make_pair( sizeof(cl_int),(void*)&src.wholecols)); - args.push_back( make_pair( sizeof(cl_int),(void*)&src.wholerows)); - args.push_back( make_pair( sizeof(cl_int),(void*)&dstOffset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholecols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholerows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&dstOffset)); openCLExecuteKernel(clCxt, &filtering_morph, kernelName, globalThreads, localThreads, args, -1, -1, compile_option); } @@ -467,12 +467,12 @@ Ptr cv::ocl::getMorphologyFilter_GPU(int op, int type, const Mat { static const GPUMorfFilter_t GPUMorfFilter_callers[2][5] = { - {0, GPUErode, 0, 0, GPUErode }, - {0, GPUDilate, 0, 0, GPUDilate} + {0, GPUErode, 0, GPUErode, GPUErode }, + {0, GPUDilate, 0, GPUDilate, GPUDilate} }; CV_Assert(op == MORPH_ERODE || op == MORPH_DILATE); - CV_Assert(type == CV_8UC1 || type == CV_8UC4 || type == CV_32FC1 || type == CV_32FC4); + 
CV_Assert(type == CV_8UC1 || type == CV_8UC3 || type == CV_8UC4 || type == CV_32FC1 || type == CV_32FC1 || type == CV_32FC4); oclMat gpu_krnl; normalizeKernel(kernel, gpu_krnl); @@ -486,7 +486,7 @@ namespace class MorphologyFilterEngine_GPU : public Filter2DEngine_GPU { public: - MorphologyFilterEngine_GPU(const Ptr& filter2D_, int iters_) : + MorphologyFilterEngine_GPU(const Ptr &filter2D_, int iters_) : Filter2DEngine_GPU(filter2D_), iters(iters_) {} virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) @@ -539,18 +539,18 @@ Ptr cv::ocl::createMorphologyFilter_GPU(int op, int type, cons namespace { - void morphOp(int op, const oclMat &src, oclMat &dst, const Mat &_kernel, Point anchor, int iterations,int borderType,const Scalar& borderValue) + void morphOp(int op, const oclMat &src, oclMat &dst, const Mat &_kernel, Point anchor, int iterations, int borderType, const Scalar &borderValue) { - if((borderType != cv::BORDER_CONSTANT) || (borderValue!=morphologyDefaultBorderValue())) - { - CV_Error(CV_StsBadArg,"unsupported border type"); - } + if((borderType != cv::BORDER_CONSTANT) || (borderValue != morphologyDefaultBorderValue())) + { + CV_Error(CV_StsBadArg, "unsupported border type"); + } Mat kernel; Size ksize = _kernel.data ? 
_kernel.size() : Size(3, 3); normalizeAnchor(anchor, ksize); - if (iterations == 0 || _kernel.rows *_kernel.cols == 1) + if (iterations == 0 || _kernel.rows * _kernel.cols == 1) { src.copyTo(dst); return; @@ -581,7 +581,7 @@ namespace } void cv::ocl::erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor, int iterations, - int borderType,const Scalar& borderValue) + int borderType, const Scalar &borderValue) { bool allZero = true; for(int i = 0; i < kernel.rows * kernel.cols; ++i) @@ -591,48 +591,48 @@ void cv::ocl::erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point an { kernel.data[0] = 1; } - morphOp(MORPH_ERODE, src, dst, kernel, anchor, iterations,borderType, borderValue); + morphOp(MORPH_ERODE, src, dst, kernel, anchor, iterations, borderType, borderValue); } void cv::ocl::dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor, int iterations, - int borderType,const Scalar& borderValue) + int borderType, const Scalar &borderValue) { - morphOp(MORPH_DILATE, src, dst, kernel, anchor, iterations,borderType, borderValue); + morphOp(MORPH_DILATE, src, dst, kernel, anchor, iterations, borderType, borderValue); } void cv::ocl::morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor, int iterations, - int borderType,const Scalar& borderValue) + int borderType, const Scalar &borderValue) { oclMat temp; switch( op ) { case MORPH_ERODE: - erode( src, dst, kernel, anchor, iterations,borderType, borderValue); + erode( src, dst, kernel, anchor, iterations, borderType, borderValue); break; case MORPH_DILATE: - dilate( src, dst, kernel, anchor, iterations,borderType, borderValue); + dilate( src, dst, kernel, anchor, iterations, borderType, borderValue); break; case MORPH_OPEN: - erode( src, temp, kernel, anchor, iterations,borderType, borderValue); - dilate( temp, dst, kernel, anchor, iterations,borderType, borderValue); + erode( src, temp, kernel, anchor, iterations, borderType, borderValue); + 
dilate( temp, dst, kernel, anchor, iterations, borderType, borderValue); break; case CV_MOP_CLOSE: - dilate( src, temp, kernel, anchor, iterations,borderType, borderValue); - erode( temp, dst, kernel, anchor, iterations,borderType, borderValue); + dilate( src, temp, kernel, anchor, iterations, borderType, borderValue); + erode( temp, dst, kernel, anchor, iterations, borderType, borderValue); break; case CV_MOP_GRADIENT: - erode( src, temp, kernel, anchor, iterations,borderType, borderValue); - dilate( src, dst, kernel, anchor, iterations,borderType, borderValue); + erode( src, temp, kernel, anchor, iterations, borderType, borderValue); + dilate( src, dst, kernel, anchor, iterations, borderType, borderValue); subtract(dst, temp, dst); break; case CV_MOP_TOPHAT: - erode( src, dst, kernel, anchor, iterations,borderType, borderValue); - dilate( dst, temp, kernel, anchor, iterations,borderType, borderValue); + erode( src, dst, kernel, anchor, iterations, borderType, borderValue); + dilate( dst, temp, kernel, anchor, iterations, borderType, borderValue); subtract(src, temp, dst); break; case CV_MOP_BLACKHAT: - dilate( src, dst, kernel, anchor, iterations,borderType, borderValue); - erode( dst, temp, kernel, anchor, iterations,borderType, borderValue); + dilate( src, dst, kernel, anchor, iterations, borderType, borderValue); + erode( dst, temp, kernel, anchor, iterations, borderType, borderValue); subtract(temp, src, dst); break; default: @@ -670,12 +670,12 @@ void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel, CV_Assert(src.clCxt == dst.clCxt); CV_Assert( (src.cols == dst.cols) && (src.rows == dst.rows) ); - CV_Assert( (src.channels() == dst.channels()) ); + CV_Assert( (src.oclchannels() == dst.oclchannels()) ); CV_Assert( (borderType != 0) ); CV_Assert(ksize.height > 0 && ksize.width > 0 && ((ksize.height & 1) == 1) && ((ksize.width & 1) == 1)); CV_Assert((anchor.x == -1 && anchor.y == -1) || (anchor.x == ksize.width >> 1 && anchor.y == ksize.height >> 
1)); Context *clCxt = src.clCxt; - int cn = src.channels(); + int cn = src.oclchannels(); int depth = src.depth(); string kernelName = "filter2D"; @@ -692,14 +692,14 @@ void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel, {4, 4, 4, 4, 1, 1, 4} }; - int vector_length = vector_lengths[cn-1][depth]; + int vector_length = vector_lengths[cn - 1][depth]; int offset_cols = (dst_offset_x) & (vector_length - 1); int cols = dst.cols + offset_cols; int rows = divUp(dst.rows, vector_length); size_t localThreads[3] = {256, 1, 1}; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(rows, localThreads[1]) * localThreads[1], 1 + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(rows, localThreads[1]) *localThreads[1], 1 }; vector< pair > args; @@ -723,9 +723,9 @@ void GPUFilter2D(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Ptr cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize, Point anchor, int borderType) { - static const GPUFilter2D_t GPUFilter2D_callers[] = {0, GPUFilter2D, 0, 0, GPUFilter2D}; + static const GPUFilter2D_t GPUFilter2D_callers[] = {0, GPUFilter2D, 0, GPUFilter2D, GPUFilter2D}; - CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4 || srcType == CV_32FC1 || srcType == CV_32FC4) && dstType == srcType); + CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC3 || srcType == CV_8UC4 || srcType == CV_32FC1 || srcType == CV_32FC3 || srcType == CV_32FC4) && dstType == srcType); oclMat gpu_krnl; int nDivisor; @@ -767,8 +767,8 @@ namespace class SeparableFilterEngine_GPU : public FilterEngine_GPU { public: - SeparableFilterEngine_GPU(const Ptr& rowFilter_, - const Ptr& columnFilter_) : + SeparableFilterEngine_GPU(const Ptr &rowFilter_, + const Ptr &columnFilter_) : rowFilter(rowFilter_), columnFilter(columnFilter_) { ksize = Size(rowFilter->ksize, columnFilter->ksize); @@ -780,7 +780,7 @@ namespace Size src_size = src.size(); int src_type = 
src.type(); - int cn = src.channels(); + int cn = src.oclchannels(); //dst.create(src_size, src_type); dst = Scalar(0.0); //dstBuf.create(src_size, src_type); @@ -810,8 +810,8 @@ namespace }; } -Ptr cv::ocl::createSeparableFilter_GPU(const Ptr& rowFilter, - const Ptr& columnFilter) +Ptr cv::ocl::createSeparableFilter_GPU(const Ptr &rowFilter, + const Ptr &columnFilter) { return Ptr(new SeparableFilterEngine_GPU(rowFilter, columnFilter)); } @@ -1071,12 +1071,12 @@ void GPUFilterBox_32F_C4R(const oclMat &src, oclMat &dst, Ptr cv::ocl::getBoxFilter_GPU(int srcType, int dstType, const Size &ksize, Point anchor, int borderType) { - static const FilterBox_t FilterBox_callers[2][5] = {{0, GPUFilterBox_8u_C1R, 0, 0, GPUFilterBox_8u_C4R}, - {0, GPUFilterBox_32F_C1R, 0, 0, GPUFilterBox_32F_C4R} + static const FilterBox_t FilterBox_callers[2][5] = {{0, GPUFilterBox_8u_C1R, 0, GPUFilterBox_8u_C4R, GPUFilterBox_8u_C4R}, + {0, GPUFilterBox_32F_C1R, 0, GPUFilterBox_32F_C4R, GPUFilterBox_32F_C4R} }; //Remove this check if more data types need to be supported. 
- CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4 || srcType == CV_32FC1 || srcType == CV_32FC4) - && dstType == srcType); + CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC3 || srcType == CV_8UC4 || srcType == CV_32FC1 || + srcType == CV_32FC3 || srcType == CV_32FC4) && dstType == srcType); normalizeAnchor(anchor, ksize); @@ -1155,7 +1155,7 @@ template void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel, int ksize, int anchor, int bordertype) { Context *clCxt = src.clCxt; - int channels = src.channels(); + int channels = src.oclchannels(); size_t localThreads[3] = {16, 16, 1}; string kernelName = "row_filter"; @@ -1208,7 +1208,7 @@ void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel //sanity checks CV_Assert(clCxt == dst.clCxt); CV_Assert(src.cols == dst.cols); - CV_Assert(src.channels() == dst.channels()); + CV_Assert(src.oclchannels() == dst.oclchannels()); CV_Assert(ksize == (anchor << 1) + 1); int src_pix_per_row, dst_pix_per_row; int src_offset_x, src_offset_y, dst_offset_in_pixel; @@ -1283,7 +1283,7 @@ template void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel, int ksize, int anchor, int bordertype) { Context *clCxt = src.clCxt; - int channels = src.channels(); + int channels = src.oclchannels(); size_t localThreads[3] = {16, 16, 1}; string kernelName = "col_filter"; @@ -1308,7 +1308,7 @@ void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_ker break; } char compile_option[256]; - + size_t globalThreads[3]; globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1]; @@ -1319,52 +1319,52 @@ void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_ker { case 1: globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0]; - sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s", - 
anchor, localThreads[0], localThreads[1], channels, btype,"float","uchar","convert_uchar_sat"); + sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s", + anchor, localThreads[0], localThreads[1], channels, btype, "float", "uchar", "convert_uchar_sat"); break; case 2: globalThreads[0] = ((dst.cols + 1) / 2 + localThreads[0] - 1) / localThreads[0] * localThreads[0]; - sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s", - anchor, localThreads[0], localThreads[1], channels, btype,"float2","uchar2","convert_uchar2_sat"); + sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s", + anchor, localThreads[0], localThreads[1], channels, btype, "float2", "uchar2", "convert_uchar2_sat"); break; case 3: case 4: globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0]; - sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s", - anchor, localThreads[0], localThreads[1], channels, btype,"float4","uchar4","convert_uchar4_sat"); + sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s", + anchor, localThreads[0], localThreads[1], channels, btype, "float4", "uchar4", "convert_uchar4_sat"); break; } } else { globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0]; - switch(dst.type()) - { - case CV_32SC1: - sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s", - anchor, localThreads[0], localThreads[1], channels, btype,"float","int","convert_int_sat"); - break; - case CV_32SC3: - case CV_32SC4: - sprintf(compile_option, "-D 
RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s", - anchor, localThreads[0], localThreads[1], channels, btype,"float4","int4","convert_int4_sat"); - break; - case CV_32FC1: - sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s", - anchor, localThreads[0], localThreads[1], channels, btype,"float","float",""); - break; - case CV_32FC3: - case CV_32FC4: - sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s", - anchor, localThreads[0], localThreads[1], channels, btype,"float4","float4",""); - break; - } + switch(dst.type()) + { + case CV_32SC1: + sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s", + anchor, localThreads[0], localThreads[1], channels, btype, "float", "int", "convert_int_sat"); + break; + case CV_32SC3: + case CV_32SC4: + sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s", + anchor, localThreads[0], localThreads[1], channels, btype, "float4", "int4", "convert_int4_sat"); + break; + case CV_32FC1: + sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s", + anchor, localThreads[0], localThreads[1], channels, btype, "float", "float", ""); + break; + case CV_32FC3: + case CV_32FC4: + sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s", + anchor, localThreads[0], localThreads[1], channels, btype, "float4", "float4", ""); + break; + } } //sanity checks CV_Assert(clCxt == dst.clCxt); CV_Assert(src.cols == dst.cols); - CV_Assert(src.channels() == dst.channels()); + CV_Assert(src.oclchannels() == 
dst.oclchannels()); CV_Assert(ksize == (anchor << 1) + 1); int src_pix_per_row, dst_pix_per_row; int src_offset_x, src_offset_y, dst_offset_in_pixel; @@ -1379,8 +1379,8 @@ void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_ker args.push_back(make_pair(sizeof(cl_mem), &dst.data)); args.push_back(make_pair(sizeof(cl_int), (void *)&dst.cols)); args.push_back(make_pair(sizeof(cl_int), (void *)&dst.rows)); - args.push_back(make_pair(sizeof(cl_int),(void*)&src.wholecols)); - args.push_back(make_pair(sizeof(cl_int),(void*)&src.wholerows)); + args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols)); + args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows)); args.push_back(make_pair(sizeof(cl_int), (void *)&src_pix_per_row)); //args.push_back(make_pair(sizeof(cl_int),(void*)&src_offset_x)); //args.push_back(make_pair(sizeof(cl_int),(void*)&src_offset_y)); @@ -1441,18 +1441,18 @@ Ptr cv::ocl::createSeparableLinearFilter_GPU(int srcType, int void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY, Point anchor, double delta, int bordertype) { - if((dst.cols!=dst.wholecols) || (dst.rows!=dst.wholerows))//has roi - { - if((bordertype & cv::BORDER_ISOLATED) != 0) - { - bordertype &= ~cv::BORDER_ISOLATED; - if((bordertype != cv::BORDER_CONSTANT) && - (bordertype != cv::BORDER_REPLICATE)) - { - CV_Error(CV_StsBadArg,"unsupported border type"); - } - } - } + if((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi + { + if((bordertype & cv::BORDER_ISOLATED) != 0) + { + bordertype &= ~cv::BORDER_ISOLATED; + if((bordertype != cv::BORDER_CONSTANT) && + (bordertype != cv::BORDER_REPLICATE)) + { + CV_Error(CV_StsBadArg, "unsupported border type"); + } + } + } if( ddepth < 0 ) ddepth = src.depth(); //CV_Assert(ddepth == src.depth()); @@ -1464,10 +1464,10 @@ void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat Ptr cv::ocl::createDerivFilter_GPU( int 
srcType, int dstType, int dx, int dy, int ksize, int borderType ) { - Mat kx, ky; - getDerivKernels( kx, ky, dx, dy, ksize, false, CV_32F ); - return createSeparableLinearFilter_GPU(srcType, dstType, - kx, ky, Point(-1,-1), 0, borderType ); + Mat kx, ky; + getDerivKernels( kx, ky, dx, dy, ksize, false, CV_32F ); + return createSeparableLinearFilter_GPU(srcType, dstType, + kx, ky, Point(-1, -1), 0, borderType ); } //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1517,9 +1517,9 @@ void cv::ocl::Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, double scale) { - if(src.clCxt -> impl -> double_support ==0 && src.type() == CV_64F) + if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F) { - CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n"); + CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; } @@ -1576,18 +1576,18 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si src.copyTo(dst); return; } - if((dst.cols!=dst.wholecols) || (dst.rows!=dst.wholerows))//has roi - { - if((bordertype & cv::BORDER_ISOLATED) != 0) - { - bordertype &= ~cv::BORDER_ISOLATED; - if((bordertype != cv::BORDER_CONSTANT) && - (bordertype != cv::BORDER_REPLICATE)) - { - CV_Error(CV_StsBadArg,"unsupported border type"); - } - } - } + if((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi + { + if((bordertype & cv::BORDER_ISOLATED) != 0) + { + bordertype &= ~cv::BORDER_ISOLATED; + if((bordertype != cv::BORDER_CONSTANT) && + (bordertype != cv::BORDER_REPLICATE)) + { + CV_Error(CV_StsBadArg, "unsupported border type"); + } + } + } dst.create(src.size(), src.type()); if( bordertype != BORDER_CONSTANT ) { diff --git a/modules/ocl/src/gemm.cpp b/modules/ocl/src/gemm.cpp index c35e061..bccf556 100644 --- a/modules/ocl/src/gemm.cpp +++ 
b/modules/ocl/src/gemm.cpp @@ -51,111 +51,114 @@ #include "clAmdBlas.h" #if !defined (HAVE_OPENCL) -void cv::ocl::dft(const oclMat& src, oclMat& dst, int flags) { throw_nogpu(); } +void cv::ocl::dft(const oclMat &src, oclMat &dst, int flags) +{ + throw_nogpu(); +} #else using namespace cv; - void cv::ocl::gemm(const oclMat& src1, const oclMat& src2, double alpha, - const oclMat& src3, double beta, oclMat& dst, int flags) - { - CV_Assert(src1.cols == src2.rows && - (src3.empty() || src1.rows == src3.rows && src2.cols == src3.cols)); - CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported - if(!src3.empty()) - { - src3.copyTo(dst); - } - else - { - dst.create(src1.rows, src2.cols, src1.type()); - dst.setTo(Scalar::all(0)); - } - openCLSafeCall( clAmdBlasSetup() ); - - const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags)?clAmdBlasTrans:clAmdBlasNoTrans; - const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags)?clAmdBlasTrans:clAmdBlasNoTrans; - const clAmdBlasOrder order = clAmdBlasRowMajor; +void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, + const oclMat &src3, double beta, oclMat &dst, int flags) +{ + CV_Assert(src1.cols == src2.rows && + (src3.empty() || src1.rows == src3.rows && src2.cols == src3.cols)); + CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported + if(!src3.empty()) + { + src3.copyTo(dst); + } + else + { + dst.create(src1.rows, src2.cols, src1.type()); + dst.setTo(Scalar::all(0)); + } + openCLSafeCall( clAmdBlasSetup() ); - const int M = src1.rows; - const int N = src2.cols; - const int K = src1.cols; - int lda = src1.step; - int ldb = src2.step; - int ldc = dst.step; - int offa = src1.offset; - int offb = src2.offset; - int offc = dst.offset; + const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans; + const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags) ? 
clAmdBlasTrans : clAmdBlasNoTrans; + const clAmdBlasOrder order = clAmdBlasRowMajor; + const int M = src1.rows; + const int N = src2.cols; + const int K = src1.cols; + int lda = src1.step; + int ldb = src2.step; + int ldc = dst.step; + int offa = src1.offset; + int offb = src2.offset; + int offc = dst.offset; - switch(src1.type()) - { - case CV_32FC1: - lda /= sizeof(float); - ldb /= sizeof(float); - ldc /= sizeof(float); - offa /= sizeof(float); - offb /= sizeof(float); - offc /= sizeof(float); - openCLSafeCall - ( - clAmdBlasSgemmEx(order, transA, transB, M, N, K, - alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, - beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) - ); - break; - case CV_64FC1: - lda /= sizeof(double); - ldb /= sizeof(double); - ldc /= sizeof(double); - offa /= sizeof(double); - offb /= sizeof(double); - offc /= sizeof(double); - openCLSafeCall - ( - clAmdBlasDgemmEx(order, transA, transB, M, N, K, - alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, - beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) - ); - break; - case CV_32FC2: - { - lda /= sizeof(std::complex); - ldb /= sizeof(std::complex); - ldc /= sizeof(std::complex); - offa /= sizeof(std::complex); - offb /= sizeof(std::complex); - offc /= sizeof(std::complex); - cl_float2 alpha_2 = {{alpha, 0}}; - cl_float2 beta_2 = {{beta, 0}}; - openCLSafeCall - ( - clAmdBlasCgemmEx(order, transA, transB, M, N, K, - alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, - beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) - ); - } - break; - case CV_64FC2: - { - lda /= sizeof(std::complex); - ldb /= sizeof(std::complex); - ldc /= sizeof(std::complex); - offa /= sizeof(std::complex); - offb /= sizeof(std::complex); - offc /= sizeof(std::complex); - cl_double2 alpha_2 = {{alpha, 0}}; - cl_double2 beta_2 = {{beta, 0}}; 
- openCLSafeCall - ( - clAmdBlasZgemmEx(order, transA, transB, M, N, K, - alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, - beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) - ); - } - break; - } - clAmdBlasTeardown(); - } + + switch(src1.type()) + { + case CV_32FC1: + lda /= sizeof(float); + ldb /= sizeof(float); + ldc /= sizeof(float); + offa /= sizeof(float); + offb /= sizeof(float); + offc /= sizeof(float); + openCLSafeCall + ( + clAmdBlasSgemmEx(order, transA, transB, M, N, K, + alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, + beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) + ); + break; + case CV_64FC1: + lda /= sizeof(double); + ldb /= sizeof(double); + ldc /= sizeof(double); + offa /= sizeof(double); + offb /= sizeof(double); + offc /= sizeof(double); + openCLSafeCall + ( + clAmdBlasDgemmEx(order, transA, transB, M, N, K, + alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, + beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) + ); + break; + case CV_32FC2: + { + lda /= sizeof(std::complex); + ldb /= sizeof(std::complex); + ldc /= sizeof(std::complex); + offa /= sizeof(std::complex); + offb /= sizeof(std::complex); + offc /= sizeof(std::complex); + cl_float2 alpha_2 = {{alpha, 0}}; + cl_float2 beta_2 = {{beta, 0}}; + openCLSafeCall + ( + clAmdBlasCgemmEx(order, transA, transB, M, N, K, + alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, + beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) + ); + } + break; + case CV_64FC2: + { + lda /= sizeof(std::complex); + ldb /= sizeof(std::complex); + ldc /= sizeof(std::complex); + offa /= sizeof(std::complex); + offb /= sizeof(std::complex); + offc /= sizeof(std::complex); + cl_double2 alpha_2 = {{alpha, 0}}; + cl_double2 beta_2 = {{beta, 0}}; + openCLSafeCall 
+ ( + clAmdBlasZgemmEx(order, transA, transB, M, N, K, + alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, + beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL) + ); + } + break; + } + clAmdBlasTeardown(); +} #endif #endif diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp index c5fe777..8ac8326 100644 --- a/modules/ocl/src/haar.cpp +++ b/modules/ocl/src/haar.cpp @@ -52,6 +52,7 @@ #include "precomp.hpp" #include +#include #ifdef EMU #include "runCL.h" #endif @@ -299,7 +300,7 @@ const float icv_stage_threshold_bias = 0.0001f; double globaltime = 0; -CvHaarClassifierCascade* +CvHaarClassifierCascade * gpuCreateHaarClassifierCascade( int stage_count ) { CvHaarClassifierCascade *cascade = 0; @@ -331,7 +332,7 @@ gpuReleaseHidHaarClassifierCascade( GpuHidHaarClassifierCascade **_cascade ) } /* create more efficient internal representation of haar classifier cascade */ -GpuHidHaarClassifierCascade* +GpuHidHaarClassifierCascade * gpuCreateHidHaarClassifierCascade( CvHaarClassifierCascade *cascade, int *size, int *totalclassifier) { GpuHidHaarClassifierCascade *out = 0; @@ -888,6 +889,13 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS bool findBiggestObject = (flags & CV_HAAR_FIND_BIGGEST_OBJECT) != 0; bool roughSearch = (flags & CV_HAAR_DO_ROUGH_SEARCH) != 0; + //the Intel HD Graphics is unsupported + if (gimg.clCxt->impl->devName.find("Intel(R) HD Graphics") != string::npos) + { + cout << " Intel HD GPU device unsupported " << endl; + return NULL; + } + //double t = 0; if( maxSize.height == 0 || maxSize.width == 0 ) { @@ -948,7 +956,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS vector scalev; for(factor = 1.f;; factor *= scaleFactor) { - CvSize winSize = { cvRound(winSize0.width *factor), cvRound(winSize0.height *factor) }; + CvSize winSize = { cvRound(winSize0.width * factor), cvRound(winSize0.height * factor) }; 
sz.width = cvRound( gimg.cols / factor ) + 1; sz.height = cvRound( gimg.rows / factor ) + 1; CvSize sz1 = { sz.width - winSize0.width - 1, sz.height - winSize0.height - 1 }; @@ -985,7 +993,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS size_t blocksize = 8; size_t localThreads[3] = { blocksize, blocksize , 1 }; - size_t globalThreads[3] = { grp_per_CU * ((gsum.clCxt)->impl->maxComputeUnits) *localThreads[0], + size_t globalThreads[3] = { grp_per_CU *((gsum.clCxt)->impl->maxComputeUnits) *localThreads[0], localThreads[1], 1 }; int outputsz = 256 * globalThreads[0] / localThreads[0]; @@ -1067,7 +1075,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS //classifierbuffer = clCreateBuffer(gsum.clCxt->clContext,CL_MEM_READ_ONLY,sizeof(GpuHidHaarClassifier)*totalclassifier,NULL,&status); //status = clEnqueueWriteBuffer(gsum.clCxt->clCmdQueue,classifierbuffer,1,0,sizeof(GpuHidHaarClassifier)*totalclassifier,classifier,0,NULL,NULL); - nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY,nodenum * sizeof(GpuHidHaarTreeNode)); + nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, nodenum * sizeof(GpuHidHaarTreeNode)); //openCLVerifyCall(status); openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, nodebuffer, 1, 0, nodenum * sizeof(GpuHidHaarTreeNode), @@ -1104,10 +1112,10 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS int argcount = 0; //int grpnumperline = ((m + localThreads[0] - 1) / localThreads[0]); //int totalgrp = ((n + localThreads[1] - 1) / localThreads[1])*grpnumperline; - // openCLVerifyKernel(gsum.clCxt, kernel, &blocksize, globalThreads, localThreads); + // openCLVerifyKernel(gsum.clCxt, kernel, &blocksize, globalThreads, localThreads); //openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_mem),(void*)&cascadebuffer)); - - vector > args; + + vector > args; args.push_back ( make_pair(sizeof(cl_mem) , (void 
*)&stagebuffer )); args.push_back ( make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer )); args.push_back ( make_pair(sizeof(cl_mem) , (void *)&nodebuffer )); @@ -1124,40 +1132,40 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS args.push_back ( make_pair(sizeof(cl_int4) , (void *)&p )); args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq )); args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction )); - /* - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&stagebuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&scaleinfobuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&nodebuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsum.data)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsqsum.data)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&candidatebuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&pixelstep)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&loopcount)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startstage)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitstage)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&endstage)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startnode)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitnode)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&p)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&pq)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_float), (void *)&correction));*/ + /* + openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void 
*)&stagebuffer)); + openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&scaleinfobuffer)); + openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&nodebuffer)); + openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsum.data)); + openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsqsum.data)); + openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&candidatebuffer)); + openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&pixelstep)); + openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&loopcount)); + openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startstage)); + openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitstage)); + openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&endstage)); + openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startnode)); + openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitnode)); + openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&p)); + openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&pq)); + openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_float), (void *)&correction));*/ //openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&n)); //openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&grpnumperline)); //openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&totalgrp)); - // openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); + // openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); - // openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue)); - openCLExecuteKernel(gsum.clCxt, 
&haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1); - //t = (double)cvGetTickCount() - t; + // openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue)); + openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1); + //t = (double)cvGetTickCount() - t; //printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) ); //t = (double)cvGetTickCount(); //openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->impl->clCmdQueue, candidatebuffer, 1, 0, 4 * sizeof(int)*outputsz, candidate, 0, NULL, NULL)); openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz ); for(int i = 0; i < outputsz; i++) - if(candidate[4*i+2] != 0) - allCandidates.push_back(Rect(candidate[4*i], candidate[4*i+1], candidate[4*i+2], candidate[4*i+3])); + if(candidate[4 * i + 2] != 0) + allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1], candidate[4 * i + 2], candidate[4 * i + 3])); // t = (double)cvGetTickCount() - t; //printf( "post time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) ); //t = (double)cvGetTickCount(); @@ -1168,7 +1176,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS openCLSafeCall(clReleaseMemObject(scaleinfobuffer)); openCLSafeCall(clReleaseMemObject(nodebuffer)); openCLSafeCall(clReleaseMemObject(candidatebuffer)); - // openCLSafeCall(clReleaseKernel(kernel)); + // openCLSafeCall(clReleaseKernel(kernel)); //t = (double)cvGetTickCount() - t; //printf( "release time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) 
); } @@ -1200,8 +1208,8 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS cvRound(factor * winsize0.height) < gimg.rows - 10; n_factors++, factor *= scaleFactor ) { - CvSize winSize = { cvRound( winsize0.width *factor ), - cvRound( winsize0.height *factor ) + CvSize winSize = { cvRound( winsize0.width * factor ), + cvRound( winsize0.height * factor ) }; if( winSize.width < minSize.width || winSize.height < minSize.height ) { @@ -1232,13 +1240,13 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS int nodenum = (datasize - sizeof(GpuHidHaarClassifierCascade) - sizeof(GpuHidHaarStageClassifier) * gcascade->count - sizeof(GpuHidHaarClassifier) * totalclassifier) / sizeof(GpuHidHaarTreeNode); nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, - nodenum * sizeof(GpuHidHaarTreeNode)); + nodenum * sizeof(GpuHidHaarTreeNode)); //openCLVerifyCall(status); openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, nodebuffer, 1, 0, nodenum * sizeof(GpuHidHaarTreeNode), node, 0, NULL, NULL)); cl_mem newnodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_WRITE, - loopcount * nodenum * sizeof(GpuHidHaarTreeNode)); + loopcount * nodenum * sizeof(GpuHidHaarTreeNode)); int startstage = 0; int endstage = gcascade->count; //cl_kernel kernel; @@ -1270,25 +1278,25 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS int startnodenum = nodenum * i; int argcounts = 0; float factor2 = (float)factor; - /* - openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&nodebuffer)); - openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&newnodebuffer)); - openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&factor2)); - openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&correction[i])); - openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_int), (void *)&startnodenum)); - 
*/ - - vector > args1; + /* + openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&nodebuffer)); + openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&newnodebuffer)); + openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&factor2)); + openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&correction[i])); + openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_int), (void *)&startnodenum)); + */ + + vector > args1; args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&nodebuffer )); args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&newnodebuffer )); args1.push_back ( make_pair(sizeof(cl_float) , (void *)&factor2 )); args1.push_back ( make_pair(sizeof(cl_float) , (void *)&correction[i] )); args1.push_back ( make_pair(sizeof(cl_int) , (void *)&startnodenum )); - - size_t globalThreads2[3] = {nodenum,1,1}; - size_t localThreads2[3] = {256,1,1}; - - openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1); + + size_t globalThreads2[3] = {nodenum, 1, 1}; + size_t localThreads2[3] = {256, 1, 1}; + + openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1); //clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel2, 1, NULL, globalThreads2, 0, 0, NULL, NULL); //clFinish(gsum.clCxt->impl->clCmdQueue); @@ -1328,7 +1336,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&correctionbuffer)); openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&nodenum));*/ - vector > args; + vector > args; args.push_back ( make_pair(sizeof(cl_mem) , (void *)&stagebuffer )); args.push_back ( make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer )); args.push_back ( make_pair(sizeof(cl_mem) , (void 
*)&newnodebuffer )); @@ -1345,9 +1353,9 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS args.push_back ( make_pair(sizeof(cl_mem) , (void *)&pbuffer )); args.push_back ( make_pair(sizeof(cl_mem) , (void *)&correctionbuffer )); args.push_back ( make_pair(sizeof(cl_int) , (void *)&nodenum )); - - - openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1); + + + openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1); //openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); //openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue)); @@ -1356,8 +1364,8 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS for(int i = 0; i < outputsz; i++) { - if(candidate[4*i+2] != 0) - allCandidates.push_back(Rect(candidate[4*i], candidate[4*i+1], candidate[4*i+2], candidate[4*i+3])); + if(candidate[4 * i + 2] != 0) + allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1], candidate[4 * i + 2], candidate[4 * i + 3])); } free(scaleinfo); @@ -1420,7 +1428,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS } -CvHaarClassifierCascade* +CvHaarClassifierCascade * gpuLoadCascadeCART( const char **input_cascade, int n, CvSize orig_window_size ) { int i; @@ -1444,7 +1452,7 @@ gpuLoadCascadeCART( const char **input_cascade, int n, CvSize orig_window_size ) assert( count > 0 ); cascade->stage_classifier[i].count = count; cascade->stage_classifier[i].classifier = - (CvHaarClassifier *)cvAlloc( count * sizeof(cascade->stage_classifier[i].classifier[0])); + (CvHaarClassifier *)cvAlloc( count * sizeof(cascade->stage_classifier[i].classifier[0])); for( j = 0; j < count; j++ ) { @@ -1456,11 +1464,11 @@ gpuLoadCascadeCART( const char **input_cascade, 
int n, CvSize orig_window_size ) stage += dl; classifier->haar_feature = (CvHaarFeature *) cvAlloc( - classifier->count * ( sizeof( *classifier->haar_feature ) + - sizeof( *classifier->threshold ) + - sizeof( *classifier->left ) + - sizeof( *classifier->right ) ) + - (classifier->count + 1) * sizeof( *classifier->alpha ) ); + classifier->count * ( sizeof( *classifier->haar_feature ) + + sizeof( *classifier->threshold ) + + sizeof( *classifier->left ) + + sizeof( *classifier->right ) ) + + (classifier->count + 1) * sizeof( *classifier->alpha ) ); classifier->threshold = (float *) (classifier->haar_feature + classifier->count); classifier->left = (int *) (classifier->threshold + classifier->count); classifier->right = (int *) (classifier->left + classifier->count); @@ -1478,8 +1486,8 @@ gpuLoadCascadeCART( const char **input_cascade, int n, CvSize orig_window_size ) CvRect r; int band = 0; sscanf( stage, "%d%d%d%d%d%f%n", - &r.x, &r.y, &r.width, &r.height, &band, - &(classifier->haar_feature[l].rect[k].weight), &dl ); + &r.x, &r.y, &r.width, &r.height, &band, + &(classifier->haar_feature[l].rect[k].weight), &dl ); stage += dl; classifier->haar_feature[l].rect[k].r = r; } @@ -1491,12 +1499,12 @@ gpuLoadCascadeCART( const char **input_cascade, int n, CvSize orig_window_size ) for( k = rects; k < CV_HAAR_FEATURE_MAX; k++ ) { memset( classifier->haar_feature[l].rect + k, 0, - sizeof(classifier->haar_feature[l].rect[k]) ); + sizeof(classifier->haar_feature[l].rect[k]) ); } sscanf( stage, "%f%d%d%n", &(classifier->threshold[l]), - &(classifier->left[l]), - &(classifier->right[l]), &dl ); + &(classifier->left[l]), + &(classifier->right[l]), &dl ); stage += dl; } for( l = 0; l <= classifier->count; l++ ) @@ -1536,7 +1544,7 @@ gpuLoadCascadeCART( const char **input_cascade, int n, CvSize orig_window_size ) #define _MAX_PATH 1024 #endif -CV_IMPL CvHaarClassifierCascade* +CV_IMPL CvHaarClassifierCascade * gpuLoadHaarClassifierCascade( const char *directory, CvSize 
orig_window_size ) { const char **input_cascade = 0; @@ -1649,7 +1657,7 @@ gpuIsHaarClassifier( const void *struct_ptr ) return CV_IS_HAAR_CLASSIFIER( struct_ptr ); } -void* +void * gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) { CvHaarClassifierCascade *cascade = NULL; @@ -1699,15 +1707,15 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) trees_fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_TREES_NAME ); if( !trees_fn || !CV_NODE_IS_SEQ( trees_fn->tag ) - || trees_fn->data.seq->total <= 0 ) + || trees_fn->data.seq->total <= 0 ) { sprintf( buf, "Trees node is not a valid sequence. (stage %d)", i ); CV_Error( CV_StsError, buf ); } cascade->stage_classifier[i].classifier = - (CvHaarClassifier *) cvAlloc( trees_fn->data.seq->total - * sizeof( cascade->stage_classifier[i].classifier[0] ) ); + (CvHaarClassifier *) cvAlloc( trees_fn->data.seq->total + * sizeof( cascade->stage_classifier[i].classifier[0] ) ); for( j = 0; j < trees_fn->data.seq->total; ++j ) { cascade->stage_classifier[i].classifier[j].haar_feature = NULL; @@ -1727,17 +1735,17 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) if( !CV_NODE_IS_SEQ( tree_fn->tag ) || tree_fn->data.seq->total <= 0 ) { sprintf( buf, "Tree node is not a valid sequence." 
- " (stage %d, tree %d)", i, j ); + " (stage %d, tree %d)", i, j ); CV_Error( CV_StsError, buf ); } classifier->count = tree_fn->data.seq->total; classifier->haar_feature = (CvHaarFeature *) cvAlloc( - classifier->count * ( sizeof( *classifier->haar_feature ) + - sizeof( *classifier->threshold ) + - sizeof( *classifier->left ) + - sizeof( *classifier->right ) ) + - (classifier->count + 1) * sizeof( *classifier->alpha ) ); + classifier->count * ( sizeof( *classifier->haar_feature ) + + sizeof( *classifier->threshold ) + + sizeof( *classifier->left ) + + sizeof( *classifier->right ) ) + + (classifier->count + 1) * sizeof( *classifier->alpha ) ); classifier->threshold = (float *) (classifier->haar_feature + classifier->count); classifier->left = (int *) (classifier->threshold + classifier->count); classifier->right = (int *) (classifier->left + classifier->count); @@ -1755,23 +1763,23 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) if( !CV_NODE_IS_MAP( node_fn->tag ) ) { sprintf( buf, "Tree node %d is not a valid map. (stage %d, tree %d)", - k, i, j ); + k, i, j ); CV_Error( CV_StsError, buf ); } feature_fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_FEATURE_NAME ); if( !feature_fn || !CV_NODE_IS_MAP( feature_fn->tag ) ) { sprintf( buf, "Feature node is not a valid map. " - "(stage %d, tree %d, node %d)", i, j, k ); + "(stage %d, tree %d, node %d)", i, j, k ); CV_Error( CV_StsError, buf ); } rects_fn = cvGetFileNodeByName( fs, feature_fn, ICV_HAAR_RECTS_NAME ); if( !rects_fn || !CV_NODE_IS_SEQ( rects_fn->tag ) - || rects_fn->data.seq->total < 1 - || rects_fn->data.seq->total > CV_HAAR_FEATURE_MAX ) + || rects_fn->data.seq->total < 1 + || rects_fn->data.seq->total > CV_HAAR_FEATURE_MAX ) { sprintf( buf, "Rects node is not a valid sequence. 
" - "(stage %d, tree %d, node %d)", i, j, k ); + "(stage %d, tree %d, node %d)", i, j, k ); CV_Error( CV_StsError, buf ); } cvStartReadSeq( rects_fn->data.seq, &rects_reader ); @@ -1784,7 +1792,7 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) if( !CV_NODE_IS_SEQ( rect_fn->tag ) || rect_fn->data.seq->total != 5 ) { sprintf( buf, "Rect %d is not a valid sequence. " - "(stage %d, tree %d, node %d)", l, i, j, k ); + "(stage %d, tree %d, node %d)", l, i, j, k ); CV_Error( CV_StsError, buf ); } @@ -1792,7 +1800,7 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 ) { sprintf( buf, "x coordinate must be non-negative integer. " - "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); + "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); CV_Error( CV_StsError, buf ); } r.x = fn->data.i; @@ -1800,27 +1808,27 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 ) { sprintf( buf, "y coordinate must be non-negative integer. " - "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); + "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); CV_Error( CV_StsError, buf ); } r.y = fn->data.i; fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 2 ); if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 - || r.x + fn->data.i > cascade->orig_window_size.width ) + || r.x + fn->data.i > cascade->orig_window_size.width ) { sprintf( buf, "width must be positive integer and " - "(x + width) must not exceed window width. " - "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); + "(x + width) must not exceed window width. 
" + "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); CV_Error( CV_StsError, buf ); } r.width = fn->data.i; fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 3 ); if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 - || r.y + fn->data.i > cascade->orig_window_size.height ) + || r.y + fn->data.i > cascade->orig_window_size.height ) { sprintf( buf, "height must be positive integer and " - "(y + height) must not exceed window height. " - "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); + "(y + height) must not exceed window height. " + "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); CV_Error( CV_StsError, buf ); } r.height = fn->data.i; @@ -1828,7 +1836,7 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) if( !CV_NODE_IS_REAL( fn->tag ) ) { sprintf( buf, "weight must be real number. " - "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); + "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); CV_Error( CV_StsError, buf ); } @@ -1847,7 +1855,7 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) if( !fn || !CV_NODE_IS_INT( fn->tag ) ) { sprintf( buf, "tilted must be 0 or 1. " - "(stage %d, tree %d, node %d)", i, j, k ); + "(stage %d, tree %d, node %d)", i, j, k ); CV_Error( CV_StsError, buf ); } classifier->haar_feature[k].tilted = ( fn->data.i != 0 ); @@ -1855,7 +1863,7 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) if( !fn || !CV_NODE_IS_REAL( fn->tag ) ) { sprintf( buf, "threshold must be real number. " - "(stage %d, tree %d, node %d)", i, j, k ); + "(stage %d, tree %d, node %d)", i, j, k ); CV_Error( CV_StsError, buf ); } classifier->threshold[k] = (float) fn->data.f; @@ -1863,10 +1871,10 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) if( fn ) { if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k - || fn->data.i >= tree_fn->data.seq->total ) + || fn->data.i >= tree_fn->data.seq->total ) { sprintf( buf, "left node must be valid node number. 
" - "(stage %d, tree %d, node %d)", i, j, k ); + "(stage %d, tree %d, node %d)", i, j, k ); CV_Error( CV_StsError, buf ); } /* left node */ @@ -1878,20 +1886,20 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) if( !fn ) { sprintf( buf, "left node or left value must be specified. " - "(stage %d, tree %d, node %d)", i, j, k ); + "(stage %d, tree %d, node %d)", i, j, k ); CV_Error( CV_StsError, buf ); } if( !CV_NODE_IS_REAL( fn->tag ) ) { sprintf( buf, "left value must be real number. " - "(stage %d, tree %d, node %d)", i, j, k ); + "(stage %d, tree %d, node %d)", i, j, k ); CV_Error( CV_StsError, buf ); } /* left value */ if( last_idx >= classifier->count + 1 ) { sprintf( buf, "Tree structure is broken: too many values. " - "(stage %d, tree %d, node %d)", i, j, k ); + "(stage %d, tree %d, node %d)", i, j, k ); CV_Error( CV_StsError, buf ); } classifier->left[k] = -last_idx; @@ -1901,10 +1909,10 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) if( fn ) { if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k - || fn->data.i >= tree_fn->data.seq->total ) + || fn->data.i >= tree_fn->data.seq->total ) { sprintf( buf, "right node must be valid node number. " - "(stage %d, tree %d, node %d)", i, j, k ); + "(stage %d, tree %d, node %d)", i, j, k ); CV_Error( CV_StsError, buf ); } /* right node */ @@ -1916,20 +1924,20 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) if( !fn ) { sprintf( buf, "right node or right value must be specified. " - "(stage %d, tree %d, node %d)", i, j, k ); + "(stage %d, tree %d, node %d)", i, j, k ); CV_Error( CV_StsError, buf ); } if( !CV_NODE_IS_REAL( fn->tag ) ) { sprintf( buf, "right value must be real number. " - "(stage %d, tree %d, node %d)", i, j, k ); + "(stage %d, tree %d, node %d)", i, j, k ); CV_Error( CV_StsError, buf ); } /* right value */ if( last_idx >= classifier->count + 1 ) { sprintf( buf, "Tree structure is broken: too many values. 
" - "(stage %d, tree %d, node %d)", i, j, k ); + "(stage %d, tree %d, node %d)", i, j, k ); CV_Error( CV_StsError, buf ); } classifier->right[k] = -last_idx; @@ -1941,7 +1949,7 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) if( last_idx != classifier->count + 1 ) { sprintf( buf, "Tree structure is broken: too few values. " - "(stage %d, tree %d)", i, j ); + "(stage %d, tree %d)", i, j ); CV_Error( CV_StsError, buf ); } @@ -1961,7 +1969,7 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_PARENT_NAME ); if( !fn || !CV_NODE_IS_INT( fn->tag ) - || fn->data.i < -1 || fn->data.i >= cascade->count ) + || fn->data.i < -1 || fn->data.i >= cascade->count ) { sprintf( buf, "parent must be integer number. (stage %d)", i ); CV_Error( CV_StsError, buf ); @@ -1969,7 +1977,7 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) parent = fn->data.i; fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_NEXT_NAME ); if( !fn || !CV_NODE_IS_INT( fn->tag ) - || fn->data.i < -1 || fn->data.i >= cascade->count ) + || fn->data.i < -1 || fn->data.i >= cascade->count ) { sprintf( buf, "next must be integer number. 
(stage %d)", i ); CV_Error( CV_StsError, buf ); @@ -1993,7 +2001,7 @@ gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) void gpuWriteHaarClassifier( CvFileStorage *fs, const char *name, const void *struct_ptr, -CvAttrList attributes ) + CvAttrList attributes ) { int i, j, k, l; char buf[256]; @@ -2066,7 +2074,7 @@ CvAttrList attributes ) else { cvWriteReal( fs, ICV_HAAR_LEFT_VAL_NAME, - tree->alpha[-tree->left[k]] ); + tree->alpha[-tree->left[k]] ); } if( tree->right[k] > 0 ) @@ -2076,7 +2084,7 @@ CvAttrList attributes ) else { cvWriteReal( fs, ICV_HAAR_RIGHT_VAL_NAME, - tree->alpha[-tree->right[k]] ); + tree->alpha[-tree->right[k]] ); } cvEndWriteStruct( fs ); /* split */ @@ -2098,14 +2106,14 @@ CvAttrList attributes ) cvEndWriteStruct( fs ); /* root */ } -void* +void * gpuCloneHaarClassifier( const void *struct_ptr ) { CvHaarClassifierCascade *cascade = NULL; int i, j, k, n; const CvHaarClassifierCascade *cascade_src = - (const CvHaarClassifierCascade *) struct_ptr; + (const CvHaarClassifierCascade *) struct_ptr; n = cascade_src->count; cascade = gpuCreateHaarClassifierCascade(n); @@ -2120,8 +2128,8 @@ gpuCloneHaarClassifier( const void *struct_ptr ) cascade->stage_classifier[i].count = 0; cascade->stage_classifier[i].classifier = - (CvHaarClassifier *) cvAlloc( cascade_src->stage_classifier[i].count - * sizeof( cascade->stage_classifier[i].classifier[0] ) ); + (CvHaarClassifier *) cvAlloc( cascade_src->stage_classifier[i].count + * sizeof( cascade->stage_classifier[i].classifier[0] ) ); cascade->stage_classifier[i].count = cascade_src->stage_classifier[i].count; @@ -2131,17 +2139,17 @@ gpuCloneHaarClassifier( const void *struct_ptr ) for( j = 0; j < cascade->stage_classifier[i].count; ++j ) { const CvHaarClassifier *classifier_src = - &cascade_src->stage_classifier[i].classifier[j]; + &cascade_src->stage_classifier[i].classifier[j]; CvHaarClassifier *classifier = - &cascade->stage_classifier[i].classifier[j]; + 
&cascade->stage_classifier[i].classifier[j]; classifier->count = classifier_src->count; classifier->haar_feature = (CvHaarFeature *) cvAlloc( - classifier->count * ( sizeof( *classifier->haar_feature ) + - sizeof( *classifier->threshold ) + - sizeof( *classifier->left ) + - sizeof( *classifier->right ) ) + - (classifier->count + 1) * sizeof( *classifier->alpha ) ); + classifier->count * ( sizeof( *classifier->haar_feature ) + + sizeof( *classifier->threshold ) + + sizeof( *classifier->left ) + + sizeof( *classifier->right ) ) + + (classifier->count + 1) * sizeof( *classifier->alpha ) ); classifier->threshold = (float *) (classifier->haar_feature + classifier->count); classifier->left = (int *) (classifier->threshold + classifier->count); classifier->right = (int *) (classifier->left + classifier->count); @@ -2155,7 +2163,7 @@ gpuCloneHaarClassifier( const void *struct_ptr ) classifier->alpha[k] = classifier_src->alpha[k]; } classifier->alpha[classifier->count] = - classifier_src->alpha[classifier->count]; + classifier_src->alpha[classifier->count]; } } @@ -2164,9 +2172,9 @@ gpuCloneHaarClassifier( const void *struct_ptr ) #if 0 CvType haar_type( CV_TYPE_NAME_HAAR, gpuIsHaarClassifier, -(CvReleaseFunc)gpuReleaseHaarClassifierCascade, -gpuReadHaarClassifier, gpuWriteHaarClassifier, -gpuCloneHaarClassifier ); + (CvReleaseFunc)gpuReleaseHaarClassifierCascade, + gpuReadHaarClassifier, gpuWriteHaarClassifier, + gpuCloneHaarClassifier ); namespace cv @@ -2185,14 +2193,14 @@ namespace cv } void HaarClassifierCascade::detectMultiScale( const Mat &image, - Vector& objects, double scaleFactor, - int minNeighbors, int flags, - Size minSize ) + Vector &objects, double scaleFactor, + int minNeighbors, int flags, + Size minSize ) { MemStorage storage(cvCreateMemStorage(0)); CvMat _image = image; CvSeq *_objects = gpuHaarDetectObjects( &_image, cascade, storage, scaleFactor, - minNeighbors, flags, minSize ); + minNeighbors, flags, minSize ); Seq(_objects).copyTo(objects); } @@ 
-2202,7 +2210,7 @@ namespace cv } void HaarClassifierCascade::setImages( const Mat &sum, const Mat &sqsum, - const Mat &tilted, double scale ) + const Mat &tilted, double scale ) { CvMat _sum = sum, _sqsum = sqsum, _tilted = tilted; gpuSetImagesForHaarClassifierCascade( cascade, &_sum, &_sqsum, &_tilted, scale ); @@ -2473,8 +2481,8 @@ else CV_INLINE double gpuEvalHidHaarClassifier( GpuHidHaarClassifier *classifier, -double variance_norm_factor, -size_t p_offset ) + double variance_norm_factor, + size_t p_offset ) { /* int idx = 0; @@ -2500,7 +2508,7 @@ size_t p_offset ) CV_IMPL int gpuRunHaarClassifierCascade( const CvHaarClassifierCascade *_cascade, -CvPoint pt, int start_stage ) + CvPoint pt, int start_stage ) { /* int result = -1; @@ -2586,9 +2594,9 @@ namespace cv struct gpuHaarDetectObjects_ScaleImage_Invoker { gpuHaarDetectObjects_ScaleImage_Invoker( const CvHaarClassifierCascade *_cascade, - int _stripSize, double _factor, - const Mat &_sum1, const Mat &_sqsum1, Mat *_norm1, - Mat *_mask1, Rect _equRect, ConcurrentRectVector &_vec ) + int _stripSize, double _factor, + const Mat &_sum1, const Mat &_sqsum1, Mat *_norm1, + Mat *_mask1, Rect _equRect, ConcurrentRectVector &_vec ) { cascade = _cascade; stripSize = _stripSize; @@ -2614,7 +2622,7 @@ namespace cv { if( gpuRunHaarClassifierCascade( cascade, cvPoint(x, y), 0 ) > 0 ) vec->push_back(Rect(cvRound(x * factor), cvRound(y * factor), - winSize.width, winSize.height)); + winSize.width, winSize.height)); } } @@ -2630,9 +2638,9 @@ namespace cv struct gpuHaarDetectObjects_ScaleCascade_Invoker { gpuHaarDetectObjects_ScaleCascade_Invoker( const CvHaarClassifierCascade *_cascade, - Size _winsize, const Range &_xrange, double _ystep, - size_t _sumstep, const int **_p, const int **_pq, - ConcurrentRectVector &_vec ) + Size _winsize, const Range &_xrange, double _ystep, + size_t _sumstep, const int **_p, const int **_pq, + ConcurrentRectVector &_vec ) { cascade = _cascade; winsize = _winsize; diff --git 
a/modules/ocl/src/hog.cpp b/modules/ocl/src/hog.cpp index 1a813a7..7eca4fe 100644 --- a/modules/ocl/src/hog.cpp +++ b/modules/ocl/src/hog.cpp @@ -51,19 +51,65 @@ using namespace std; #if !defined (HAVE_OPENCL) -cv::ocl::HOGDescriptor::HOGDescriptor(Size, Size, Size, Size, int, double, double, bool, int) { throw_nogpu(); } -size_t cv::ocl::HOGDescriptor::getDescriptorSize() const { throw_nogpu(); return 0; } -size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const { throw_nogpu(); return 0; } -double cv::ocl::HOGDescriptor::getWinSigma() const { throw_nogpu(); return 0; } -bool cv::ocl::HOGDescriptor::checkDetectorSize() const { throw_nogpu(); return false; } -void cv::ocl::HOGDescriptor::setSVMDetector(const vector&) { throw_nogpu(); } -void cv::ocl::HOGDescriptor::detect(const oclMat&, vector&, double, Size, Size) { throw_nogpu(); } -void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat&, vector&, double, Size, Size, double, int) { throw_nogpu(); } -void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat&) { throw_nogpu(); } -void cv::ocl::HOGDescriptor::getDescriptors(const oclMat&, Size, oclMat&, int) { throw_nogpu(); } -std::vector cv::ocl::HOGDescriptor::getDefaultPeopleDetector() { throw_nogpu(); return std::vector(); } -std::vector cv::ocl::HOGDescriptor::getPeopleDetector48x96() { throw_nogpu(); return std::vector(); } -std::vector cv::ocl::HOGDescriptor::getPeopleDetector64x128() { throw_nogpu(); return std::vector(); } +cv::ocl::HOGDescriptor::HOGDescriptor(Size, Size, Size, Size, int, double, double, bool, int) +{ + throw_nogpu(); +} +size_t cv::ocl::HOGDescriptor::getDescriptorSize() const +{ + throw_nogpu(); + return 0; +} +size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const +{ + throw_nogpu(); + return 0; +} +double cv::ocl::HOGDescriptor::getWinSigma() const +{ + throw_nogpu(); + return 0; +} +bool cv::ocl::HOGDescriptor::checkDetectorSize() const +{ + throw_nogpu(); + return false; +} +void 
cv::ocl::HOGDescriptor::setSVMDetector(const vector &) +{ + throw_nogpu(); +} +void cv::ocl::HOGDescriptor::detect(const oclMat &, vector &, double, Size, Size) +{ + throw_nogpu(); +} +void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &, vector &, double, Size, Size, double, int) +{ + throw_nogpu(); +} +void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &) +{ + throw_nogpu(); +} +void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &, Size, oclMat &, int) +{ + throw_nogpu(); +} +std::vector cv::ocl::HOGDescriptor::getDefaultPeopleDetector() +{ + throw_nogpu(); + return std::vector(); +} +std::vector cv::ocl::HOGDescriptor::getPeopleDetector48x96() +{ + throw_nogpu(); + return std::vector(); +} +std::vector cv::ocl::HOGDescriptor::getPeopleDetector64x128() +{ + throw_nogpu(); + return std::vector(); +} #else @@ -73,70 +119,79 @@ std::vector cv::ocl::HOGDescriptor::getPeopleDetector64x128() { throw_nog #define CELLS_PER_BLOCK_Y 2 #define NTHREADS 256 -namespace cv { namespace ocl +namespace cv { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *objdetect_hog; -}} + namespace ocl + { + ///////////////////////////OpenCL kernel strings/////////////////////////// + extern const char *objdetect_hog; + } +} -namespace cv { namespace ocl { namespace device +namespace cv { - namespace hog + namespace ocl { - int cnbins; - int cblock_stride_x; - int cblock_stride_y; - int cnblocks_win_x; - int cnblocks_win_y; - int cblock_hist_size; - int cblock_hist_size_2up; - int cdescr_size; - int cdescr_width; - - void set_up_constants(int nbins, int block_stride_x, int block_stride_y, - int nblocks_win_x, int nblocks_win_y); - - void compute_hists(int nbins, int block_stride_x, int blovck_stride_y, - int height, int width, const cv::ocl::oclMat& grad, - const cv::ocl::oclMat& qangle, float sigma, cv::ocl::oclMat& block_hists); - - void normalize_hists(int nbins, int block_stride_x, int block_stride_y, - int 
height, int width, cv::ocl::oclMat& block_hists, float threshold); - - void classify_hists(int win_height, int win_width, int block_stride_y, - int block_stride_x, int win_stride_y, int win_stride_x, int height, - int width, const cv::ocl::oclMat& block_hists, const cv::ocl::oclMat& coefs, float free_coef, - float threshold, cv::ocl::oclMat& labels); - - void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, - int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat& block_hists, - cv::ocl::oclMat& descriptors); - void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, - int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat& block_hists, - cv::ocl::oclMat& descriptors); - - void compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat& img, - float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma); - void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat& img, - float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma); - - void resize( const oclMat &src, oclMat &dst, const Size sz); + namespace device + { + namespace hog + { + int cnbins; + int cblock_stride_x; + int cblock_stride_y; + int cnblocks_win_x; + int cnblocks_win_y; + int cblock_hist_size; + int cblock_hist_size_2up; + int cdescr_size; + int cdescr_width; + + void set_up_constants(int nbins, int block_stride_x, int block_stride_y, + int nblocks_win_x, int nblocks_win_y); + + void compute_hists(int nbins, int block_stride_x, int blovck_stride_y, + int height, int width, const cv::ocl::oclMat &grad, + const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists); + + void normalize_hists(int nbins, int block_stride_x, int block_stride_y, + int height, int width, cv::ocl::oclMat &block_hists, float threshold); + + void classify_hists(int win_height, int win_width, int 
block_stride_y, + int block_stride_x, int win_stride_y, int win_stride_x, int height, + int width, const cv::ocl::oclMat &block_hists, const cv::ocl::oclMat &coefs, float free_coef, + float threshold, cv::ocl::oclMat &labels); + + void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat &block_hists, + cv::ocl::oclMat &descriptors); + void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat &block_hists, + cv::ocl::oclMat &descriptors); + + void compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img, + float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma); + void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img, + float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma); + + void resize( const oclMat &src, oclMat &dst, const Size sz); + } + } } -}}} +} using namespace ::cv::ocl::device; cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, Size cell_size_, int nbins_, double win_sigma_, double threshold_L2hys_, bool gamma_correction_, int nlevels_) - : win_size(win_size_), - block_size(block_size_), - block_stride(block_stride_), - cell_size(cell_size_), - nbins(nbins_), - win_sigma(win_sigma_), - threshold_L2hys(threshold_L2hys_), - gamma_correction(gamma_correction_), - nlevels(nlevels_) + : win_size(win_size_), + block_size(block_size_), + block_stride(block_stride_), + cell_size(cell_size_), + nbins(nbins_), + win_sigma(win_sigma_), + threshold_L2hys(threshold_L2hys_), + gamma_correction(gamma_correction_), + nlevels(nlevels_) { CV_Assert((win_size.width - block_size.width ) % block_stride.width == 0 && (win_size.height - block_size.height) % block_stride.height == 0); @@ 
-179,7 +234,7 @@ bool cv::ocl::HOGDescriptor::checkDetectorSize() const return detector_size == 0 || detector_size == descriptor_size || detector_size == descriptor_size + 1; } -void cv::ocl::HOGDescriptor::setSVMDetector(const vector& _detector) +void cv::ocl::HOGDescriptor::setSVMDetector(const vector &_detector) { std::vector detector_reordered(_detector.size()); @@ -189,8 +244,8 @@ void cv::ocl::HOGDescriptor::setSVMDetector(const vector& _detector) for (int i = 0; i < blocks_per_img.height; ++i) for (int j = 0; j < blocks_per_img.width; ++j) { - const float* src = &_detector[0] + (j * blocks_per_img.height + i) * block_hist_size; - float* dst = &detector_reordered[0] + (i * blocks_per_img.width + j) * block_hist_size; + const float *src = &_detector[0] + (j * blocks_per_img.height + i) * block_hist_size; + float *dst = &detector_reordered[0] + (i * blocks_per_img.width + j) * block_hist_size; for (size_t k = 0; k < block_hist_size; ++k) dst[k] = src[k]; } @@ -203,7 +258,7 @@ void cv::ocl::HOGDescriptor::setSVMDetector(const vector& _detector) CV_Assert(checkDetectorSize()); } -void cv::ocl::HOGDescriptor::init_buffer(const oclMat& img, Size win_stride) +void cv::ocl::HOGDescriptor::init_buffer(const oclMat &img, Size win_stride) { if (!image_scale.empty()) return; @@ -222,7 +277,7 @@ void cv::ocl::HOGDescriptor::init_buffer(const oclMat& img, Size win_stride) labels.create(1, wins_per_img.area(), CV_8U); } -void cv::ocl::HOGDescriptor::computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle) +void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle) { CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); @@ -239,19 +294,19 @@ void cv::ocl::HOGDescriptor::computeGradient(const oclMat& img, oclMat& grad, oc } -void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat& img) +void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &img) { computeGradient(img, grad, qangle); - 
hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width, - grad, qangle, (float)getWinSigma(), block_hists); + hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width, + grad, qangle, (float)getWinSigma(), block_hists); - hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width, - block_hists, (float)threshold_L2hys); + hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width, + block_hists, (float)threshold_L2hys); } -void cv::ocl::HOGDescriptor::getDescriptors(const oclMat& img, Size win_stride, oclMat& descriptors, int descr_format) +void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride, oclMat &descriptors, int descr_format) { CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); @@ -269,11 +324,11 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat& img, Size win_stride, { case DESCR_FORMAT_ROW_BY_ROW: hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width, - win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors); + win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors); break; case DESCR_FORMAT_COL_BY_COL: hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width, - win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors); + win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors); break; default: CV_Error(CV_StsBadArg, "Unknown descriptor format"); @@ -281,7 +336,7 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat& img, Size win_stride, } -void cv::ocl::HOGDescriptor::detect(const oclMat& img, vector& 
hits, double hit_threshold, Size win_stride, Size padding) +void cv::ocl::HOGDescriptor::detect(const oclMat &img, vector &hits, double hit_threshold, Size win_stride, Size padding) { CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); CV_Assert(padding == Size(0, 0)); @@ -303,7 +358,7 @@ void cv::ocl::HOGDescriptor::detect(const oclMat& img, vector& hits, doub detector, (float)free_coef, (float)hit_threshold, labels); labels.download(labels_host); - unsigned char* vec = labels_host.ptr(); + unsigned char *vec = labels_host.ptr(); Size wins_per_img = numPartsWithin(effect_size, win_size, win_stride); for (int i = 0; i < wins_per_img.area(); i++) { @@ -316,8 +371,8 @@ void cv::ocl::HOGDescriptor::detect(const oclMat& img, vector& hits, doub -void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat& img, vector& found_locations, double hit_threshold, - Size win_stride, Size padding, double scale0, int group_threshold) +void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, vector &found_locations, double hit_threshold, + Size win_stride, Size padding, double scale0, int group_threshold) { CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); CV_Assert(scale0 > 1); @@ -329,8 +384,8 @@ void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat& img, vector& f for (levels = 0; levels < nlevels; levels++) { level_scale.push_back(scale); - if (cvRound(img.cols/scale) < win_size.width || - cvRound(img.rows/scale) < win_size.height || scale0 <= 1) + if (cvRound(img.cols / scale) < win_size.width || + cvRound(img.rows / scale) < win_size.height || scale0 <= 1) break; scale *= scale0; } @@ -386,7 +441,8 @@ std::vector cv::ocl::HOGDescriptor::getDefaultPeopleDetector() std::vector cv::ocl::HOGDescriptor::getPeopleDetector48x96() { - static const float detector[] = { + static const float detector[] = + { 0.294350f, -0.098796f, -0.129522f, 0.078753f, 0.387527f, 0.261529f, 0.145939f, 0.061520f, 0.328699f, 0.227148f, -0.066467f, -0.086723f, 0.047559f, 
0.106714f, 0.037897f, 0.111461f, -0.024406f, 0.304769f, @@ -717,8 +773,9 @@ std::vector cv::ocl::HOGDescriptor::getPeopleDetector48x96() 0.099937f, 0.091059f, 0.247307f, 0.204226f, -0.042753f, -0.068580f, -0.119002f, 0.026722f, 0.034853f, -0.060934f, -0.025054f, -0.093026f, -0.035372f, -0.233209f, -0.049869f, -0.039151f, -0.022279f, -0.065380f, - -9.063785f }; - return vector(detector, detector + sizeof(detector)/sizeof(detector[0])); + -9.063785f + }; + return vector(detector, detector + sizeof(detector) / sizeof(detector[0])); } @@ -726,813 +783,815 @@ std::vector cv::ocl::HOGDescriptor::getPeopleDetector48x96() std::vector cv::ocl::HOGDescriptor::getPeopleDetector64x128() { - static const float detector[] = { - 0.05359386f, -0.14721455f, -0.05532170f, 0.05077307f, - 0.11547081f, -0.04268804f, 0.04635834f, -0.05468199f, 0.08232084f, - 0.10424068f, -0.02294518f, 0.01108519f, 0.01378693f, 0.11193510f, - 0.01268418f, 0.08528346f, -0.06309239f, 0.13054633f, 0.08100729f, - -0.05209739f, -0.04315529f, 0.09341384f, 0.11035026f, -0.07596218f, - -0.05517511f, -0.04465296f, 0.02947334f, 0.04555536f, - -3.55954492e-003f, 0.07818956f, 0.07730991f, 0.07890715f, 0.06222893f, - 0.09001380f, -0.03574381f, 0.03414327f, 0.05677258f, -0.04773581f, - 0.03746637f, -0.03521175f, 0.06955440f, -0.03849038f, 0.01052293f, - 0.01736112f, 0.10867710f, 0.08748853f, 3.29739624e-003f, 0.10907028f, - 0.07913758f, 0.10393070f, 0.02091867f, 0.11594022f, 0.13182420f, - 0.09879354f, 0.05362710f, -0.06745391f, -7.01260753e-003f, - 5.24702156e-003f, 0.03236255f, 0.01407916f, 0.02207983f, 0.02537322f, - 0.04547948f, 0.07200756f, 0.03129894f, -0.06274468f, 0.02107014f, - 0.06035208f, 0.08636236f, 4.53164103e-003f, 0.02193363f, 0.02309801f, - 0.05568166f, -0.02645093f, 0.04448695f, 0.02837519f, 0.08975694f, - 0.04461516f, 0.08975355f, 0.07514391f, 0.02306982f, 0.10410084f, - 0.06368385f, 0.05943464f, 4.58420580e-003f, 0.05220337f, 0.06675851f, - 0.08358569f, 0.06712101f, 0.06559004f, -0.03930482f, 
-9.15936660e-003f, - -0.05897915f, 0.02816453f, 0.05032348f, 0.06780671f, 0.03377650f, - -6.09417039e-004f, -0.01795146f, -0.03083684f, -0.01302475f, - -0.02972313f, 7.88706727e-003f, -0.03525961f, -2.50397739e-003f, - 0.05245084f, 0.11791293f, -0.02167498f, 0.05299332f, 0.06640524f, - 0.05190265f, -8.27316567e-003f, 0.03033127f, 0.05842173f, - -4.01050318e-003f, -6.25105947e-003f, 0.05862958f, -0.02465461f, - 0.05546781f, -0.08228195f, -0.07234028f, 0.04640540f, -0.01308254f, - -0.02506191f, 0.03100746f, -0.04665651f, -0.04591486f, 0.02949927f, - 0.06035462f, 0.02244646f, -0.01698639f, 0.01040041f, 0.01131170f, - 0.05419579f, -0.02130277f, -0.04321722f, -0.03665198f, 0.01126490f, - -0.02606488f, -0.02228328f, -0.02255680f, -0.03427236f, - -7.75165204e-003f, -0.06195229f, 8.21638294e-003f, 0.09535975f, - -0.03709979f, -0.06942501f, 0.14579427f, -0.05448192f, -0.02055904f, - 0.05747357f, 0.02781788f, -0.07077577f, -0.05178314f, -0.10429011f, - -0.11235505f, 0.07529039f, -0.07559302f, -0.08786739f, 0.02983843f, - 0.02667585f, 0.01382199f, -0.01797496f, -0.03141199f, -0.02098101f, - 0.09029204f, 0.04955018f, 0.13718739f, 0.11379953f, 1.80019124e-003f, - -0.04577610f, -1.11108483e-003f, -0.09470536f, -0.11596080f, - 0.04489342f, 0.01784211f, 3.06850672e-003f, 0.10781866f, - 3.36498418e-003f, -0.10842580f, -0.07436839f, -0.10535070f, - -0.01866805f, 0.16057891f, -5.07316366e-003f, -0.04295658f, - -5.90488780e-003f, 8.82003549e-003f, -0.01492646f, -0.05029279f, - -0.12875880f, 8.78831954e-004f, -0.01297184f, -0.07592774f, - -0.02668831f, -6.93787413e-004f, 0.02406698f, -0.01773298f, - -0.03855745f, -0.05877856f, 0.03259695f, 0.12826584f, 0.06292590f, - -4.10733931e-003f, 0.10996531f, 0.01332991f, 0.02088735f, 0.04037504f, - -0.05210760f, 0.07760046f, 0.06399347f, -0.05751930f, -0.10053057f, - 0.07505023f, -0.02139782f, 0.01796176f, 2.34400877e-003f, -0.04208319f, - 0.07355055f, 0.05093350f, -0.02996780f, -0.02219072f, 0.03355330f, - 0.04418742f, -0.05580705f, 
-0.05037573f, -0.04548179f, 0.01379514f, - 0.02150671f, -0.02194211f, -0.13682702f, 0.05464972f, 0.01608082f, - 0.05309116f, 0.04701022f, 1.33690401e-003f, 0.07575664f, 0.09625306f, - 8.92647635e-003f, -0.02819123f, 0.10866830f, -0.03439325f, - -0.07092371f, -0.06004780f, -0.02712298f, -7.07467366e-003f, - -0.01637020f, 0.01336790f, -0.10313606f, 0.04906582f, -0.05732445f, - -0.02731079f, 0.01042235f, -0.08340668f, 0.03686501f, 0.06108340f, - 0.01322748f, -0.07809529f, 0.03774724f, -0.03413248f, -0.06096525f, - -0.04212124f, -0.07982176f, -1.25973229e-003f, -0.03045501f, - -0.01236493f, -0.06312395f, 0.04789570f, -0.04602066f, 0.08576570f, - 0.02521080f, 0.02988098f, 0.10314583f, 0.07060035f, 0.04520544f, - -0.04426654f, 0.13146530f, 0.08386490f, 0.02164590f, -2.12280243e-003f, - -0.03686353f, -0.02074944f, -0.03829959f, -0.01530596f, 0.02689708f, - 0.11867401f, -0.06043470f, -0.02785023f, -0.04775074f, 0.04878745f, - 0.06350956f, 0.03494788f, 0.01467400f, 1.17890188e-003f, 0.04379614f, - 2.03681854e-003f, -0.03958609f, -0.01072688f, 6.43705716e-003f, - 0.02996500f, -0.03418507f, -0.01960307f, -0.01219154f, - -4.37000440e-003f, -0.02549453f, 0.02646318f, -0.01632513f, - 6.46516960e-003f, -0.01929734f, 4.78711911e-003f, 0.04962371f, - 0.03809111f, 0.07265724f, 0.05758125f, -0.03741554f, 0.01648608f, - -8.45285598e-003f, 0.03996826f, -0.08185477f, 0.02638875f, - -0.04026615f, -0.02744674f, -0.04071517f, 1.05096330e-003f, - -0.04741232f, -0.06733172f, 8.70434940e-003f, -0.02192543f, - 1.35350740e-003f, -0.03056974f, -0.02975521f, -0.02887780f, - -0.01210713f, -0.04828526f, -0.09066251f, -0.09969629f, -0.03665164f, - -8.88111943e-004f, -0.06826669f, -0.01866150f, -0.03627640f, - -0.01408288f, 0.01874239f, -0.02075835f, 0.09145175f, -0.03547291f, - 0.05396780f, 0.04198981f, 0.01301925f, -0.03384354f, -0.12201976f, - 0.06830920f, -0.03715654f, 9.55848210e-003f, 5.05685573e-003f, - 0.05659294f, 3.90764466e-003f, 0.02808490f, -0.05518097f, -0.03711621f, - -0.02835565f, 
-0.04420464f, -0.01031947f, 0.01883466f, - -8.49525444e-003f, -0.09419250f, -0.01269387f, -0.02133371f, - -0.10190815f, -0.07844430f, 2.43644323e-003f, -4.09610150e-003f, - 0.01202551f, -0.06452291f, -0.10593818f, -0.02464746f, -0.02199699f, - -0.07401930f, 0.07285886f, 8.87513801e-004f, 9.97662079e-003f, - 8.46779719e-003f, 0.03730333f, -0.02905126f, 0.03573337f, -0.04393689f, - -0.12014472f, 0.03176554f, -2.76015815e-003f, 0.10824566f, 0.05090732f, - -3.30179278e-003f, -0.05123822f, 5.04784798e-003f, -0.05664124f, - -5.99415926e-003f, -0.05341901f, -0.01221393f, 0.01291318f, - 9.91760660e-003f, -7.56987557e-003f, -0.06193124f, -2.24549137e-003f, - 0.01987562f, -0.02018840f, -0.06975540f, -0.06601523f, -0.03349112f, - -0.08910118f, -0.03371435f, -0.07406893f, -0.02248047f, -0.06159951f, - 2.77751544e-003f, -0.05723337f, -0.04792468f, 0.07518548f, - 2.77279224e-003f, 0.04211938f, 0.03100502f, 0.05278448f, 0.03954679f, - -0.03006846f, -0.03851741f, -0.02792403f, -0.02875333f, 0.01531280f, - 0.02186953f, -0.01989829f, 2.50679464e-003f, -0.10258728f, - -0.04785743f, -0.02887216f, 3.85063468e-003f, 0.01112236f, - 8.29218887e-003f, -0.04822981f, -0.04503597f, -0.03713100f, - -0.06988008f, -0.11002295f, -2.69209221e-003f, 1.85383670e-003f, - -0.05921049f, -0.06105053f, -0.08458050f, -0.04527602f, - 8.90329306e-004f, -0.05875023f, -2.68602883e-003f, -0.01591195f, - 0.03631859f, 0.05493166f, 0.07300330f, 5.53333294e-003f, 0.06400407f, - 0.01847740f, -5.76280477e-003f, -0.03210877f, 4.25160583e-003f, - 0.01166520f, -1.44864211e-003f, 0.02253744f, -0.03367080f, 0.06983195f, - -4.22323542e-003f, -8.89401045e-003f, -0.07943393f, 0.05199728f, - 0.06065201f, 0.04133492f, 1.44032843e-003f, -0.09585235f, -0.03964731f, - 0.04232114f, 0.01750465f, -0.04487902f, -7.59733608e-003f, 0.02011171f, - 0.04673622f, 0.09011173f, -0.07869188f, -0.04682482f, -0.05080139f, - -3.99383716e-003f, -0.05346331f, 0.01085723f, -0.03599333f, - -0.07097908f, 0.03551549f, 0.02680387f, 0.03471529f, 
0.01790393f, - 0.05471273f, 9.62048303e-003f, -0.03180215f, 0.05864431f, 0.02330614f, - 0.01633144f, -0.05616681f, -0.10245429f, -0.08302189f, 0.07291322f, - -0.01972590f, -0.02619633f, -0.02485327f, -0.04627592f, - 1.48853404e-003f, 0.05514185f, -0.01270860f, -0.01948900f, 0.06373586f, - 0.05002292f, -0.03009798f, 8.76216311e-003f, -0.02474238f, - -0.05504891f, 1.74034527e-003f, -0.03333667f, 0.01524987f, 0.11663762f, - -1.32344989e-003f, -0.06608453f, 0.05687166f, -6.89525274e-004f, - -0.04402352f, 0.09450210f, -0.04222684f, -0.05360983f, 0.01779531f, - 0.02561388f, -0.11075410f, -8.77790991e-003f, -0.01099504f, - -0.10380266f, 0.03103457f, -0.02105741f, -0.07371717f, 0.05146710f, - 0.10581432f, -0.08617968f, -0.02892107f, 0.01092199f, 0.14551543f, - -2.24320893e-003f, -0.05818033f, -0.07390742f, 0.05701261f, - 0.12937020f, -0.04986651f, 0.10182415f, 0.05028650f, 0.12515625f, - 0.09175041f, 0.06404983f, 0.01523394f, 0.09460562f, 0.06106631f, - -0.14266998f, -0.02926703f, 0.02762171f, 0.02164151f, - -9.58488265e-004f, -0.04231362f, -0.09866509f, 0.04322244f, - 0.05872034f, -0.04838847f, 0.06319253f, 0.02443798f, -0.03606876f, - 9.38737206e-003f, 0.04289991f, -0.01027411f, 0.08156885f, 0.08751175f, - -0.13191354f, 8.16054735e-003f, -0.01452161f, 0.02952677f, 0.03615945f, - -2.09128903e-003f, 0.02246693f, 0.09623287f, 0.09412123f, -0.02924758f, - -0.07815186f, -0.02203079f, -2.02566991e-003f, 0.01094733f, - -0.01442332f, 0.02838561f, 0.11882371f, 7.28798332e-003f, -0.10345965f, - 0.07561217f, -0.02049661f, 4.44177445e-003f, 0.01609347f, -0.04893158f, - -0.08758243f, -7.67420698e-003f, 0.08862378f, 0.06098121f, 0.06565887f, - 7.32981879e-003f, 0.03558407f, -0.03874352f, -0.02490055f, - -0.06771075f, 0.09939223f, -0.01066077f, 0.01382995f, -0.07289080f, - 7.47184316e-003f, 0.10621431f, -0.02878659f, 0.02383525f, -0.03274646f, - 0.02137008f, 0.03837290f, 0.02450992f, -0.04296818f, -0.02895143f, - 0.05327370f, 0.01499020f, 0.04998732f, 0.12938657f, 0.09391870f, - 
0.04292390f, -0.03359194f, -0.06809492f, 0.01125796f, 0.17290455f, - -0.03430733f, -0.06255233f, -0.01813114f, 0.11726857f, -0.06127599f, - -0.08677909f, -0.03429872f, 0.04684938f, 0.08161420f, 0.03538774f, - 0.01833884f, 0.11321855f, 0.03261845f, -0.04826299f, 0.01752407f, - -0.01796414f, -0.10464549f, -3.30041884e-003f, 2.29343961e-004f, - 0.01457292f, -0.02132982f, -0.02602923f, -9.87351313e-003f, - 0.04273872f, -0.02103316f, -0.07994065f, 0.02614958f, -0.02111666f, - -0.06964913f, -0.13453490f, -0.06861878f, -6.09341264e-003f, - 0.08251446f, 0.15612499f, 2.46531400e-003f, 8.88424646e-003f, - -0.04152999f, 0.02054853f, 0.05277953f, -0.03087788f, 0.02817579f, - 0.13939077f, 0.07641046f, -0.03627627f, -0.03015098f, -0.04041540f, - -0.01360690f, -0.06227205f, -0.02738223f, 0.13577610f, 0.15235767f, - -0.05392922f, -0.11175954f, 0.02157129f, 0.01146481f, -0.05264937f, - -0.06595174f, -0.02749175f, 0.11812254f, 0.17404149f, -0.06137035f, - -0.11003478f, -0.01351621f, -0.01745916f, -0.08577441f, -0.04469909f, - -0.06106115f, 0.10559758f, 0.20806813f, -0.09174948f, 7.09621934e-004f, - 0.03579374f, 0.07215115f, 0.02221742f, 0.01827742f, -7.90785067e-003f, - 0.01489554f, 0.14519960f, -0.06425831f, 0.02990399f, -1.80181325e-003f, - -0.01401528f, -0.04171134f, -3.70530109e-003f, -0.09090481f, - 0.09520713f, 0.08845516f, -0.02651753f, -0.03016730f, 0.02562448f, - 0.03563816f, -0.03817881f, 0.01433385f, 0.02256983f, 0.02872120f, - 0.01001934f, -0.06332260f, 0.04338406f, 0.07001807f, -0.04705722f, - -0.07318907f, 0.02630457f, 0.03106382f, 0.06648342f, 0.10913180f, - -0.01630815f, 0.02910308f, 0.02895109f, 0.08040254f, 0.06969310f, - 0.06797734f, 6.08639978e-003f, 4.16588830e-003f, 0.08926726f, - -0.03123648f, 0.02700146f, 0.01168734f, -0.01631594f, 4.61015804e-003f, - 8.51359498e-003f, -0.03544224f, 0.03571994f, 4.29766066e-003f, - -0.01970077f, -8.79793242e-003f, 0.09607988f, 0.01544222f, - -0.03923707f, 0.07308586f, 0.06061262f, 1.31683104e-004f, - -7.98222050e-003f, 
0.02399261f, -0.06084389f, -0.02743429f, - -0.05475523f, -0.04131311f, 0.03559756f, 0.03055342f, 0.02981433f, - 0.14860515f, 0.01766787f, 0.02945257f, 0.04898238f, 0.01026922f, - 0.02811658f, 0.08267091f, 0.02732154f, -0.01237693f, 0.11760156f, - 0.03802063f, -0.03309754f, 5.24957618e-003f, -0.02460510f, 0.02691451f, - 0.05399988f, -0.10133506f, 0.06385437f, -0.01818005f, 0.02259503f, - 0.03573135f, 0.01042848f, -0.04153402f, -0.04043029f, 0.01643575f, - 0.08326677f, 4.61383024e-004f, -0.05308095f, -0.08536223f, - -1.61011645e-003f, -0.02163720f, -0.01783352f, 0.03859637f, - 0.08498885f, -0.01725216f, 0.08625131f, 0.10995087f, 0.09177644f, - 0.08498347f, 0.07646490f, 0.05580502f, 0.02693516f, 0.09996913f, - 0.09070327f, 0.06667200f, 0.05873008f, -0.02247842f, 0.07772321f, - 0.12408436f, 0.12629253f, -8.41997913e-004f, 0.01477783f, 0.09165990f, - -2.98401713e-003f, -0.06466447f, -0.07057302f, 2.09516948e-004f, - 0.02210209f, -0.02158809f, -0.08602506f, -0.02284836f, - 4.01876355e-003f, 9.56660323e-003f, -0.02073978f, -0.04635138f, - -7.59423291e-003f, -0.01377393f, -0.04559359f, -0.13284740f, - -0.08671406f, -0.03654395f, 0.01142869f, 0.03287891f, -0.04392983f, - 0.06142959f, 0.17710890f, 0.10385257f, 0.01329137f, 0.10067633f, - 0.12450829f, -0.04476709f, 0.09049144f, 0.04589312f, 0.11167907f, - 0.08587538f, 0.04767583f, 1.67188141e-003f, 0.02359802f, -0.03808852f, - 0.03126272f, -0.01919029f, -0.05698918f, -0.02365112f, -0.06519032f, - -0.05599358f, -0.07097308f, -0.03301812f, -0.04719102f, -0.02566297f, - 0.01324074f, -0.09230672f, -0.05518232f, -0.04712864f, -0.03380903f, - -0.06719479f, 0.01183908f, -0.09326738f, 0.01642865f, 0.03789867f, - -6.61567831e-003f, 0.07796386f, 0.07246574f, 0.04706347f, -0.02523437f, - -0.01696830f, -0.08068866f, 0.06030888f, 0.10527060f, -0.06611756f, - 0.02977346f, 0.02621830f, 0.01913855f, -0.08479366f, -0.06322418f, - -0.13570616f, -0.07644490f, 9.31900274e-003f, -0.08095149f, - -0.10197903f, -0.05204025f, 0.01413151f, 
-0.07800411f, -0.01885122f, - -0.07509381f, -0.10136326f, -0.05212355f, -0.09944065f, - -1.33606605e-003f, -0.06342617f, -0.04178550f, -0.12373723f, - -0.02832736f, -0.06057501f, 0.05830070f, 0.07604282f, -0.06462587f, - 8.02447461e-003f, 0.11580125f, 0.12332212f, 0.01978462f, - -2.72378162e-003f, 0.05850752f, -0.04674481f, 0.05148062f, - -2.62542837e-003f, 0.11253355f, 0.09893716f, 0.09785093f, -0.04659257f, - -0.01102429f, -0.07002308f, 0.03088913f, -0.02565549f, -0.07671449f, - 3.17443861e-003f, -0.10783514f, -0.02314270f, -0.11089555f, - -0.01024768f, 0.03116021f, -0.04964825f, 0.02281825f, 5.50005678e-003f, - -0.08427856f, -0.14685495f, -0.07719755f, -0.13342668f, -0.04525511f, - -0.09914210f, 0.02588859f, 0.03469279f, 0.04664020f, 0.11688190f, - 0.09647275f, 0.10857815f, -0.01448726f, 0.04299758f, -0.06763151f, - 1.33257592e-003f, 0.14331576f, 0.07574340f, 0.09166205f, 0.05674926f, - 0.11325553f, -0.01106494f, 0.02062161f, -0.11484840f, -0.07492137f, - -0.02864293f, -0.01275638f, -0.06946032f, -0.10101652f, -0.04113498f, - -0.02214783f, -0.01273942f, -0.07480393f, -0.10556041f, -0.07622112f, - -0.09988393f, -0.11453961f, -0.12073903f, -0.09412795f, -0.07146588f, - -0.04054537f, -0.06127083f, 0.04221122f, 0.07688113f, 0.04099256f, - 0.12663734f, 0.14683802f, 0.21761774f, 0.12525328f, 0.18431792f, - -1.66402373e-003f, 2.37777247e-003f, 0.01445475f, 0.03509416f, - 0.02654697f, 0.01716739f, 0.05374011f, 0.02944174f, 0.11323927f, - -0.01485456f, -0.01611330f, -1.85554172e-003f, -0.01708549f, - -0.05435753f, -0.05302101f, 0.05260378f, -0.03582945f, - -3.42867890e-004f, 1.36076682e-003f, -0.04436073f, -0.04228432f, - 0.03281291f, -0.05480836f, -0.10197772f, -0.07206279f, -0.10741059f, - -0.02366946f, 0.10278475f, -2.74783419e-003f, -0.03242477f, - 0.02308955f, 0.02835869f, 0.10348799f, 0.19580358f, 0.10252027f, - 0.08039929f, 0.05525554f, -0.13250865f, -0.14395352f, 3.13586881e-003f, - -0.03387071f, 8.94669443e-003f, 0.05406157f, -4.97324532e-003f, - -0.01189114f, 
2.82919413e-004f, -0.03901557f, -0.04898705f, - 0.02164520f, -0.01382906f, -0.01850416f, 0.01869347f, -0.02450060f, - 0.02291678f, 0.08196463f, 0.03309153f, -0.10629974f, 0.02473924f, - 0.05344394f, -0.02404823f, -0.03243643f, -5.55244600e-003f, - -0.08009996f, 0.02811539f, 0.04235742f, 0.01859004f, 0.04902123f, - -0.01438252f, -0.01526853f, 0.02044195f, -0.05008660f, 0.04244113f, - 0.07611816f, 0.04950470f, -0.06020549f, -4.26026015e-003f, 0.13133512f, - -0.01438738f, -0.01958807f, -0.04044152f, -0.12425045f, - 2.84353318e-003f, -0.05042776f, -0.09121484f, 7.34345755e-003f, - 0.09388847f, 0.11800314f, 4.72295098e-003f, 4.44378285e-003f, - -0.07984917f, -0.03613737f, 0.04490915f, -0.02246483f, 0.04681071f, - 0.05240871f, 0.02157206f, -0.04603431f, -0.01197929f, -0.02748779f, - 0.13621049f, 0.08812155f, -0.07802048f, 4.86458559e-003f, -0.01598836f, - 0.01024450f, -0.03463517f, -0.02304239f, -0.08692665f, 0.06655128f, - 0.05785803f, -0.12640759f, 0.02307472f, 0.07337402f, 0.07525434f, - 0.04943763f, -0.02241034f, -0.09978238f, 0.14487994f, -0.06570521f, - -0.07855482f, 0.02830222f, -5.29603509e-004f, -0.04669895f, - -0.11822784f, -0.12246452f, -0.15365660f, -0.02969127f, 0.08078201f, - 0.13512598f, 0.11505685f, 0.04740673f, 0.01376022f, -0.05852978f, - -0.01537809f, -0.05541119f, 0.02491065f, -0.02870786f, 0.02760978f, - 0.23836176f, 0.22347429f, 0.10306466f, -0.06919070f, -0.10132039f, - -0.20198342f, -0.05040560f, 0.27163076f, 0.36987007f, 0.34540465f, - 0.29095781f, 0.05649706f, 0.04125737f, 0.07505883f, -0.02737836f, - -8.43431335e-003f, 0.07368195f, 0.01653876f, -0.09402955f, - -0.09574359f, 0.01474337f, -0.07128561f, -0.03460737f, 0.11438941f, - 0.13752601f, -0.06385452f, -0.06310338f, 8.19548313e-003f, 0.11622470f, - 5.05133113e-003f, -0.07602754f, 0.06695660f, 0.25723928f, 0.09037900f, - 0.28826267f, 0.13165380f, -0.05312614f, -0.02137198f, -0.03442232f, - -0.06255679f, 0.03899667f, 0.18391028f, 0.26016650f, 0.03374462f, - 0.01860465f, 0.19077586f, 
0.18160543f, 3.43634398e-003f, -0.03036782f, - 0.19683038f, 0.35378191f, 0.24968483f, -0.03222649f, 0.28972381f, - 0.43091634f, 0.30778357f, 0.02335266f, -0.09877399f, -6.85245218e-003f, - 0.08945240f, -0.08150686f, 0.02792493f, 0.24806842f, 0.17338486f, - 0.06231801f, -0.10432383f, -0.16653322f, -0.13197899f, -0.08531576f, - -0.19271527f, -0.13536365f, 0.22240199f, 0.39219588f, 0.26597717f, - -0.01231649f, 0.01016179f, 0.13379875f, 0.12018334f, -0.04852953f, - -0.07915270f, 0.07036012f, 3.87723115e-003f, -0.06126805f, - -0.15015170f, -0.11406515f, -0.08556531f, -0.07429333f, -0.16115491f, - 0.13214062f, 0.25691369f, 0.05697750f, 0.06861912f, -6.02903729e-003f, - -7.94562511e-003f, 0.04799571f, 0.06695165f, -0.01926842f, 0.06206308f, - 0.13450983f, -0.06381495f, -2.98370165e-003f, -0.03482971f, - 7.53991678e-003f, 0.03895611f, 0.11464261f, 0.01669971f, - 8.27818643e-003f, -7.49160210e-003f, -0.11712562f, -0.10650621f, - -0.10353880f, -0.04994106f, -7.65618810e-004f, 0.03023767f, - -0.04759270f, -0.07302686f, -0.05825012f, -0.13156348f, -0.10639747f, - -0.19393684f, -0.09973683f, -0.07918908f, 4.63177625e-004f, - -6.61382044e-004f, 0.15853868f, 0.08561199f, -0.07660093f, - -0.08015265f, -0.06164073f, 0.01882577f, -7.29908410e-004f, - 0.06840892f, 0.03843764f, 0.20274927f, 0.22028814f, -5.26101235e-003f, - 0.01452435f, -0.06331623f, 0.02865064f, 0.05673740f, 0.12171564f, - 0.03837196f, 0.03555467f, -0.02662914f, -0.10280123f, -0.06526285f, - -0.11066351f, -0.08988424f, -0.10103678f, 8.10526591e-003f, - 5.95238712e-003f, 0.02617721f, -0.01705742f, -0.10897956f, - -0.08004991f, -0.11271993f, -0.06185647f, -0.06103712f, 0.01597041f, - -0.05923606f, 0.09410726f, 0.22858568f, 0.03263380f, 0.06772990f, - -0.09003516f, 0.01017870f, 0.01931688f, 0.08628357f, -0.01430009f, - 0.10954945f, 0.16612452f, -0.02434544f, -0.03310068f, -0.04236627f, - 0.01212392f, -6.15046406e-003f, 0.06954194f, 0.03015283f, 0.01787957f, - 0.02781667f, -0.05561153f, -8.96244217e-003f, -0.04971489f, - 
0.07510284f, 0.01775282f, 0.05889897f, -0.07981427f, 0.03647643f, - -3.73833324e-003f, -0.08894575f, -0.06429435f, -0.08068276f, - 0.03567704f, -0.07131936f, -7.21910037e-003f, -0.09566668f, - 0.17886090f, 0.14911725f, 0.02070032f, -0.05017120f, -0.04992622f, - 0.01570143f, -0.09906903f, 0.06456193f, 0.15329507f, 0.18820767f, - 0.11689861f, -0.01178513f, -0.02225163f, -0.01905318f, 0.10271224f, - -7.27029052e-003f, 0.11664233f, 0.14796902f, 0.07771893f, 0.02400013f, - -0.05361797f, -0.01972888f, 0.01376177f, 0.06740040f, -0.06525395f, - 0.05726178f, -0.02404981f, -0.14018567f, -0.02074987f, -0.04621970f, - -0.04688627f, -0.01842059f, 0.07722727f, -0.04852883f, 0.01529004f, - -0.19639495f, 0.10817073f, 0.03795860f, -0.09435206f, -0.07984378f, - -0.03383440f, 0.11081333f, 0.02237366f, 0.12703256f, 0.21613893f, - 0.02918790f, 4.66472283e-003f, -0.10274266f, -0.04854131f, - -3.46305710e-003f, 0.08652268f, 0.02251546f, 0.09636052f, 0.17180754f, - -0.09272388f, 4.59174305e-004f, -0.11723048f, -0.12210111f, - -0.15547538f, 0.07218186f, -0.05297846f, 0.03779940f, 0.05150875f, - -0.03802310f, 0.03870645f, -0.15250699f, -0.08696499f, -0.02021560f, - 0.04118926f, -0.15177974f, 0.01577647f, 0.10249301f, 7.50041893e-003f, - 0.01721806f, -0.06828983f, -0.02397596f, -0.06598977f, -0.04317593f, - -0.08064980f, 6.66632550e-003f, 0.03333484f, 0.07093620f, 0.08231064f, - -0.06577903f, -0.06698844f, -0.06984019f, -0.06508023f, -0.14145090f, - -0.02393239f, 0.06485303f, 8.83263443e-003f, 0.09251080f, -0.07557579f, - -0.05067699f, -0.09798748f, -0.06703258f, -0.14056294f, 0.03245994f, - 0.12554143f, 0.01761621f, 0.12980327f, -0.04081950f, -0.11906909f, - -0.14813015f, -0.08376863f, -0.12200681f, 0.04988137f, 0.05424247f, - -3.90952639e-003f, 0.03255733f, -0.12717837f, -0.07461493f, - -0.05703964f, -0.01736189f, -0.08026433f, -0.05433894f, -0.01719359f, - 0.02886275f, 0.01772653f, -0.09163518f, 3.57789593e-003f, -0.10129993f, - -0.02653764f, -0.08131415f, -0.03847986f, -7.62157550e-004f, 
- 0.06486648f, 0.19675669f, -0.04919156f, -0.07059129f, -0.04857785f, - -0.01042383f, -0.08328653f, 0.03660302f, -0.03696846f, 0.04969259f, - 0.08241162f, -0.12514858f, -0.06122676f, -0.03750202f, - 6.52989605e-003f, -0.10247213f, 0.02568346f, 4.51781414e-003f, - -0.03734229f, -0.01131264f, -0.05412074f, 8.89345480e-004f, - -0.12388977f, -0.05959237f, -0.12418608f, -0.06151643f, -0.07310260f, - 0.02441575f, 0.07023528f, -0.07548289f, -7.57147965e-004f, - -0.09061348f, -0.08112976f, -0.06920306f, 9.54394229e-003f, - -0.01219902f, 1.21273217e-003f, -8.88989680e-003f, -0.08309301f, - -0.04552661f, -0.10739882f, -0.05691034f, -0.13928030f, 0.09027749f, - 0.15123098f, 0.03175976f, 0.17763577f, 3.29913251e-004f, 0.05151888f, - -0.09844074f, -0.09475287f, -0.08571247f, 0.16241577f, 0.19336018f, - 8.57454538e-003f, 0.11474732f, -0.01493934f, 0.03352379f, -0.08966240f, - -0.02322310f, 0.02663568f, 0.05448750f, -0.03536883f, -0.07210463f, - -0.06807277f, -0.03121621f, -0.05932408f, -0.17282860f, -0.15873498f, - -0.04956378f, 0.01603377f, -0.12385946f, 0.13878587f, 0.21468069f, - 0.13510075f, 0.20992437f, 0.08845878f, 0.08104013f, 0.03754176f, - 0.12173114f, 0.11103114f, 0.10643122f, 0.13941477f, 0.11640384f, - 0.14786847f, 0.01218238f, 0.01160753f, 0.03547940f, 0.08794311f, - -0.01695384f, -0.07692261f, -0.08236158f, 6.79194089e-003f, - -0.02458403f, 0.13022894f, 0.10953187f, 0.09857773f, 0.04735930f, - -0.04353498f, -0.15173385f, -0.17904443f, -0.10450364f, -0.13418166f, - -0.06633098f, -0.03170381f, -0.06839000f, -0.11350126f, -0.06983913f, - 0.19083543f, 0.17604128f, 0.07730632f, 0.10022651f, 0.36428109f, - 0.28291923f, 0.12688625f, 0.15942036f, 0.14064661f, -0.11201853f, - -0.13969108f, -0.09088077f, -0.14107047f, 0.05117374f, - -2.63348082e-003f, -0.10794610f, -0.09715455f, -0.05284977f, - 0.01565668f, 0.05031200f, 0.07021113f, -0.02963028f, 0.01766960f, - 0.08333644f, -0.03211382f, 4.90096770e-003f, 0.05186674f, -0.05045737f, - -0.09624767f, -0.02525997f, 0.06916669f, 
0.01213916f, 0.05333899f, - -0.03443280f, -0.10055527f, -0.06291115f, 5.42851724e-003f, - -6.30360236e-003f, 0.02270257f, -0.01769792f, 0.03273688f, 0.07746078f, - 7.77099328e-003f, 0.05041346f, 0.01648103f, -0.02321534f, -0.09930186f, - -0.02293853f, 0.02034990f, -0.08324204f, 0.08510064f, -0.03732836f, - -0.06465405f, -0.06086946f, 0.13680504f, -0.11469388f, -0.03896406f, - -0.07142810f, 2.67581246e-003f, -0.03639632f, -0.09849060f, - -0.11014334f, 0.17489147f, 0.17610909f, -0.16091567f, -0.07248894f, - 0.01567141f, 0.23742996f, 0.07552249f, -0.06270349f, -0.07303379f, - 0.25442186f, 0.16903116f, -0.08168741f, -0.05913896f, -0.03954096f, - 6.81776879e-003f, -0.05615319f, -0.07303037f, -0.12176382f, - 0.12385108f, 0.22084464f, -0.05543206f, -0.03310431f, 0.05731593f, - 0.19481890f, 0.04016430f, -0.06480758f, -0.12353460f, 0.18733442f, - -0.09631214f, -0.11192076f, 0.12404587f, 0.15671748f, 0.19256128f, - 0.10895617f, 0.03391477f, -0.13032004f, -0.05626907f, -0.09025607f, - 0.23485197f, 0.27812332f, 0.26725492f, 0.07255980f, 0.16565137f, - 0.22388470f, 0.07441066f, -0.21003133f, -0.08075339f, -0.15031935f, - 0.07023834f, 0.10872041f, 0.18156518f, 0.20037253f, 0.13571967f, - -0.11915682f, -0.11131983f, -0.18878011f, 0.06074620f, 0.20578890f, - 0.12413109f, 0.03930207f, 0.29176015f, 0.29502738f, 0.27856228f, - -0.01803601f, 0.16646385f, 0.19268319f, 0.01900682f, 0.06026287f, - 2.35868432e-003f, 0.01558199f, 0.02707230f, 0.11383014f, 0.12103992f, - 0.03907350f, 0.04637353f, 0.09020995f, 0.11919726f, -3.63007211e-003f, - 0.02220155f, 0.10336831f, 0.17351882f, 0.12259731f, 0.18983354f, - 0.15736865f, 0.01160725f, -0.01690723f, -9.69582412e-004f, 0.07213813f, - 0.01161613f, 0.17864859f, 0.24486147f, 0.18208991f, 0.20177495f, - 0.05972528f, -8.93934630e-003f, -0.02316955f, 0.14436610f, 0.14114498f, - 0.05520950f, 0.06353590f, -0.19124921f, 0.10174713f, 0.29414919f, - 0.26448128f, 0.09344960f, 0.15284036f, 0.19797507f, 0.11369792f, - -0.12722753f, -0.21396367f, 
-0.02008235f, -0.06566695f, -0.01662150f, - -0.03937003f, 0.04778343f, 0.05017274f, -0.02299062f, -0.20208496f, - -0.06395898f, 0.13721776f, 0.22544557f, 0.14888357f, 0.08687132f, - 0.27088094f, 0.32206613f, 0.09782200f, -0.18523243f, -0.17232181f, - -0.01041531f, 0.04008654f, 0.04199702f, -0.08081299f, -0.03755421f, - -0.04809646f, -0.05222081f, -0.21709201f, -0.06622940f, 0.02945281f, - -0.04600435f, -0.05256077f, -0.08432942f, 0.02848100f, 0.03490564f, - 8.28621630e-003f, -0.11051246f, -0.11210597f, -0.01998289f, - -0.05369405f, -0.08869293f, -0.18799506f, -0.05436598f, -0.05011634f, - -0.05419716f, -0.06151857f, -0.10827805f, 0.04346735f, 0.04016083f, - 0.01520820f, -0.12173316f, -0.04880285f, -0.01101406f, 0.03250847f, - -0.06009551f, -0.03082932f, -0.02295134f, -0.06856834f, -0.08775249f, - -0.23793389f, -0.09174541f, -0.05538322f, -0.04321031f, -0.11874759f, - -0.04221844f, -0.06070468f, 0.01194489f, 0.02608565f, -0.03892140f, - -0.01643151f, -0.02602034f, -0.01305472f, 0.03920100f, -0.06514261f, - 0.01126918f, -6.27710763e-003f, -0.02720047f, -0.11133634f, - 0.03300330f, 0.02398472f, 0.04079665f, -0.10564448f, 0.05966159f, - 0.01195221f, -0.03179441f, -0.01692590f, -0.06177841f, 0.01841576f, - -5.51078189e-003f, -0.06821765f, -0.03191888f, -0.09545476f, - 0.03030550f, -0.04896152f, -0.02914624f, -0.13283344f, -0.04783419f, - 6.07836898e-003f, -0.01449538f, -0.13358212f, -0.09687774f, - -0.02813793f, 0.01213498f, 0.06650011f, -0.02039067f, 0.13356198f, - 0.05986415f, -9.12760664e-003f, -0.18780160f, -0.11992817f, - -0.06342237f, 0.01229534f, 0.07143231f, 0.10713009f, 0.11085765f, - 0.06569190f, -0.02956399f, -0.16288325f, -0.13993549f, -0.01292515f, - 0.03833013f, 0.09130384f, -0.05086257f, 0.05617329f, -0.03896667f, - -0.06282311f, -0.11490010f, -0.14264110f, -0.04530499f, 0.01598189f, - 0.09167797f, 0.08663294f, 0.04885277f, -0.05741219f, -0.07565769f, - -0.17136464f, -0.02619422f, -0.02477579f, 0.02679587f, 0.11621952f, - 0.08788391f, 0.15520640f, 
0.04709549f, 0.04504483f, -0.10214074f, - -0.12293372f, -0.04820546f, -0.05484834f, 0.05473754f, 0.07346445f, - 0.05577277f, -0.08209965f, 0.03462975f, -0.20962234f, -0.09324598f, - 3.79481679e-003f, 0.03617633f, 0.16742408f, 0.07058107f, 0.10204960f, - -0.06795346f, 3.22807301e-003f, -0.12589309f, -0.17496960f, - 0.02078314f, -0.07694324f, 0.12184640f, 0.08997164f, 0.04793497f, - -0.11383379f, -0.08046359f, -0.25716835f, -0.08080962f, - 6.80711539e-003f, -0.02930280f, -3.04938294e-003f, -0.11106286f, - -0.04628860f, -0.07821649f, 7.70127494e-003f, -0.10247706f, - 1.21042714e-003f, 0.20573859f, -0.03241005f, 8.42972286e-003f, - 0.01946464f, -0.01197973f, -0.14579976f, 0.04233614f, - -4.14096704e-003f, -0.06866436f, -0.02431862f, -0.13529138f, - 1.25891645e-003f, -0.11425111f, -0.04303651f, -0.01694815f, - 0.05720210f, -0.16040207f, 0.02772896f, 0.05498345f, -0.15010567f, - 0.01450866f, 0.02350303f, -0.04301004f, -0.04951802f, 0.21702233f, - -0.03159155f, -0.01963303f, 0.18232647f, -0.03263875f, - -2.88476888e-003f, 0.01587562f, -1.94303901e-003f, -0.07789494f, - 0.04674156f, -6.25576358e-003f, 0.08925962f, 0.21353747f, 0.01254677f, - -0.06999976f, -0.05931328f, -0.01884327f, -0.04306272f, 0.11794136f, - 0.03842728f, -0.03907030f, 0.05636114f, -0.09766009f, -0.02104000f, - 8.72711372e-003f, -0.02736877f, -0.05112274f, 0.16996814f, 0.02955785f, - 0.02094014f, 0.08414304f, -0.03335762f, -0.03617457f, -0.05808248f, - -0.08872101f, 0.02927705f, 0.27077839f, 0.06075108f, 0.07478261f, - 0.15282831f, -0.03908454f, -0.05101782f, -9.51998029e-003f, - -0.03272416f, -0.08735625f, 0.07633440f, -0.07185312f, 0.13841286f, - 0.07812646f, -0.12901451f, -0.05488589f, -0.05644578f, -0.03290703f, - -0.11184757f, 0.03751570f, -0.05978153f, -0.09155276f, 0.05657315f, - -0.04328186f, -0.03047933f, -0.01413135f, -0.10181040f, -0.01384013f, - 0.20132534f, -0.01536873f, -0.07641169f, 0.05906778f, -0.07833145f, - -0.01523801f, -0.07502609f, -0.09461885f, -0.15013233f, 0.16050665f, - 
0.09021381f, 0.08473236f, 0.03386267f, -0.09147339f, -0.09170618f, - -0.08498498f, -0.05119187f, -0.10431040f, 0.01041618f, -0.03064913f, - 0.09340212f, 0.06448522f, -0.03881054f, -0.04985436f, -0.14794017f, - -0.05200112f, -0.02144495f, 0.04000821f, 0.12420804f, -0.01851651f, - -0.04116732f, -0.11951703f, -0.04879033f, -0.08722515f, -0.08454733f, - -0.10549165f, 0.11251976f, 0.10766345f, 0.19201984f, 0.06128913f, - -0.02734615f, -0.08834923f, -0.16999826f, -0.03548348f, - -5.36092324e-003f, 0.08297954f, 0.07226378f, 0.04194529f, 0.04668673f, - 8.73902347e-003f, 0.06980139f, 0.05652480f, 0.05879445f, 0.02477076f, - 0.02451423f, 0.12433673f, 0.05600227f, 0.06886370f, 0.03863076f, - 0.07459056f, 0.02264139f, 0.01495469f, 0.06344220f, 0.06945208f, - 0.02931899f, 0.11719371f, 0.04527427f, 0.03248192f, 2.08271481e-003f, - 0.02044626f, 0.11403449f, 0.04303892f, 0.06444661f, 0.04959024f, - 0.08174094f, 0.09240247f, 0.04894639f, 0.02252937f, -0.01652530f, - 0.07587013f, 0.06064249f, 0.13954395f, 0.02772832f, 0.07093039f, - 0.08501238f, 0.01701301f, 0.09055722f, 0.33421436f, 0.20163782f, - 0.09821030f, 0.07951369f, 0.08695120f, -0.12757730f, -0.13865978f, - -0.06610068f, -0.10985506f, 0.03406816f, -0.01116336f, -0.07281768f, - -0.13525715f, -0.12844718f, 0.08956250f, 0.09171610f, 0.10092317f, - 0.23385370f, 0.34489515f, 0.09901748f, 0.02002922f, 0.12335990f, - 0.07606190f, -0.14899330f, -0.15634622f, -0.06494618f, -0.01760547f, - 0.03404277f, -0.13208845f, -0.12101169f, -0.18294574f, -0.16560709f, - 0.02183887f, -0.02752613f, 0.01813638f, 0.02000757f, 0.01319924f, - 0.08030242f, 0.01220535f, 2.98233377e-003f, -0.01307070f, 0.05970297f, - -0.05345284f, -0.03381982f, -9.87543724e-003f, -0.06869387f, - 0.03956730f, -0.03108176f, -0.05732809f, 0.02172386f, 0.04159765f, - 2.62783933e-003f, 0.04813229f, 0.09358983f, -8.18389002e-003f, - 0.01724574f, -0.02547474f, -0.04967288f, -0.02390376f, 0.06640504f, - -0.06306566f, 0.01137518f, 0.05589378f, -0.08237787f, 0.02455001f, - 
-0.03059422f, -0.08953978f, 0.06851497f, 0.07190268f, -0.07610799f, - 7.87237938e-003f, -7.85830803e-003f, 0.06006952f, -0.01126728f, - -2.85743061e-003f, -0.04772895f, 0.01884944f, 0.15005857f, - -0.06268821f, -0.01989072f, 0.01138399f, 0.08760451f, 0.03879007f, - -9.66926850e-003f, -0.08012961f, 0.06414555f, -0.01362950f, - -0.09135523f, 0.01755159f, 0.04459474f, 0.09650917f, 0.05219948f, - -2.19440833e-003f, -0.07037939f, -0.01599054f, 0.13103317f, - -0.02492603f, -0.01032540f, -0.02903307f, 0.04489160f, 0.05148086f, - 0.01858173f, -0.02919228f, 0.08299296f, -0.04590359f, -0.15745632f, - -0.09068198f, -0.02972453f, 0.12985018f, 0.22320485f, 0.24261914f, - 0.03642650f, -0.05506422f, 2.67413049e-003f, -0.03834032f, 0.06449424f, - 0.03834866f, 0.03816991f, 0.25039271f, 0.34212017f, 0.32433882f, - 0.18824573f, -0.08599839f, -0.17599408f, -0.15317015f, -0.09913155f, - -0.02856072f, -0.05304699f, -1.06437842e-003f, -0.06641813f, - -0.07509298f, 0.01463361f, -0.07551918f, -0.04510373f, - -8.44620075e-003f, 0.01772176f, 0.04068235f, 0.20295307f, 0.15719447f, - 0.05712103f, 0.26296997f, 0.14657754f, 0.01547317f, -0.05052776f, - -0.03881342f, -0.01437883f, -0.04930177f, 0.11719568f, 0.24098417f, - 0.26468599f, 0.31698579f, 0.10103608f, -0.01096375f, -0.01367013f, - 0.17104232f, 0.20065314f, 2.67622480e-003f, -0.01190034f, 0.18301608f, - 0.09459770f, -0.06357619f, -0.06473801f, 0.01377906f, -0.10032775f, - -0.06388740f, 3.80393048e-003f, 0.06206078f, 0.10349120f, 0.26804337f, - 8.17918684e-003f, -0.02314351f, 9.34422202e-003f, 0.09198381f, - 0.03681326f, -8.77339672e-003f, -0.09662418f, -0.02715708f, - 0.13503517f, 0.08962728f, -6.57071499e-003f, -0.03201199f, 0.28510824f, - 0.32095715f, 0.18512695f, -0.14230858f, -0.14048551f, -0.07181299f, - -0.08575408f, -0.08661680f, -0.17416079f, 7.54326640e-004f, - 0.05601677f, 0.13585392f, -0.04960437f, -0.07708392f, 0.10676333f, - -0.04407546f, -0.07209078f, 0.03663663f, 0.28949317f, 0.41127121f, - 0.27431169f, -0.06900328f, 
-0.21474190f, -0.15578632f, -0.19555484f, - -0.15209621f, -0.11269179f, 0.07416003f, 0.18991330f, 0.26858172f, - 0.01952259f, 0.01017922f, 0.02159843f, -4.95165400e-003f, -0.04368168f, - -0.12721671f, -0.06673957f, -0.11275250f, 0.04413409f, 0.05578312f, - 0.03896771f, 0.03566417f, -0.05871816f, -0.07388090f, -0.17965563f, - -0.08570268f, -0.15273231f, -0.06022318f, -0.06999847f, - -6.81510568e-003f, 0.06294262f, -6.54901436e-004f, -0.01128654f, - -0.02289657f, 0.04849290f, 0.04140804f, 0.23681939f, 0.14545733f, - 0.01989965f, 0.12032662f, 3.87463090e-003f, -6.02597650e-003f, - -0.05919775f, -0.03067224f, -0.07787777f, 0.10834727f, 0.02153730f, - 0.02765649f, 0.03975543f, -0.12182906f, -0.04900113f, -0.09940100f, - -0.06453611f, -0.13757215f, -0.03721382f, 0.02827376f, -0.04351249f, - 0.01907038f, -0.10284120f, -0.05671160f, -0.10760647f, -0.09624009f, - -0.09565596f, -0.01303654f, 0.03080539f, 0.01416511f, 0.05846142f, - -5.42971538e-003f, 0.06221476f, -0.03320325f, -0.06791797f, - -0.05791342f, 0.12851369f, 0.14990346f, 0.03634374f, 0.14262885f, - 0.04330391f, 0.05032569f, -0.05631914f, 0.01606137f, 0.04387223f, - 0.22344995f, 0.15722635f, -0.04693628f, 0.03006579f, -2.52882647e-003f, - 0.05717621f, -0.07529724f, -0.02848588f, -0.06868757f, - -4.51729307e-003f, 0.06466042f, -0.05935378f, -0.04704857f, - -0.07363959f, 0.04843248f, -0.13421375f, -0.09789340f, -0.10255270f, - 0.03509852f, 0.04751543f, -0.03822323f, 0.09740467f, 0.04762916f, - 0.03940146f, -0.08283259f, 0.09552965f, 0.05038739f, 0.21258622f, - 0.09646992f, 0.03241193f, 0.05167701f, 0.04614570f, 0.04330090f, - -0.02671840f, -0.06259909f, -0.02301898f, 0.18829170f, 0.10522786f, - 0.04313190f, 0.01670948f, -0.08421925f, 0.05911417f, -0.10582602f, - -0.04855484f, -0.08373898f, 0.07775915f, 0.03723533f, -0.12047344f, - 4.86345543e-003f, -0.10520902f, 0.06571782f, -0.07528137f, - -0.03245651f, -0.09869066f, -0.02917477f, -0.18293270f, 0.14810945f, - 9.24033765e-003f, -0.04354914f, 0.02266885f, 
-0.11872729f, - -0.04016589f, 0.02830229f, 0.22539048f, 0.20565644f, 0.16701797f, - 0.09019924f, 0.01300652f, 0.09760600f, -0.03675831f, -0.01935448f, - -0.06894835f, 0.08077277f, 0.19047537f, 0.11312226f, 0.04106043f, - -0.11187182f, 0.04312806f, -0.18548580f, -0.11287174f, -0.08794551f, - 0.02078281f, -0.15295486f, 0.11806386f, -0.01103218f, -0.15971117f, - 0.02153538f, -0.05232147f, -0.10835317f, -0.13910367f, 0.05920752f, - -0.10122602f, 0.20174250f, 0.09105796f, -0.01881348f, 0.09559010f, - -0.03725745f, -0.09442931f, -0.09763174f, 0.05854454f, 0.08287182f, - 0.12919849f, 0.08594352f, -2.49806582e-003f, 0.02398440f, - 5.67950122e-003f, -0.06296340f, -0.12993270f, 0.03855852f, 0.05186560f, - 0.10839908f, -0.03380463f, -0.12654832f, -0.05399339f, -0.07456800f, - -0.04736232f, -0.10164231f, 0.07496139f, 0.08125214f, 0.07656177f, - -0.04999603f, -0.12823077f, -0.07692395f, -0.11317524f, -0.09118655f, - -0.05695669f, 0.10477209f, 0.07468581f, 0.01630048f, -8.00961629e-003f, - -0.06582128f, -0.04019095f, -0.04682907f, -0.01907842f, -0.10997720f, - 0.04911406f, 0.02931030f, 0.04197735f, -0.05773980f, -0.09670641f, - -0.03594951f, -0.03402121f, -0.07149299f, -0.10566200f, 0.10601286f, - 0.06340689f, -0.01518632f, -5.96402306e-003f, -0.07628012f, - -3.52779147e-003f, -0.02683854f, -0.10265494f, -0.02680815f, - 0.16338381f, 0.03103515f, 0.02296976f, 0.01624348f, -0.10831620f, - -0.02314233f, -0.04789969f, -0.05530700f, -0.06461314f, 0.10494506f, - 0.04642856f, -0.07592955f, -0.06197905f, -0.09042154f, -0.01445521f, - -0.04297818f, -0.11262015f, -0.11430512f, 0.03174541f, -0.03677487f, - -0.02963996f, -0.06610169f, -0.13292049f, -0.07059067f, -0.08444111f, - -0.02640536f, -0.07136250f, 0.04559967f, 0.01459980f, 0.17989251f, - 0.04435328f, -0.12464730f, -0.02871115f, -0.10752209f, -0.03393742f, - -0.03791408f, 0.02548251f, 0.01956050f, 0.19245651f, 0.13963254f, - -0.05904696f, -0.07424626f, -0.10411884f, 1.54176133e-003f, - 0.01797429f, 0.13025844f, 0.04547642f, 
-0.05710349f, -0.10697161f, - -0.13489437f, -0.06515755f, -0.06406886f, -4.08572936e-003f, - -0.01336483f, 0.04368737f, -0.11259720f, -0.05701635f, -0.06469971f, - -0.08346602f, -0.04166770f, -0.05795543f, -0.08247511f, -0.05742628f, - 0.08452254f, -0.03350224f, 0.13980860f, 0.13252275f, 0.07589617f, - 0.07539988f, 0.12155797f, 0.19087289f, 0.15050751f, 0.21250245f, - 0.14206800f, 0.01298489f, 0.07450245f, 0.06559097f, 0.01700557f, - 0.04512971f, 0.16950700f, 0.10261577f, 0.16389982f, 0.05505059f, - -0.03453077f, 0.08622462f, 0.07935954f, 0.03976260f, 0.02036091f, - 3.95744899e-003f, 0.03267065f, 0.15235919f, 0.01297494f, -0.08109194f, - 0.01407558f, 4.40693414e-003f, -0.15157418f, -0.11390478f, - -0.07487597f, -7.81322457e-003f, -0.02749545f, -0.10181408f, - 0.13755716f, 0.14007211f, 0.13482562f, 0.27517235f, 0.34251109f, - 0.07639657f, 0.07268607f, 0.19823882f, 0.16135791f, -0.04186463f, - -0.12784107f, -0.09846287f, 0.03169041f, 0.10974082f, -0.15051922f, - -0.08916726f, -0.07138767f, -0.04153349f, 6.25418453e-003f, - 0.01266654f, 0.10533249f, 0.12749144f, 0.15148053f, 0.01498513f, - 0.06305949f, -0.01247123f, -0.08778401f, -0.08551880f, -0.11955146f, - -0.08493572f, -0.02901620f, -0.02394859f, -0.13427313f, -0.11053200f, - -0.14413260f, -0.15203285f, 0.03972760f, -3.72127310e-004f, - -0.04200919f, 0.06105104f, 0.01904975f, -0.01106191f, - -7.27445772e-003f, -0.01520341f, 1.10228511e-003f, -0.04949187f, - -0.08013099f, 5.72071038e-003f, 0.08415454f, -0.06523152f, 0.03664081f, - -0.02673042f, -0.12066154f, -0.03702074f, 0.06006580f, 0.01628682f, - -6.17772620e-003f, 0.08192339f, -3.41629819e-003f, 0.02870512f, - 0.05807141f, 0.04959986f, 0.04618251f, -0.04901629f, -0.10579574f, - 0.02274442f, 0.12070961f, 2.23597488e-003f, 0.09831765f, -0.03019848f, - -0.11181970f, -0.04961075f, 0.02498928f, -0.03714991f, -0.01619653f, - 0.02643486f, -7.62964319e-003f, -0.02882290f, -0.06242594f, - -0.08439861f, 0.07220893f, 0.07263952f, 0.01561574f, 0.03091968f, - 0.01708712f, 
-0.03797151f, -3.18561122e-003f, 0.01624021f, - -0.02828573f, 0.11284444f, -1.32280716e-003f, -0.07784860f, - -0.07209100f, 0.03372242f, 0.12154529f, 0.02278104f, -0.05275500f, - -0.01918484f, 0.12989293f, 0.05424401f, 0.02333086f, 0.04029022f, - 0.12392918f, 0.09495489f, 0.09190340f, 0.07935889f, 8.76816828e-003f, - 0.17148446f, -8.51302687e-003f, -0.08011249f, -0.06796283f, - 0.04884845f, 0.01112272f, -0.07835306f, -1.14811445e-003f, - -0.03440760f, 0.02845243f, 0.07695542f, -0.07069533f, -0.01151784f, - -8.53884313e-003f, -0.01662786f, -0.04163864f, 0.05400505f, - 0.02859163f, 0.02921852f, 0.05003135f, -6.85718050e-003f, -0.01632611f, - 0.07780217f, 0.04042810f, -0.01216440f, 3.60914599e-003f, -0.06322435f, - 0.09516726f, 0.12877031f, -9.69162490e-003f, 0.01031179f, 0.05180895f, - -9.34659224e-003f, -0.01644533f, -0.04849347f, -0.04343236f, - 0.10514783f, 0.08046635f, -0.04615205f, -0.03975486f, -0.01485525f, - 0.13096830f, -0.01517950f, -0.06571898f, -0.04016372f, 0.01849786f, - 0.02439670f, 0.08067258f, 1.74824719e-003f, 0.07053747f, 0.08819518f, - -5.08352555e-003f, -0.06550863f, -0.08266170f, -0.07780605f, - 0.01453450f, -0.08756890f, 0.01096501f, -8.71319138e-003f, 0.10110464f, - 0.02420769f, -0.06708383f, 0.02007811f, 5.93133038e-003f, 0.05398923f, - 0.07538138f, 0.02049227f, 0.02242589f, 0.04011070f, -1.44875818e-003f, - -4.19115182e-003f, 0.06367654f, 0.02506934f, 0.02434536f, 0.05879405f, - -8.22952855e-003f, -0.01242441f, 0.04224926f, -0.01754923f, - 0.05958161f, 0.03818886f, -0.01830363f, -0.04308917f, -0.04422197f, - -0.02432721f, 0.02264866f, 2.03751423e-003f, 0.01197031f, 0.04439203f, - 0.12169247f, 0.03602713f, -0.02599251f, -1.98226492e-003f, 0.02046336f, - -0.02639058f, -1.91242550e-003f, -0.09334669f, -0.03595153f, - -9.88179818e-003f, -0.06848445f, -0.04666303f, -0.09955736f, - -0.04206430f, 0.02609075f, 9.09005292e-003f, -0.07138551f, - -4.22313227e-004f, 0.01766645f, 0.02756404f, 0.01308276f, 0.04052891f, - 0.02387515f, 0.05337298f, 
0.02500631f, -0.04970853f, -0.12467445f, - 0.17604403f, 0.12256411f, -0.07512254f, 8.70451052e-003f, -0.05697548f, - -0.03626474f, -8.76623299e-003f, -0.01210897f, -0.09451522f, - 0.07490732f, -0.02008001f, -0.02681278f, -0.06463405f, -0.01517507f, - 7.33757764e-003f, 6.07147906e-003f, -0.09316964f, -0.04575328f, - 0.13261597f, 0.15424870f, -0.01655918f, -0.02772390f, -0.05243644f, - -0.02356456f, -0.02351753f, -0.10211615f, -0.12873036f, 0.14549787f, - 0.12519856f, 4.38762689e-003f, 0.02795992f, 0.05170322f, 0.09223596f, - 0.05890015f, 0.02376701f, -0.02777346f, 0.09506908f, 0.02328936f, - -0.02319928f, -0.03218696f, -0.01527841f, -0.01016694f, -0.02674719f, - 0.05137179f, 0.01980666f, 0.06544447f, -0.01746171f, 0.01026380f, - 0.01561806f, 7.97004555e-004f, 0.07601810f, 0.01907250f, -0.03083035f, - -0.05987392f, 0.09242783f, 0.14555025f, 0.01035827f, 0.03092401f, - -0.09562709f, -0.03802354f, 0.02531144f, 0.03079449f, -0.07100715f, - 0.03330721f, -2.69116857e-003f, 0.03167490f, 0.05744999f, 0.03259895f, - 1.91266940e-003f, 0.03194578f, 0.07389776f, 0.02198060f, 0.07633314f, - 0.03293105f, -0.09103648f, 0.04718142f, 0.06102672f, -0.01003063f, - 5.85481385e-003f, -0.01522574f, 0.02323526f, 0.10584345f, - 4.35879454e-003f, 0.06107873f, 0.05868603f, -0.03115531f, 0.01214679f, - 0.08567052f, 3.93926632e-003f, -0.02521488f, -1.88425183e-003f, - 0.02038053f, -6.26854831e-004f, 0.04897438f, -0.04280585f, - -0.04819689f, -0.04812867f, -0.01451186f, 0.05101469f, - -9.01125465e-003f, -0.03333859f, 0.03917955f, 0.04196448f, 0.04292135f, - 0.02809529f, 0.02999715f, 0.04081348f, 9.10039060e-003f, 0.09703232f, - 0.10379741f, 0.02348725f, -4.72756615e-003f, 0.01027325f, 0.10402658f, - 0.12071823f, 0.09817299f, -0.02612033f, 0.03638414f, 0.05896405f, - 0.04865025f, 0.04793910f, -0.03882321f, -0.02962117f, -0.01222268f, - 0.04071597f, 0.01922777f, -0.02287866f, 0.03328381f, 0.01859092f, - 0.09024994f, 0.03804455f, -0.01424510f, 0.01953739f, 0.02509617f, - -0.03390914f, 
-0.05663941f, -0.01641979f, 0.05848591f, 0.04639670f, - 0.02092116f, 0.12911791f, 0.19918139f, 0.07739855f, -7.25806039e-003f, - 0.04074838f, 0.03183993f, 1.39251316e-003f, -0.01428625f, 0.01865480f, - 0.08529541f, 0.13547510f, 0.11189661f, 0.03998901f, 0.09575938f, - -0.02631102f, -0.03458253f, -0.04749985f, -0.06070716f, - 4.71884012e-003f, 0.06445789f, -0.02450038f, -0.05483776f, - -0.04657237f, -0.02030717f, -0.03480766f, -0.09397731f, -0.06399718f, - -0.01804585f, 5.62348310e-003f, -6.64811488e-003f, -0.06517869f, - 6.96210237e-003f, -0.01860148f, -0.04245830f, -0.05850367f, - -3.24417115e-003f, 0.07700698f, 0.11290991f, 0.09923030f, -0.02970599f, - 0.05592411f, 0.04813979f, -0.09811195f, -0.09357996f, -0.03276114f, - 0.05218338f, 0.04141375f, 3.92977800e-003f, -0.05047480f, 0.15960084f, - 0.04612800f, -0.03114098f, -0.04650044f, -0.03249795f, -0.02425641f, - -0.04311355f, 0.04307659f, -0.09401883f, -0.04742785f, -0.01254499f, - -0.06598741f, 3.41369561e-003f, -0.05620445f, -7.28127593e-003f, - -0.05998361f, -0.03274450f, -0.07376868f, 3.19015374e-003f, - -0.07733069f, 0.05815864f, -0.02471071f, 0.03850617f, 0.13838784f, - 0.15399861f, 0.01731321f, -0.01477586f, 0.10393341f, 0.05159833f, - -0.01945555f, -0.03427503f, -0.04867341f, 0.09237480f, 0.10732719f, - 0.06071450f, -0.01355071f, 0.01844356f, -0.03480803f, -0.03796671f, - 2.15628621e-004f, -0.05440186f, 0.01889855f, -0.01443413f, - -0.02607902f, -0.02938001f, 0.02720689f, -0.06228397f, -0.02970936f, - -0.03426210f, -0.10280876f, -0.06739304f, -0.05227850f, 0.03360292f, - -0.11278441f, -0.06966180f, -0.13937433f, 9.10932291e-003f, - 2.52020749e-004f, -4.07359656e-003f, 0.12310639f, 0.09343060f, - 0.07302511f, 0.03222093f, 0.07532879f, 0.03792387f, -0.04985180f, - 0.01804602f, 0.02694195f, 0.13481498f, 0.04601225f, 0.04106982f, - 0.08511057f, 0.12314661f, 0.01320830f, 0.05044121f, -5.52943908e-003f, - -0.08992624f, -0.02249301f, -0.08181777f, 0.06165213f, -0.03256603f, - -0.01068920f, -0.01323473f, 
-0.11970232f, -0.04616347f, -0.12088681f, - -0.06762606f, -0.08676834f, -0.06434575f, 0.01772529f, 0.03469615f, - -0.10926618f, 0.03013873f, 0.14030397f, 0.16130108f, 0.17985588f, - 0.11281928f, 0.10530639f, 0.08905948f, 0.07733764f, 0.06695238f, - 0.02142088f, 0.06438877f, 0.09794453f, 0.05745072f, 0.02788557f, - 0.02632830f, 0.07985807f, 4.24902979e-003f, 8.47890321e-003f, - -0.02679466f, -5.28812688e-003f, -0.02162580f, -0.07490715f, - -0.08251337f, -0.02056576f, -0.01026194f, -1.15492963e-003f, - -5.75720915e-004f, -0.07210591f, -0.07320981f, -0.04883312f, - -0.10897151f, -0.07477258f, -0.08867134f, -0.09222437f, -0.10924666f, - -0.10430276f, 0.07953499f, 0.02767959f, 0.11393359f, 0.18779543f, - 0.03313421f, 0.02143700f, 0.05852016f, -2.12067598e-003f, - -3.76984011e-003f, 0.02774167f, -0.03124610f, 0.01465141f, 0.01616004f, - -0.01391913f, -0.04404102f, -0.05444227f, -0.14684731f, -0.15016587f, - 0.04509468f, 1.29563001e-003f, 0.01398350f, 0.05610404f, -0.04868806f, - -0.04776716f, -8.16873740e-003f, -2.30126386e-003f, -0.02286313f, - 0.11983398f, -0.04703261f, -0.08814441f, -0.07585249f, -0.10799607f, - -0.03232087f, 0.01509786f, -0.04843464f, -0.03967846f, 0.09589416f, - 0.01352560f, -0.01458119f, 0.01050829f, -0.03038946f, 0.01608388f, - 1.11975556e-003f, -0.01250656f, 2.86211423e-003f, 0.04333691f, - -0.14603497f, -0.01946543f, -0.02327525f, -0.01973944f, 0.07944400f, - -0.02224544f, -0.06701808f, 0.03476532f, 0.11505594f, -0.02712801f, - -0.01665113f, 0.06315716f, -0.08205860f, 0.07431999f, 0.04915778f, - -0.04468752f, -0.01490402f, 0.07400476f, -0.11650901f, 0.05102430f, - 0.04559118f, -0.05916039f, 0.08840760f, -0.01587902f, -0.14890194f, - 0.07857784f, 0.04710254f, -0.05381983f, -0.07331945f, -0.03604643f, - 0.15611970f, 0.07649943f, -0.05959348f, -0.02776607f, 0.11098688f, - 0.03758875f, -0.04446875f, 0.04933187f, 0.01345535f, 0.06921103f, - 0.07364785f, 0.05518956f, 0.02899585f, 0.09375840f, 0.10518434f, - -0.04420241f, 0.01915282f, 
-3.56386811e-003f, 0.14586878f, 0.10286101f, - -0.04360626f, -0.12723237f, 0.09076386f, 0.11119842f, -0.06035013f, - 0.09674817f, 0.08938243f, 0.07065924f, 0.02603180f, 5.84815582e-003f, - -0.05922065f, 0.12360309f, 3.59695964e-003f, 2.99844006e-003f, - 0.03697936f, 0.02043072f, 0.04168725f, 0.01025975f, -0.01359980f, - -0.01600920f, 0.02581056f, 0.02329250f, 2.98100687e-003f, 0.01629762f, - 0.06652115f, 0.05855627f, 0.01237463f, -0.01297135f, 0.01761587f, - 0.05090865f, 0.06549342f, -0.04425945f, 2.43203156e-003f, - 3.07327788e-003f, 0.06678630f, -0.04303836f, 0.01082393f, -0.06476044f, - 0.04077786f, 0.12441979f, 0.08237778f, 0.07424165f, 0.04065890f, - 0.06905543f, 0.09556347f, 0.12724875f, -0.02132082f, 0.08514154f, - -0.04175328f, -0.02666954f, 0.01897836f, 0.03317382f, 9.45465732e-003f, - -0.01238974f, -0.04242500f, -0.01419479f, -0.03545213f, -0.02440874f, - 0.08684119f, 0.04212951f, 0.02462858f, -0.01104825f, -5.01706870e-003f, - 0.02968982f, 0.02597476f, -0.01568939f, 0.04514892f, 0.06974549f, - 0.08670278f, 0.06828108f, 0.10238872f, 0.05405957f, 0.06548470f, - -0.03763957f, 0.01366090f, 0.07069602f, 0.05363748f, 0.04798120f, - 0.11706422f, 0.05466456f, -0.01869259f, 0.06344382f, 0.03106543f, - 0.08432506f, -0.02061096f, 0.03821088f, -6.92190882e-003f, - 6.40467042e-003f, -0.01271779f, 6.89014705e-005f, 0.04541415f, - -0.01899539f, -0.05020239f, 0.03000903f, 0.01090422f, 4.52452758e-003f, - 0.02573632f, -0.02388454f, -0.04200457f, 1.72783900e-003f, - -0.05978370f, -0.02720562f, 0.06573715f, 0.01154317f, 0.01265615f, - 0.07375994f, -9.19828378e-003f, -0.04914120f, 0.02124831f, 0.06455322f, - 0.04372910f, -0.03310043f, 0.03605788f, -6.78055827e-003f, - 9.36202332e-003f, 0.01747596f, -0.06406314f, -0.06812935f, 0.08080816f, - -0.02778088f, 0.02735260f, 0.06393493f, 0.06652229f, 0.05676993f, - 0.08640018f, -7.59188086e-003f, -0.02012847f, -0.04741159f, - -0.01657069f, -0.01624399f, 0.05547778f, -2.33309763e-003f, - 0.01120033f, 0.06141156f, -0.06285004f, 
-0.08732341f, -0.09313398f, - -0.04267832f, 5.57443965e-003f, 0.04809862f, 0.01773641f, - 5.37361018e-003f, 0.14842421f, -0.06298012f, -0.02935147f, 0.11443478f, - -0.05034208f, 5.65494271e-003f, 0.02076526f, -0.04577984f, - -0.04735741f, 0.02961071f, -0.09307127f, -0.04417921f, -0.04990027f, - -0.03940028f, 0.01306016f, 0.06267900f, 0.03758737f, 0.08460117f, - 0.13858789f, 0.04862388f, -0.06319809f, -0.05655516f, 0.01885816f, - -0.03285607f, 0.03371567f, -0.07040928f, -0.04514049f, 0.01392166f, - 0.08184422f, -0.07230316f, 0.02386871f, 0.02184591f, 0.02605764f, - -0.01033954f, 9.29878280e-003f, 7.67351175e-003f, 0.15189242f, - 0.02069071f, -0.09738296f, -0.08894105f, -0.07768748f, 0.02332268f, - -0.01778995f, -0.03258888f, -0.08180822f, -0.08492987f, 0.02290156f, - -0.11368170f, -0.03554465f, -0.04533844f, -0.02861580f, 0.06782424f, - 0.01113123f, 0.02453644f, 0.12721945f, 0.08084814f, -0.03607795f, - 0.01109122f, 0.04803548f, -0.03489929f, 0.03399536f, -0.05682014f, - 8.59533902e-003f, -4.27904585e-003f, 0.03230887f, -0.01300198f, - -0.01038137f, -0.07930113f, 8.33097473e-003f, 0.02296994f, - -0.01306500f, -0.01881626f, 0.04413369f, 0.05729880f, -0.03761553f, - 0.01942326f, 1.64540811e-003f, -0.03811319f, 0.04190650f, -0.14978096f, - -0.04514487f, 0.01209545f, -5.46460645e-003f, -0.01647195f, - 7.63064111e-003f, -0.07494587f, 0.08415288f, 0.10020141f, -0.01228561f, - 0.06553826f, 0.04554005f, 0.07890417f, 0.03041138f, 0.01752007f, - 0.09208256f, -3.74419295e-004f, 0.10549527f, 0.04686913f, 0.01894833f, - -0.02651412f, -4.34682379e-003f, 5.44942822e-003f, 0.01444484f, - 0.05882156f, -0.03336544f, 0.04603891f, -0.10432546f, 0.01923928f, - 0.01842845f, -0.01712168f, -0.02222766f, 0.04693324f, -0.06202956f, - -0.01422159f, 0.08732220f, -0.07706107f, 0.02661049f, -0.04300238f, - -0.03092422f, -0.03552184f, -0.01886088f, -0.04979934f, 0.03906401f, - 0.04608644f, 0.04966111f, 0.04275464f, -0.04621769f, -0.02653212f, - 8.57011229e-003f, 0.03839684f, 0.05818764f, 
0.03880796f, - -2.76100676e-004f, 0.03076511f, -0.03266929f, -0.05374557f, - 0.04986527f, -9.45429131e-003f, 0.03582499f, -2.64564669e-003f, - -1.07461517e-003f, 0.02962313f, -0.01483363f, 0.03060869f, 0.02448327f, - 0.01845641f, 0.03282966f, -0.03534438f, -0.01084059f, -0.01119136f, - -1.85360224e-003f, -5.94652840e-004f, -0.04451817f, 2.98327743e-003f, - 0.06272484f, -0.02152076f, -3.05971340e-003f, -0.05070828f, - 0.01531762f, 0.01282815f, 0.05167150f, 9.46266949e-003f, - -3.34558333e-003f, 0.11442288f, -0.03906701f, -2.67325155e-003f, - 0.03069184f, -0.01134165f, 0.02949462f, 0.02879886f, 0.03855566f, - -0.03450781f, 0.09142872f, -0.02156654f, 0.06075062f, -0.06220816f, - 0.01944680f, 6.68372354e-003f, -0.06656796f, 8.70784000e-003f, - 0.03456013f, 0.02434320f, -0.13236357f, -0.04177035f, -0.02069627f, - 0.01068112f, 0.01505432f, -0.07517391f, -3.83571628e-003f, - -0.06298508f, -0.02881260f, -0.13101046f, -0.07221562f, - -5.79945277e-003f, -8.57300125e-003f, 0.03782469f, 0.02762164f, - 0.04942456f, -0.02936396f, 0.09597211f, 0.01921411f, 0.06101191f, - -0.04787507f, -0.01379578f, -7.40224449e-003f, -0.02220136f, - -0.01313756f, 7.77558051e-003f, 0.12296968f, 0.02939998f, 0.03594062f, - -0.07788624f, -0.01133144f, 3.99316690e-004f, -0.06090347f, - -0.01122066f, -4.68682544e-003f, 0.07633100f, -0.06748922f, - -0.05640298f, -0.05265681f, -0.01139122f, -0.01624347f, -0.04715714f, - -0.01099092f, 0.01048561f, 3.28499987e-003f, -0.05810167f, - -0.07699911f, -0.03330683f, 0.04185145f, 0.03478536f, 0.02275165f, - 0.02304766f, 6.66040834e-003f, 0.10968148f, -5.93013782e-003f, - -0.04858336f, -0.04203213f, -0.09316786f, -6.13074889e-003f, - -0.02544625f, 0.01366201f, 9.18555818e-003f, -0.01846578f, - -0.05622401f, -0.03989377f, -0.07810296f, 6.91275718e-003f, - 0.05957597f, -0.03901334f, 0.01572002f, -0.01193903f, - -6.89400872e-003f, -0.03093356f, -0.04136098f, -0.01562869f, - -0.04604580f, 0.02865234f, -0.08678447f, -0.03232484f, -0.05364593f, - -0.01445016f, 
-0.07003860f, -0.08669746f, -0.04520775f, 0.04274122f, - 0.03117515f, 0.08175703f, 0.01081109f, 0.06379741f, 0.06199206f, - 0.02865988f, 0.02360346f, 0.06725410f, -0.03248780f, -9.37702879e-003f, - 0.08265898f, -0.02245839f, 0.05125763f, -0.01862395f, 0.01973453f, - -0.01994494f, -0.10770868f, 0.03180375f, 3.23935156e-003f, - -0.02142080f, -0.04256190f, 0.04760900f, 0.04282863f, 0.05635953f, - -0.01870849f, 0.05540622f, -0.03042666f, 0.01455277f, -0.06630179f, - -0.05843807f, -0.03739681f, -0.09739155f, -0.03220233f, -0.05620182f, - -0.10381401f, 0.07400211f, 4.20676917e-003f, 0.03258535f, - 2.14308966e-003f, 0.05121966f, -0.01274337f, 0.02384761f, 0.06335578f, - -0.07905591f, 0.08375625f, -0.07898903f, -0.06508528f, -0.02498444f, - 0.06535810f, 0.03970535f, 0.04895468f, -0.01169566f, -0.03980601f, - 0.05682293f, 0.05925463f, -0.01165808f, -0.07936699f, -0.04208954f, - 0.01333987f, 0.09051196f, 0.10098671f, -0.03974256f, 0.01238771f, - -0.07501741f, -0.03655440f, -0.04301528f, 0.09216860f, - 4.63579083e-004f, 0.02851115f, 0.02142735f, 1.28244064e-004f, - 0.02879687f, -0.08554889f, -0.04838862f, 0.08135369f, -0.05756533f, - 0.01413900f, 0.03451880f, -0.06619488f, -0.03053130f, 0.02961676f, - -0.07384635f, 0.01135692f, 0.05283910f, -0.07778034f, -0.02107482f, - -0.05511716f, -0.13473752f, 0.03030157f, 0.06722020f, -0.06218817f, - -0.05826827f, 0.06254654f, 0.02895772f, -0.01664000f, -0.03620280f, - -0.01612278f, -1.46097376e-003f, 0.14013411f, -8.96181818e-003f, - -0.03250246f, 3.38630192e-003f, 2.64779478e-003f, 0.03359732f, - -0.02411991f, -0.04229729f, 0.10666174f, -6.66579151f }; - return vector(detector, detector + sizeof(detector)/sizeof(detector[0])); + static const float detector[] = + { + 0.05359386f, -0.14721455f, -0.05532170f, 0.05077307f, + 0.11547081f, -0.04268804f, 0.04635834f, -0.05468199f, 0.08232084f, + 0.10424068f, -0.02294518f, 0.01108519f, 0.01378693f, 0.11193510f, + 0.01268418f, 0.08528346f, -0.06309239f, 0.13054633f, 0.08100729f, + -0.05209739f, 
-0.04315529f, 0.09341384f, 0.11035026f, -0.07596218f, + -0.05517511f, -0.04465296f, 0.02947334f, 0.04555536f, + -3.55954492e-003f, 0.07818956f, 0.07730991f, 0.07890715f, 0.06222893f, + 0.09001380f, -0.03574381f, 0.03414327f, 0.05677258f, -0.04773581f, + 0.03746637f, -0.03521175f, 0.06955440f, -0.03849038f, 0.01052293f, + 0.01736112f, 0.10867710f, 0.08748853f, 3.29739624e-003f, 0.10907028f, + 0.07913758f, 0.10393070f, 0.02091867f, 0.11594022f, 0.13182420f, + 0.09879354f, 0.05362710f, -0.06745391f, -7.01260753e-003f, + 5.24702156e-003f, 0.03236255f, 0.01407916f, 0.02207983f, 0.02537322f, + 0.04547948f, 0.07200756f, 0.03129894f, -0.06274468f, 0.02107014f, + 0.06035208f, 0.08636236f, 4.53164103e-003f, 0.02193363f, 0.02309801f, + 0.05568166f, -0.02645093f, 0.04448695f, 0.02837519f, 0.08975694f, + 0.04461516f, 0.08975355f, 0.07514391f, 0.02306982f, 0.10410084f, + 0.06368385f, 0.05943464f, 4.58420580e-003f, 0.05220337f, 0.06675851f, + 0.08358569f, 0.06712101f, 0.06559004f, -0.03930482f, -9.15936660e-003f, + -0.05897915f, 0.02816453f, 0.05032348f, 0.06780671f, 0.03377650f, + -6.09417039e-004f, -0.01795146f, -0.03083684f, -0.01302475f, + -0.02972313f, 7.88706727e-003f, -0.03525961f, -2.50397739e-003f, + 0.05245084f, 0.11791293f, -0.02167498f, 0.05299332f, 0.06640524f, + 0.05190265f, -8.27316567e-003f, 0.03033127f, 0.05842173f, + -4.01050318e-003f, -6.25105947e-003f, 0.05862958f, -0.02465461f, + 0.05546781f, -0.08228195f, -0.07234028f, 0.04640540f, -0.01308254f, + -0.02506191f, 0.03100746f, -0.04665651f, -0.04591486f, 0.02949927f, + 0.06035462f, 0.02244646f, -0.01698639f, 0.01040041f, 0.01131170f, + 0.05419579f, -0.02130277f, -0.04321722f, -0.03665198f, 0.01126490f, + -0.02606488f, -0.02228328f, -0.02255680f, -0.03427236f, + -7.75165204e-003f, -0.06195229f, 8.21638294e-003f, 0.09535975f, + -0.03709979f, -0.06942501f, 0.14579427f, -0.05448192f, -0.02055904f, + 0.05747357f, 0.02781788f, -0.07077577f, -0.05178314f, -0.10429011f, + -0.11235505f, 0.07529039f, -0.07559302f, 
-0.08786739f, 0.02983843f, + 0.02667585f, 0.01382199f, -0.01797496f, -0.03141199f, -0.02098101f, + 0.09029204f, 0.04955018f, 0.13718739f, 0.11379953f, 1.80019124e-003f, + -0.04577610f, -1.11108483e-003f, -0.09470536f, -0.11596080f, + 0.04489342f, 0.01784211f, 3.06850672e-003f, 0.10781866f, + 3.36498418e-003f, -0.10842580f, -0.07436839f, -0.10535070f, + -0.01866805f, 0.16057891f, -5.07316366e-003f, -0.04295658f, + -5.90488780e-003f, 8.82003549e-003f, -0.01492646f, -0.05029279f, + -0.12875880f, 8.78831954e-004f, -0.01297184f, -0.07592774f, + -0.02668831f, -6.93787413e-004f, 0.02406698f, -0.01773298f, + -0.03855745f, -0.05877856f, 0.03259695f, 0.12826584f, 0.06292590f, + -4.10733931e-003f, 0.10996531f, 0.01332991f, 0.02088735f, 0.04037504f, + -0.05210760f, 0.07760046f, 0.06399347f, -0.05751930f, -0.10053057f, + 0.07505023f, -0.02139782f, 0.01796176f, 2.34400877e-003f, -0.04208319f, + 0.07355055f, 0.05093350f, -0.02996780f, -0.02219072f, 0.03355330f, + 0.04418742f, -0.05580705f, -0.05037573f, -0.04548179f, 0.01379514f, + 0.02150671f, -0.02194211f, -0.13682702f, 0.05464972f, 0.01608082f, + 0.05309116f, 0.04701022f, 1.33690401e-003f, 0.07575664f, 0.09625306f, + 8.92647635e-003f, -0.02819123f, 0.10866830f, -0.03439325f, + -0.07092371f, -0.06004780f, -0.02712298f, -7.07467366e-003f, + -0.01637020f, 0.01336790f, -0.10313606f, 0.04906582f, -0.05732445f, + -0.02731079f, 0.01042235f, -0.08340668f, 0.03686501f, 0.06108340f, + 0.01322748f, -0.07809529f, 0.03774724f, -0.03413248f, -0.06096525f, + -0.04212124f, -0.07982176f, -1.25973229e-003f, -0.03045501f, + -0.01236493f, -0.06312395f, 0.04789570f, -0.04602066f, 0.08576570f, + 0.02521080f, 0.02988098f, 0.10314583f, 0.07060035f, 0.04520544f, + -0.04426654f, 0.13146530f, 0.08386490f, 0.02164590f, -2.12280243e-003f, + -0.03686353f, -0.02074944f, -0.03829959f, -0.01530596f, 0.02689708f, + 0.11867401f, -0.06043470f, -0.02785023f, -0.04775074f, 0.04878745f, + 0.06350956f, 0.03494788f, 0.01467400f, 1.17890188e-003f, 0.04379614f, + 
2.03681854e-003f, -0.03958609f, -0.01072688f, 6.43705716e-003f, + 0.02996500f, -0.03418507f, -0.01960307f, -0.01219154f, + -4.37000440e-003f, -0.02549453f, 0.02646318f, -0.01632513f, + 6.46516960e-003f, -0.01929734f, 4.78711911e-003f, 0.04962371f, + 0.03809111f, 0.07265724f, 0.05758125f, -0.03741554f, 0.01648608f, + -8.45285598e-003f, 0.03996826f, -0.08185477f, 0.02638875f, + -0.04026615f, -0.02744674f, -0.04071517f, 1.05096330e-003f, + -0.04741232f, -0.06733172f, 8.70434940e-003f, -0.02192543f, + 1.35350740e-003f, -0.03056974f, -0.02975521f, -0.02887780f, + -0.01210713f, -0.04828526f, -0.09066251f, -0.09969629f, -0.03665164f, + -8.88111943e-004f, -0.06826669f, -0.01866150f, -0.03627640f, + -0.01408288f, 0.01874239f, -0.02075835f, 0.09145175f, -0.03547291f, + 0.05396780f, 0.04198981f, 0.01301925f, -0.03384354f, -0.12201976f, + 0.06830920f, -0.03715654f, 9.55848210e-003f, 5.05685573e-003f, + 0.05659294f, 3.90764466e-003f, 0.02808490f, -0.05518097f, -0.03711621f, + -0.02835565f, -0.04420464f, -0.01031947f, 0.01883466f, + -8.49525444e-003f, -0.09419250f, -0.01269387f, -0.02133371f, + -0.10190815f, -0.07844430f, 2.43644323e-003f, -4.09610150e-003f, + 0.01202551f, -0.06452291f, -0.10593818f, -0.02464746f, -0.02199699f, + -0.07401930f, 0.07285886f, 8.87513801e-004f, 9.97662079e-003f, + 8.46779719e-003f, 0.03730333f, -0.02905126f, 0.03573337f, -0.04393689f, + -0.12014472f, 0.03176554f, -2.76015815e-003f, 0.10824566f, 0.05090732f, + -3.30179278e-003f, -0.05123822f, 5.04784798e-003f, -0.05664124f, + -5.99415926e-003f, -0.05341901f, -0.01221393f, 0.01291318f, + 9.91760660e-003f, -7.56987557e-003f, -0.06193124f, -2.24549137e-003f, + 0.01987562f, -0.02018840f, -0.06975540f, -0.06601523f, -0.03349112f, + -0.08910118f, -0.03371435f, -0.07406893f, -0.02248047f, -0.06159951f, + 2.77751544e-003f, -0.05723337f, -0.04792468f, 0.07518548f, + 2.77279224e-003f, 0.04211938f, 0.03100502f, 0.05278448f, 0.03954679f, + -0.03006846f, -0.03851741f, -0.02792403f, -0.02875333f, 0.01531280f, + 
0.02186953f, -0.01989829f, 2.50679464e-003f, -0.10258728f, + -0.04785743f, -0.02887216f, 3.85063468e-003f, 0.01112236f, + 8.29218887e-003f, -0.04822981f, -0.04503597f, -0.03713100f, + -0.06988008f, -0.11002295f, -2.69209221e-003f, 1.85383670e-003f, + -0.05921049f, -0.06105053f, -0.08458050f, -0.04527602f, + 8.90329306e-004f, -0.05875023f, -2.68602883e-003f, -0.01591195f, + 0.03631859f, 0.05493166f, 0.07300330f, 5.53333294e-003f, 0.06400407f, + 0.01847740f, -5.76280477e-003f, -0.03210877f, 4.25160583e-003f, + 0.01166520f, -1.44864211e-003f, 0.02253744f, -0.03367080f, 0.06983195f, + -4.22323542e-003f, -8.89401045e-003f, -0.07943393f, 0.05199728f, + 0.06065201f, 0.04133492f, 1.44032843e-003f, -0.09585235f, -0.03964731f, + 0.04232114f, 0.01750465f, -0.04487902f, -7.59733608e-003f, 0.02011171f, + 0.04673622f, 0.09011173f, -0.07869188f, -0.04682482f, -0.05080139f, + -3.99383716e-003f, -0.05346331f, 0.01085723f, -0.03599333f, + -0.07097908f, 0.03551549f, 0.02680387f, 0.03471529f, 0.01790393f, + 0.05471273f, 9.62048303e-003f, -0.03180215f, 0.05864431f, 0.02330614f, + 0.01633144f, -0.05616681f, -0.10245429f, -0.08302189f, 0.07291322f, + -0.01972590f, -0.02619633f, -0.02485327f, -0.04627592f, + 1.48853404e-003f, 0.05514185f, -0.01270860f, -0.01948900f, 0.06373586f, + 0.05002292f, -0.03009798f, 8.76216311e-003f, -0.02474238f, + -0.05504891f, 1.74034527e-003f, -0.03333667f, 0.01524987f, 0.11663762f, + -1.32344989e-003f, -0.06608453f, 0.05687166f, -6.89525274e-004f, + -0.04402352f, 0.09450210f, -0.04222684f, -0.05360983f, 0.01779531f, + 0.02561388f, -0.11075410f, -8.77790991e-003f, -0.01099504f, + -0.10380266f, 0.03103457f, -0.02105741f, -0.07371717f, 0.05146710f, + 0.10581432f, -0.08617968f, -0.02892107f, 0.01092199f, 0.14551543f, + -2.24320893e-003f, -0.05818033f, -0.07390742f, 0.05701261f, + 0.12937020f, -0.04986651f, 0.10182415f, 0.05028650f, 0.12515625f, + 0.09175041f, 0.06404983f, 0.01523394f, 0.09460562f, 0.06106631f, + -0.14266998f, -0.02926703f, 0.02762171f, 
0.02164151f, + -9.58488265e-004f, -0.04231362f, -0.09866509f, 0.04322244f, + 0.05872034f, -0.04838847f, 0.06319253f, 0.02443798f, -0.03606876f, + 9.38737206e-003f, 0.04289991f, -0.01027411f, 0.08156885f, 0.08751175f, + -0.13191354f, 8.16054735e-003f, -0.01452161f, 0.02952677f, 0.03615945f, + -2.09128903e-003f, 0.02246693f, 0.09623287f, 0.09412123f, -0.02924758f, + -0.07815186f, -0.02203079f, -2.02566991e-003f, 0.01094733f, + -0.01442332f, 0.02838561f, 0.11882371f, 7.28798332e-003f, -0.10345965f, + 0.07561217f, -0.02049661f, 4.44177445e-003f, 0.01609347f, -0.04893158f, + -0.08758243f, -7.67420698e-003f, 0.08862378f, 0.06098121f, 0.06565887f, + 7.32981879e-003f, 0.03558407f, -0.03874352f, -0.02490055f, + -0.06771075f, 0.09939223f, -0.01066077f, 0.01382995f, -0.07289080f, + 7.47184316e-003f, 0.10621431f, -0.02878659f, 0.02383525f, -0.03274646f, + 0.02137008f, 0.03837290f, 0.02450992f, -0.04296818f, -0.02895143f, + 0.05327370f, 0.01499020f, 0.04998732f, 0.12938657f, 0.09391870f, + 0.04292390f, -0.03359194f, -0.06809492f, 0.01125796f, 0.17290455f, + -0.03430733f, -0.06255233f, -0.01813114f, 0.11726857f, -0.06127599f, + -0.08677909f, -0.03429872f, 0.04684938f, 0.08161420f, 0.03538774f, + 0.01833884f, 0.11321855f, 0.03261845f, -0.04826299f, 0.01752407f, + -0.01796414f, -0.10464549f, -3.30041884e-003f, 2.29343961e-004f, + 0.01457292f, -0.02132982f, -0.02602923f, -9.87351313e-003f, + 0.04273872f, -0.02103316f, -0.07994065f, 0.02614958f, -0.02111666f, + -0.06964913f, -0.13453490f, -0.06861878f, -6.09341264e-003f, + 0.08251446f, 0.15612499f, 2.46531400e-003f, 8.88424646e-003f, + -0.04152999f, 0.02054853f, 0.05277953f, -0.03087788f, 0.02817579f, + 0.13939077f, 0.07641046f, -0.03627627f, -0.03015098f, -0.04041540f, + -0.01360690f, -0.06227205f, -0.02738223f, 0.13577610f, 0.15235767f, + -0.05392922f, -0.11175954f, 0.02157129f, 0.01146481f, -0.05264937f, + -0.06595174f, -0.02749175f, 0.11812254f, 0.17404149f, -0.06137035f, + -0.11003478f, -0.01351621f, -0.01745916f, -0.08577441f, 
-0.04469909f, + -0.06106115f, 0.10559758f, 0.20806813f, -0.09174948f, 7.09621934e-004f, + 0.03579374f, 0.07215115f, 0.02221742f, 0.01827742f, -7.90785067e-003f, + 0.01489554f, 0.14519960f, -0.06425831f, 0.02990399f, -1.80181325e-003f, + -0.01401528f, -0.04171134f, -3.70530109e-003f, -0.09090481f, + 0.09520713f, 0.08845516f, -0.02651753f, -0.03016730f, 0.02562448f, + 0.03563816f, -0.03817881f, 0.01433385f, 0.02256983f, 0.02872120f, + 0.01001934f, -0.06332260f, 0.04338406f, 0.07001807f, -0.04705722f, + -0.07318907f, 0.02630457f, 0.03106382f, 0.06648342f, 0.10913180f, + -0.01630815f, 0.02910308f, 0.02895109f, 0.08040254f, 0.06969310f, + 0.06797734f, 6.08639978e-003f, 4.16588830e-003f, 0.08926726f, + -0.03123648f, 0.02700146f, 0.01168734f, -0.01631594f, 4.61015804e-003f, + 8.51359498e-003f, -0.03544224f, 0.03571994f, 4.29766066e-003f, + -0.01970077f, -8.79793242e-003f, 0.09607988f, 0.01544222f, + -0.03923707f, 0.07308586f, 0.06061262f, 1.31683104e-004f, + -7.98222050e-003f, 0.02399261f, -0.06084389f, -0.02743429f, + -0.05475523f, -0.04131311f, 0.03559756f, 0.03055342f, 0.02981433f, + 0.14860515f, 0.01766787f, 0.02945257f, 0.04898238f, 0.01026922f, + 0.02811658f, 0.08267091f, 0.02732154f, -0.01237693f, 0.11760156f, + 0.03802063f, -0.03309754f, 5.24957618e-003f, -0.02460510f, 0.02691451f, + 0.05399988f, -0.10133506f, 0.06385437f, -0.01818005f, 0.02259503f, + 0.03573135f, 0.01042848f, -0.04153402f, -0.04043029f, 0.01643575f, + 0.08326677f, 4.61383024e-004f, -0.05308095f, -0.08536223f, + -1.61011645e-003f, -0.02163720f, -0.01783352f, 0.03859637f, + 0.08498885f, -0.01725216f, 0.08625131f, 0.10995087f, 0.09177644f, + 0.08498347f, 0.07646490f, 0.05580502f, 0.02693516f, 0.09996913f, + 0.09070327f, 0.06667200f, 0.05873008f, -0.02247842f, 0.07772321f, + 0.12408436f, 0.12629253f, -8.41997913e-004f, 0.01477783f, 0.09165990f, + -2.98401713e-003f, -0.06466447f, -0.07057302f, 2.09516948e-004f, + 0.02210209f, -0.02158809f, -0.08602506f, -0.02284836f, + 4.01876355e-003f, 
9.56660323e-003f, -0.02073978f, -0.04635138f, + -7.59423291e-003f, -0.01377393f, -0.04559359f, -0.13284740f, + -0.08671406f, -0.03654395f, 0.01142869f, 0.03287891f, -0.04392983f, + 0.06142959f, 0.17710890f, 0.10385257f, 0.01329137f, 0.10067633f, + 0.12450829f, -0.04476709f, 0.09049144f, 0.04589312f, 0.11167907f, + 0.08587538f, 0.04767583f, 1.67188141e-003f, 0.02359802f, -0.03808852f, + 0.03126272f, -0.01919029f, -0.05698918f, -0.02365112f, -0.06519032f, + -0.05599358f, -0.07097308f, -0.03301812f, -0.04719102f, -0.02566297f, + 0.01324074f, -0.09230672f, -0.05518232f, -0.04712864f, -0.03380903f, + -0.06719479f, 0.01183908f, -0.09326738f, 0.01642865f, 0.03789867f, + -6.61567831e-003f, 0.07796386f, 0.07246574f, 0.04706347f, -0.02523437f, + -0.01696830f, -0.08068866f, 0.06030888f, 0.10527060f, -0.06611756f, + 0.02977346f, 0.02621830f, 0.01913855f, -0.08479366f, -0.06322418f, + -0.13570616f, -0.07644490f, 9.31900274e-003f, -0.08095149f, + -0.10197903f, -0.05204025f, 0.01413151f, -0.07800411f, -0.01885122f, + -0.07509381f, -0.10136326f, -0.05212355f, -0.09944065f, + -1.33606605e-003f, -0.06342617f, -0.04178550f, -0.12373723f, + -0.02832736f, -0.06057501f, 0.05830070f, 0.07604282f, -0.06462587f, + 8.02447461e-003f, 0.11580125f, 0.12332212f, 0.01978462f, + -2.72378162e-003f, 0.05850752f, -0.04674481f, 0.05148062f, + -2.62542837e-003f, 0.11253355f, 0.09893716f, 0.09785093f, -0.04659257f, + -0.01102429f, -0.07002308f, 0.03088913f, -0.02565549f, -0.07671449f, + 3.17443861e-003f, -0.10783514f, -0.02314270f, -0.11089555f, + -0.01024768f, 0.03116021f, -0.04964825f, 0.02281825f, 5.50005678e-003f, + -0.08427856f, -0.14685495f, -0.07719755f, -0.13342668f, -0.04525511f, + -0.09914210f, 0.02588859f, 0.03469279f, 0.04664020f, 0.11688190f, + 0.09647275f, 0.10857815f, -0.01448726f, 0.04299758f, -0.06763151f, + 1.33257592e-003f, 0.14331576f, 0.07574340f, 0.09166205f, 0.05674926f, + 0.11325553f, -0.01106494f, 0.02062161f, -0.11484840f, -0.07492137f, + -0.02864293f, -0.01275638f, 
-0.06946032f, -0.10101652f, -0.04113498f, + -0.02214783f, -0.01273942f, -0.07480393f, -0.10556041f, -0.07622112f, + -0.09988393f, -0.11453961f, -0.12073903f, -0.09412795f, -0.07146588f, + -0.04054537f, -0.06127083f, 0.04221122f, 0.07688113f, 0.04099256f, + 0.12663734f, 0.14683802f, 0.21761774f, 0.12525328f, 0.18431792f, + -1.66402373e-003f, 2.37777247e-003f, 0.01445475f, 0.03509416f, + 0.02654697f, 0.01716739f, 0.05374011f, 0.02944174f, 0.11323927f, + -0.01485456f, -0.01611330f, -1.85554172e-003f, -0.01708549f, + -0.05435753f, -0.05302101f, 0.05260378f, -0.03582945f, + -3.42867890e-004f, 1.36076682e-003f, -0.04436073f, -0.04228432f, + 0.03281291f, -0.05480836f, -0.10197772f, -0.07206279f, -0.10741059f, + -0.02366946f, 0.10278475f, -2.74783419e-003f, -0.03242477f, + 0.02308955f, 0.02835869f, 0.10348799f, 0.19580358f, 0.10252027f, + 0.08039929f, 0.05525554f, -0.13250865f, -0.14395352f, 3.13586881e-003f, + -0.03387071f, 8.94669443e-003f, 0.05406157f, -4.97324532e-003f, + -0.01189114f, 2.82919413e-004f, -0.03901557f, -0.04898705f, + 0.02164520f, -0.01382906f, -0.01850416f, 0.01869347f, -0.02450060f, + 0.02291678f, 0.08196463f, 0.03309153f, -0.10629974f, 0.02473924f, + 0.05344394f, -0.02404823f, -0.03243643f, -5.55244600e-003f, + -0.08009996f, 0.02811539f, 0.04235742f, 0.01859004f, 0.04902123f, + -0.01438252f, -0.01526853f, 0.02044195f, -0.05008660f, 0.04244113f, + 0.07611816f, 0.04950470f, -0.06020549f, -4.26026015e-003f, 0.13133512f, + -0.01438738f, -0.01958807f, -0.04044152f, -0.12425045f, + 2.84353318e-003f, -0.05042776f, -0.09121484f, 7.34345755e-003f, + 0.09388847f, 0.11800314f, 4.72295098e-003f, 4.44378285e-003f, + -0.07984917f, -0.03613737f, 0.04490915f, -0.02246483f, 0.04681071f, + 0.05240871f, 0.02157206f, -0.04603431f, -0.01197929f, -0.02748779f, + 0.13621049f, 0.08812155f, -0.07802048f, 4.86458559e-003f, -0.01598836f, + 0.01024450f, -0.03463517f, -0.02304239f, -0.08692665f, 0.06655128f, + 0.05785803f, -0.12640759f, 0.02307472f, 0.07337402f, 0.07525434f, + 
0.04943763f, -0.02241034f, -0.09978238f, 0.14487994f, -0.06570521f, + -0.07855482f, 0.02830222f, -5.29603509e-004f, -0.04669895f, + -0.11822784f, -0.12246452f, -0.15365660f, -0.02969127f, 0.08078201f, + 0.13512598f, 0.11505685f, 0.04740673f, 0.01376022f, -0.05852978f, + -0.01537809f, -0.05541119f, 0.02491065f, -0.02870786f, 0.02760978f, + 0.23836176f, 0.22347429f, 0.10306466f, -0.06919070f, -0.10132039f, + -0.20198342f, -0.05040560f, 0.27163076f, 0.36987007f, 0.34540465f, + 0.29095781f, 0.05649706f, 0.04125737f, 0.07505883f, -0.02737836f, + -8.43431335e-003f, 0.07368195f, 0.01653876f, -0.09402955f, + -0.09574359f, 0.01474337f, -0.07128561f, -0.03460737f, 0.11438941f, + 0.13752601f, -0.06385452f, -0.06310338f, 8.19548313e-003f, 0.11622470f, + 5.05133113e-003f, -0.07602754f, 0.06695660f, 0.25723928f, 0.09037900f, + 0.28826267f, 0.13165380f, -0.05312614f, -0.02137198f, -0.03442232f, + -0.06255679f, 0.03899667f, 0.18391028f, 0.26016650f, 0.03374462f, + 0.01860465f, 0.19077586f, 0.18160543f, 3.43634398e-003f, -0.03036782f, + 0.19683038f, 0.35378191f, 0.24968483f, -0.03222649f, 0.28972381f, + 0.43091634f, 0.30778357f, 0.02335266f, -0.09877399f, -6.85245218e-003f, + 0.08945240f, -0.08150686f, 0.02792493f, 0.24806842f, 0.17338486f, + 0.06231801f, -0.10432383f, -0.16653322f, -0.13197899f, -0.08531576f, + -0.19271527f, -0.13536365f, 0.22240199f, 0.39219588f, 0.26597717f, + -0.01231649f, 0.01016179f, 0.13379875f, 0.12018334f, -0.04852953f, + -0.07915270f, 0.07036012f, 3.87723115e-003f, -0.06126805f, + -0.15015170f, -0.11406515f, -0.08556531f, -0.07429333f, -0.16115491f, + 0.13214062f, 0.25691369f, 0.05697750f, 0.06861912f, -6.02903729e-003f, + -7.94562511e-003f, 0.04799571f, 0.06695165f, -0.01926842f, 0.06206308f, + 0.13450983f, -0.06381495f, -2.98370165e-003f, -0.03482971f, + 7.53991678e-003f, 0.03895611f, 0.11464261f, 0.01669971f, + 8.27818643e-003f, -7.49160210e-003f, -0.11712562f, -0.10650621f, + -0.10353880f, -0.04994106f, -7.65618810e-004f, 0.03023767f, + -0.04759270f, 
-0.07302686f, -0.05825012f, -0.13156348f, -0.10639747f, + -0.19393684f, -0.09973683f, -0.07918908f, 4.63177625e-004f, + -6.61382044e-004f, 0.15853868f, 0.08561199f, -0.07660093f, + -0.08015265f, -0.06164073f, 0.01882577f, -7.29908410e-004f, + 0.06840892f, 0.03843764f, 0.20274927f, 0.22028814f, -5.26101235e-003f, + 0.01452435f, -0.06331623f, 0.02865064f, 0.05673740f, 0.12171564f, + 0.03837196f, 0.03555467f, -0.02662914f, -0.10280123f, -0.06526285f, + -0.11066351f, -0.08988424f, -0.10103678f, 8.10526591e-003f, + 5.95238712e-003f, 0.02617721f, -0.01705742f, -0.10897956f, + -0.08004991f, -0.11271993f, -0.06185647f, -0.06103712f, 0.01597041f, + -0.05923606f, 0.09410726f, 0.22858568f, 0.03263380f, 0.06772990f, + -0.09003516f, 0.01017870f, 0.01931688f, 0.08628357f, -0.01430009f, + 0.10954945f, 0.16612452f, -0.02434544f, -0.03310068f, -0.04236627f, + 0.01212392f, -6.15046406e-003f, 0.06954194f, 0.03015283f, 0.01787957f, + 0.02781667f, -0.05561153f, -8.96244217e-003f, -0.04971489f, + 0.07510284f, 0.01775282f, 0.05889897f, -0.07981427f, 0.03647643f, + -3.73833324e-003f, -0.08894575f, -0.06429435f, -0.08068276f, + 0.03567704f, -0.07131936f, -7.21910037e-003f, -0.09566668f, + 0.17886090f, 0.14911725f, 0.02070032f, -0.05017120f, -0.04992622f, + 0.01570143f, -0.09906903f, 0.06456193f, 0.15329507f, 0.18820767f, + 0.11689861f, -0.01178513f, -0.02225163f, -0.01905318f, 0.10271224f, + -7.27029052e-003f, 0.11664233f, 0.14796902f, 0.07771893f, 0.02400013f, + -0.05361797f, -0.01972888f, 0.01376177f, 0.06740040f, -0.06525395f, + 0.05726178f, -0.02404981f, -0.14018567f, -0.02074987f, -0.04621970f, + -0.04688627f, -0.01842059f, 0.07722727f, -0.04852883f, 0.01529004f, + -0.19639495f, 0.10817073f, 0.03795860f, -0.09435206f, -0.07984378f, + -0.03383440f, 0.11081333f, 0.02237366f, 0.12703256f, 0.21613893f, + 0.02918790f, 4.66472283e-003f, -0.10274266f, -0.04854131f, + -3.46305710e-003f, 0.08652268f, 0.02251546f, 0.09636052f, 0.17180754f, + -0.09272388f, 4.59174305e-004f, -0.11723048f, 
-0.12210111f, + -0.15547538f, 0.07218186f, -0.05297846f, 0.03779940f, 0.05150875f, + -0.03802310f, 0.03870645f, -0.15250699f, -0.08696499f, -0.02021560f, + 0.04118926f, -0.15177974f, 0.01577647f, 0.10249301f, 7.50041893e-003f, + 0.01721806f, -0.06828983f, -0.02397596f, -0.06598977f, -0.04317593f, + -0.08064980f, 6.66632550e-003f, 0.03333484f, 0.07093620f, 0.08231064f, + -0.06577903f, -0.06698844f, -0.06984019f, -0.06508023f, -0.14145090f, + -0.02393239f, 0.06485303f, 8.83263443e-003f, 0.09251080f, -0.07557579f, + -0.05067699f, -0.09798748f, -0.06703258f, -0.14056294f, 0.03245994f, + 0.12554143f, 0.01761621f, 0.12980327f, -0.04081950f, -0.11906909f, + -0.14813015f, -0.08376863f, -0.12200681f, 0.04988137f, 0.05424247f, + -3.90952639e-003f, 0.03255733f, -0.12717837f, -0.07461493f, + -0.05703964f, -0.01736189f, -0.08026433f, -0.05433894f, -0.01719359f, + 0.02886275f, 0.01772653f, -0.09163518f, 3.57789593e-003f, -0.10129993f, + -0.02653764f, -0.08131415f, -0.03847986f, -7.62157550e-004f, + 0.06486648f, 0.19675669f, -0.04919156f, -0.07059129f, -0.04857785f, + -0.01042383f, -0.08328653f, 0.03660302f, -0.03696846f, 0.04969259f, + 0.08241162f, -0.12514858f, -0.06122676f, -0.03750202f, + 6.52989605e-003f, -0.10247213f, 0.02568346f, 4.51781414e-003f, + -0.03734229f, -0.01131264f, -0.05412074f, 8.89345480e-004f, + -0.12388977f, -0.05959237f, -0.12418608f, -0.06151643f, -0.07310260f, + 0.02441575f, 0.07023528f, -0.07548289f, -7.57147965e-004f, + -0.09061348f, -0.08112976f, -0.06920306f, 9.54394229e-003f, + -0.01219902f, 1.21273217e-003f, -8.88989680e-003f, -0.08309301f, + -0.04552661f, -0.10739882f, -0.05691034f, -0.13928030f, 0.09027749f, + 0.15123098f, 0.03175976f, 0.17763577f, 3.29913251e-004f, 0.05151888f, + -0.09844074f, -0.09475287f, -0.08571247f, 0.16241577f, 0.19336018f, + 8.57454538e-003f, 0.11474732f, -0.01493934f, 0.03352379f, -0.08966240f, + -0.02322310f, 0.02663568f, 0.05448750f, -0.03536883f, -0.07210463f, + -0.06807277f, -0.03121621f, -0.05932408f, -0.17282860f, 
-0.15873498f, + -0.04956378f, 0.01603377f, -0.12385946f, 0.13878587f, 0.21468069f, + 0.13510075f, 0.20992437f, 0.08845878f, 0.08104013f, 0.03754176f, + 0.12173114f, 0.11103114f, 0.10643122f, 0.13941477f, 0.11640384f, + 0.14786847f, 0.01218238f, 0.01160753f, 0.03547940f, 0.08794311f, + -0.01695384f, -0.07692261f, -0.08236158f, 6.79194089e-003f, + -0.02458403f, 0.13022894f, 0.10953187f, 0.09857773f, 0.04735930f, + -0.04353498f, -0.15173385f, -0.17904443f, -0.10450364f, -0.13418166f, + -0.06633098f, -0.03170381f, -0.06839000f, -0.11350126f, -0.06983913f, + 0.19083543f, 0.17604128f, 0.07730632f, 0.10022651f, 0.36428109f, + 0.28291923f, 0.12688625f, 0.15942036f, 0.14064661f, -0.11201853f, + -0.13969108f, -0.09088077f, -0.14107047f, 0.05117374f, + -2.63348082e-003f, -0.10794610f, -0.09715455f, -0.05284977f, + 0.01565668f, 0.05031200f, 0.07021113f, -0.02963028f, 0.01766960f, + 0.08333644f, -0.03211382f, 4.90096770e-003f, 0.05186674f, -0.05045737f, + -0.09624767f, -0.02525997f, 0.06916669f, 0.01213916f, 0.05333899f, + -0.03443280f, -0.10055527f, -0.06291115f, 5.42851724e-003f, + -6.30360236e-003f, 0.02270257f, -0.01769792f, 0.03273688f, 0.07746078f, + 7.77099328e-003f, 0.05041346f, 0.01648103f, -0.02321534f, -0.09930186f, + -0.02293853f, 0.02034990f, -0.08324204f, 0.08510064f, -0.03732836f, + -0.06465405f, -0.06086946f, 0.13680504f, -0.11469388f, -0.03896406f, + -0.07142810f, 2.67581246e-003f, -0.03639632f, -0.09849060f, + -0.11014334f, 0.17489147f, 0.17610909f, -0.16091567f, -0.07248894f, + 0.01567141f, 0.23742996f, 0.07552249f, -0.06270349f, -0.07303379f, + 0.25442186f, 0.16903116f, -0.08168741f, -0.05913896f, -0.03954096f, + 6.81776879e-003f, -0.05615319f, -0.07303037f, -0.12176382f, + 0.12385108f, 0.22084464f, -0.05543206f, -0.03310431f, 0.05731593f, + 0.19481890f, 0.04016430f, -0.06480758f, -0.12353460f, 0.18733442f, + -0.09631214f, -0.11192076f, 0.12404587f, 0.15671748f, 0.19256128f, + 0.10895617f, 0.03391477f, -0.13032004f, -0.05626907f, -0.09025607f, + 0.23485197f, 
0.27812332f, 0.26725492f, 0.07255980f, 0.16565137f, + 0.22388470f, 0.07441066f, -0.21003133f, -0.08075339f, -0.15031935f, + 0.07023834f, 0.10872041f, 0.18156518f, 0.20037253f, 0.13571967f, + -0.11915682f, -0.11131983f, -0.18878011f, 0.06074620f, 0.20578890f, + 0.12413109f, 0.03930207f, 0.29176015f, 0.29502738f, 0.27856228f, + -0.01803601f, 0.16646385f, 0.19268319f, 0.01900682f, 0.06026287f, + 2.35868432e-003f, 0.01558199f, 0.02707230f, 0.11383014f, 0.12103992f, + 0.03907350f, 0.04637353f, 0.09020995f, 0.11919726f, -3.63007211e-003f, + 0.02220155f, 0.10336831f, 0.17351882f, 0.12259731f, 0.18983354f, + 0.15736865f, 0.01160725f, -0.01690723f, -9.69582412e-004f, 0.07213813f, + 0.01161613f, 0.17864859f, 0.24486147f, 0.18208991f, 0.20177495f, + 0.05972528f, -8.93934630e-003f, -0.02316955f, 0.14436610f, 0.14114498f, + 0.05520950f, 0.06353590f, -0.19124921f, 0.10174713f, 0.29414919f, + 0.26448128f, 0.09344960f, 0.15284036f, 0.19797507f, 0.11369792f, + -0.12722753f, -0.21396367f, -0.02008235f, -0.06566695f, -0.01662150f, + -0.03937003f, 0.04778343f, 0.05017274f, -0.02299062f, -0.20208496f, + -0.06395898f, 0.13721776f, 0.22544557f, 0.14888357f, 0.08687132f, + 0.27088094f, 0.32206613f, 0.09782200f, -0.18523243f, -0.17232181f, + -0.01041531f, 0.04008654f, 0.04199702f, -0.08081299f, -0.03755421f, + -0.04809646f, -0.05222081f, -0.21709201f, -0.06622940f, 0.02945281f, + -0.04600435f, -0.05256077f, -0.08432942f, 0.02848100f, 0.03490564f, + 8.28621630e-003f, -0.11051246f, -0.11210597f, -0.01998289f, + -0.05369405f, -0.08869293f, -0.18799506f, -0.05436598f, -0.05011634f, + -0.05419716f, -0.06151857f, -0.10827805f, 0.04346735f, 0.04016083f, + 0.01520820f, -0.12173316f, -0.04880285f, -0.01101406f, 0.03250847f, + -0.06009551f, -0.03082932f, -0.02295134f, -0.06856834f, -0.08775249f, + -0.23793389f, -0.09174541f, -0.05538322f, -0.04321031f, -0.11874759f, + -0.04221844f, -0.06070468f, 0.01194489f, 0.02608565f, -0.03892140f, + -0.01643151f, -0.02602034f, -0.01305472f, 0.03920100f, 
-0.06514261f, + 0.01126918f, -6.27710763e-003f, -0.02720047f, -0.11133634f, + 0.03300330f, 0.02398472f, 0.04079665f, -0.10564448f, 0.05966159f, + 0.01195221f, -0.03179441f, -0.01692590f, -0.06177841f, 0.01841576f, + -5.51078189e-003f, -0.06821765f, -0.03191888f, -0.09545476f, + 0.03030550f, -0.04896152f, -0.02914624f, -0.13283344f, -0.04783419f, + 6.07836898e-003f, -0.01449538f, -0.13358212f, -0.09687774f, + -0.02813793f, 0.01213498f, 0.06650011f, -0.02039067f, 0.13356198f, + 0.05986415f, -9.12760664e-003f, -0.18780160f, -0.11992817f, + -0.06342237f, 0.01229534f, 0.07143231f, 0.10713009f, 0.11085765f, + 0.06569190f, -0.02956399f, -0.16288325f, -0.13993549f, -0.01292515f, + 0.03833013f, 0.09130384f, -0.05086257f, 0.05617329f, -0.03896667f, + -0.06282311f, -0.11490010f, -0.14264110f, -0.04530499f, 0.01598189f, + 0.09167797f, 0.08663294f, 0.04885277f, -0.05741219f, -0.07565769f, + -0.17136464f, -0.02619422f, -0.02477579f, 0.02679587f, 0.11621952f, + 0.08788391f, 0.15520640f, 0.04709549f, 0.04504483f, -0.10214074f, + -0.12293372f, -0.04820546f, -0.05484834f, 0.05473754f, 0.07346445f, + 0.05577277f, -0.08209965f, 0.03462975f, -0.20962234f, -0.09324598f, + 3.79481679e-003f, 0.03617633f, 0.16742408f, 0.07058107f, 0.10204960f, + -0.06795346f, 3.22807301e-003f, -0.12589309f, -0.17496960f, + 0.02078314f, -0.07694324f, 0.12184640f, 0.08997164f, 0.04793497f, + -0.11383379f, -0.08046359f, -0.25716835f, -0.08080962f, + 6.80711539e-003f, -0.02930280f, -3.04938294e-003f, -0.11106286f, + -0.04628860f, -0.07821649f, 7.70127494e-003f, -0.10247706f, + 1.21042714e-003f, 0.20573859f, -0.03241005f, 8.42972286e-003f, + 0.01946464f, -0.01197973f, -0.14579976f, 0.04233614f, + -4.14096704e-003f, -0.06866436f, -0.02431862f, -0.13529138f, + 1.25891645e-003f, -0.11425111f, -0.04303651f, -0.01694815f, + 0.05720210f, -0.16040207f, 0.02772896f, 0.05498345f, -0.15010567f, + 0.01450866f, 0.02350303f, -0.04301004f, -0.04951802f, 0.21702233f, + -0.03159155f, -0.01963303f, 0.18232647f, -0.03263875f, + 
-2.88476888e-003f, 0.01587562f, -1.94303901e-003f, -0.07789494f, + 0.04674156f, -6.25576358e-003f, 0.08925962f, 0.21353747f, 0.01254677f, + -0.06999976f, -0.05931328f, -0.01884327f, -0.04306272f, 0.11794136f, + 0.03842728f, -0.03907030f, 0.05636114f, -0.09766009f, -0.02104000f, + 8.72711372e-003f, -0.02736877f, -0.05112274f, 0.16996814f, 0.02955785f, + 0.02094014f, 0.08414304f, -0.03335762f, -0.03617457f, -0.05808248f, + -0.08872101f, 0.02927705f, 0.27077839f, 0.06075108f, 0.07478261f, + 0.15282831f, -0.03908454f, -0.05101782f, -9.51998029e-003f, + -0.03272416f, -0.08735625f, 0.07633440f, -0.07185312f, 0.13841286f, + 0.07812646f, -0.12901451f, -0.05488589f, -0.05644578f, -0.03290703f, + -0.11184757f, 0.03751570f, -0.05978153f, -0.09155276f, 0.05657315f, + -0.04328186f, -0.03047933f, -0.01413135f, -0.10181040f, -0.01384013f, + 0.20132534f, -0.01536873f, -0.07641169f, 0.05906778f, -0.07833145f, + -0.01523801f, -0.07502609f, -0.09461885f, -0.15013233f, 0.16050665f, + 0.09021381f, 0.08473236f, 0.03386267f, -0.09147339f, -0.09170618f, + -0.08498498f, -0.05119187f, -0.10431040f, 0.01041618f, -0.03064913f, + 0.09340212f, 0.06448522f, -0.03881054f, -0.04985436f, -0.14794017f, + -0.05200112f, -0.02144495f, 0.04000821f, 0.12420804f, -0.01851651f, + -0.04116732f, -0.11951703f, -0.04879033f, -0.08722515f, -0.08454733f, + -0.10549165f, 0.11251976f, 0.10766345f, 0.19201984f, 0.06128913f, + -0.02734615f, -0.08834923f, -0.16999826f, -0.03548348f, + -5.36092324e-003f, 0.08297954f, 0.07226378f, 0.04194529f, 0.04668673f, + 8.73902347e-003f, 0.06980139f, 0.05652480f, 0.05879445f, 0.02477076f, + 0.02451423f, 0.12433673f, 0.05600227f, 0.06886370f, 0.03863076f, + 0.07459056f, 0.02264139f, 0.01495469f, 0.06344220f, 0.06945208f, + 0.02931899f, 0.11719371f, 0.04527427f, 0.03248192f, 2.08271481e-003f, + 0.02044626f, 0.11403449f, 0.04303892f, 0.06444661f, 0.04959024f, + 0.08174094f, 0.09240247f, 0.04894639f, 0.02252937f, -0.01652530f, + 0.07587013f, 0.06064249f, 0.13954395f, 0.02772832f, 
0.07093039f, + 0.08501238f, 0.01701301f, 0.09055722f, 0.33421436f, 0.20163782f, + 0.09821030f, 0.07951369f, 0.08695120f, -0.12757730f, -0.13865978f, + -0.06610068f, -0.10985506f, 0.03406816f, -0.01116336f, -0.07281768f, + -0.13525715f, -0.12844718f, 0.08956250f, 0.09171610f, 0.10092317f, + 0.23385370f, 0.34489515f, 0.09901748f, 0.02002922f, 0.12335990f, + 0.07606190f, -0.14899330f, -0.15634622f, -0.06494618f, -0.01760547f, + 0.03404277f, -0.13208845f, -0.12101169f, -0.18294574f, -0.16560709f, + 0.02183887f, -0.02752613f, 0.01813638f, 0.02000757f, 0.01319924f, + 0.08030242f, 0.01220535f, 2.98233377e-003f, -0.01307070f, 0.05970297f, + -0.05345284f, -0.03381982f, -9.87543724e-003f, -0.06869387f, + 0.03956730f, -0.03108176f, -0.05732809f, 0.02172386f, 0.04159765f, + 2.62783933e-003f, 0.04813229f, 0.09358983f, -8.18389002e-003f, + 0.01724574f, -0.02547474f, -0.04967288f, -0.02390376f, 0.06640504f, + -0.06306566f, 0.01137518f, 0.05589378f, -0.08237787f, 0.02455001f, + -0.03059422f, -0.08953978f, 0.06851497f, 0.07190268f, -0.07610799f, + 7.87237938e-003f, -7.85830803e-003f, 0.06006952f, -0.01126728f, + -2.85743061e-003f, -0.04772895f, 0.01884944f, 0.15005857f, + -0.06268821f, -0.01989072f, 0.01138399f, 0.08760451f, 0.03879007f, + -9.66926850e-003f, -0.08012961f, 0.06414555f, -0.01362950f, + -0.09135523f, 0.01755159f, 0.04459474f, 0.09650917f, 0.05219948f, + -2.19440833e-003f, -0.07037939f, -0.01599054f, 0.13103317f, + -0.02492603f, -0.01032540f, -0.02903307f, 0.04489160f, 0.05148086f, + 0.01858173f, -0.02919228f, 0.08299296f, -0.04590359f, -0.15745632f, + -0.09068198f, -0.02972453f, 0.12985018f, 0.22320485f, 0.24261914f, + 0.03642650f, -0.05506422f, 2.67413049e-003f, -0.03834032f, 0.06449424f, + 0.03834866f, 0.03816991f, 0.25039271f, 0.34212017f, 0.32433882f, + 0.18824573f, -0.08599839f, -0.17599408f, -0.15317015f, -0.09913155f, + -0.02856072f, -0.05304699f, -1.06437842e-003f, -0.06641813f, + -0.07509298f, 0.01463361f, -0.07551918f, -0.04510373f, + -8.44620075e-003f, 
0.01772176f, 0.04068235f, 0.20295307f, 0.15719447f, + 0.05712103f, 0.26296997f, 0.14657754f, 0.01547317f, -0.05052776f, + -0.03881342f, -0.01437883f, -0.04930177f, 0.11719568f, 0.24098417f, + 0.26468599f, 0.31698579f, 0.10103608f, -0.01096375f, -0.01367013f, + 0.17104232f, 0.20065314f, 2.67622480e-003f, -0.01190034f, 0.18301608f, + 0.09459770f, -0.06357619f, -0.06473801f, 0.01377906f, -0.10032775f, + -0.06388740f, 3.80393048e-003f, 0.06206078f, 0.10349120f, 0.26804337f, + 8.17918684e-003f, -0.02314351f, 9.34422202e-003f, 0.09198381f, + 0.03681326f, -8.77339672e-003f, -0.09662418f, -0.02715708f, + 0.13503517f, 0.08962728f, -6.57071499e-003f, -0.03201199f, 0.28510824f, + 0.32095715f, 0.18512695f, -0.14230858f, -0.14048551f, -0.07181299f, + -0.08575408f, -0.08661680f, -0.17416079f, 7.54326640e-004f, + 0.05601677f, 0.13585392f, -0.04960437f, -0.07708392f, 0.10676333f, + -0.04407546f, -0.07209078f, 0.03663663f, 0.28949317f, 0.41127121f, + 0.27431169f, -0.06900328f, -0.21474190f, -0.15578632f, -0.19555484f, + -0.15209621f, -0.11269179f, 0.07416003f, 0.18991330f, 0.26858172f, + 0.01952259f, 0.01017922f, 0.02159843f, -4.95165400e-003f, -0.04368168f, + -0.12721671f, -0.06673957f, -0.11275250f, 0.04413409f, 0.05578312f, + 0.03896771f, 0.03566417f, -0.05871816f, -0.07388090f, -0.17965563f, + -0.08570268f, -0.15273231f, -0.06022318f, -0.06999847f, + -6.81510568e-003f, 0.06294262f, -6.54901436e-004f, -0.01128654f, + -0.02289657f, 0.04849290f, 0.04140804f, 0.23681939f, 0.14545733f, + 0.01989965f, 0.12032662f, 3.87463090e-003f, -6.02597650e-003f, + -0.05919775f, -0.03067224f, -0.07787777f, 0.10834727f, 0.02153730f, + 0.02765649f, 0.03975543f, -0.12182906f, -0.04900113f, -0.09940100f, + -0.06453611f, -0.13757215f, -0.03721382f, 0.02827376f, -0.04351249f, + 0.01907038f, -0.10284120f, -0.05671160f, -0.10760647f, -0.09624009f, + -0.09565596f, -0.01303654f, 0.03080539f, 0.01416511f, 0.05846142f, + -5.42971538e-003f, 0.06221476f, -0.03320325f, -0.06791797f, + -0.05791342f, 0.12851369f, 
0.14990346f, 0.03634374f, 0.14262885f, + 0.04330391f, 0.05032569f, -0.05631914f, 0.01606137f, 0.04387223f, + 0.22344995f, 0.15722635f, -0.04693628f, 0.03006579f, -2.52882647e-003f, + 0.05717621f, -0.07529724f, -0.02848588f, -0.06868757f, + -4.51729307e-003f, 0.06466042f, -0.05935378f, -0.04704857f, + -0.07363959f, 0.04843248f, -0.13421375f, -0.09789340f, -0.10255270f, + 0.03509852f, 0.04751543f, -0.03822323f, 0.09740467f, 0.04762916f, + 0.03940146f, -0.08283259f, 0.09552965f, 0.05038739f, 0.21258622f, + 0.09646992f, 0.03241193f, 0.05167701f, 0.04614570f, 0.04330090f, + -0.02671840f, -0.06259909f, -0.02301898f, 0.18829170f, 0.10522786f, + 0.04313190f, 0.01670948f, -0.08421925f, 0.05911417f, -0.10582602f, + -0.04855484f, -0.08373898f, 0.07775915f, 0.03723533f, -0.12047344f, + 4.86345543e-003f, -0.10520902f, 0.06571782f, -0.07528137f, + -0.03245651f, -0.09869066f, -0.02917477f, -0.18293270f, 0.14810945f, + 9.24033765e-003f, -0.04354914f, 0.02266885f, -0.11872729f, + -0.04016589f, 0.02830229f, 0.22539048f, 0.20565644f, 0.16701797f, + 0.09019924f, 0.01300652f, 0.09760600f, -0.03675831f, -0.01935448f, + -0.06894835f, 0.08077277f, 0.19047537f, 0.11312226f, 0.04106043f, + -0.11187182f, 0.04312806f, -0.18548580f, -0.11287174f, -0.08794551f, + 0.02078281f, -0.15295486f, 0.11806386f, -0.01103218f, -0.15971117f, + 0.02153538f, -0.05232147f, -0.10835317f, -0.13910367f, 0.05920752f, + -0.10122602f, 0.20174250f, 0.09105796f, -0.01881348f, 0.09559010f, + -0.03725745f, -0.09442931f, -0.09763174f, 0.05854454f, 0.08287182f, + 0.12919849f, 0.08594352f, -2.49806582e-003f, 0.02398440f, + 5.67950122e-003f, -0.06296340f, -0.12993270f, 0.03855852f, 0.05186560f, + 0.10839908f, -0.03380463f, -0.12654832f, -0.05399339f, -0.07456800f, + -0.04736232f, -0.10164231f, 0.07496139f, 0.08125214f, 0.07656177f, + -0.04999603f, -0.12823077f, -0.07692395f, -0.11317524f, -0.09118655f, + -0.05695669f, 0.10477209f, 0.07468581f, 0.01630048f, -8.00961629e-003f, + -0.06582128f, -0.04019095f, -0.04682907f, 
-0.01907842f, -0.10997720f, + 0.04911406f, 0.02931030f, 0.04197735f, -0.05773980f, -0.09670641f, + -0.03594951f, -0.03402121f, -0.07149299f, -0.10566200f, 0.10601286f, + 0.06340689f, -0.01518632f, -5.96402306e-003f, -0.07628012f, + -3.52779147e-003f, -0.02683854f, -0.10265494f, -0.02680815f, + 0.16338381f, 0.03103515f, 0.02296976f, 0.01624348f, -0.10831620f, + -0.02314233f, -0.04789969f, -0.05530700f, -0.06461314f, 0.10494506f, + 0.04642856f, -0.07592955f, -0.06197905f, -0.09042154f, -0.01445521f, + -0.04297818f, -0.11262015f, -0.11430512f, 0.03174541f, -0.03677487f, + -0.02963996f, -0.06610169f, -0.13292049f, -0.07059067f, -0.08444111f, + -0.02640536f, -0.07136250f, 0.04559967f, 0.01459980f, 0.17989251f, + 0.04435328f, -0.12464730f, -0.02871115f, -0.10752209f, -0.03393742f, + -0.03791408f, 0.02548251f, 0.01956050f, 0.19245651f, 0.13963254f, + -0.05904696f, -0.07424626f, -0.10411884f, 1.54176133e-003f, + 0.01797429f, 0.13025844f, 0.04547642f, -0.05710349f, -0.10697161f, + -0.13489437f, -0.06515755f, -0.06406886f, -4.08572936e-003f, + -0.01336483f, 0.04368737f, -0.11259720f, -0.05701635f, -0.06469971f, + -0.08346602f, -0.04166770f, -0.05795543f, -0.08247511f, -0.05742628f, + 0.08452254f, -0.03350224f, 0.13980860f, 0.13252275f, 0.07589617f, + 0.07539988f, 0.12155797f, 0.19087289f, 0.15050751f, 0.21250245f, + 0.14206800f, 0.01298489f, 0.07450245f, 0.06559097f, 0.01700557f, + 0.04512971f, 0.16950700f, 0.10261577f, 0.16389982f, 0.05505059f, + -0.03453077f, 0.08622462f, 0.07935954f, 0.03976260f, 0.02036091f, + 3.95744899e-003f, 0.03267065f, 0.15235919f, 0.01297494f, -0.08109194f, + 0.01407558f, 4.40693414e-003f, -0.15157418f, -0.11390478f, + -0.07487597f, -7.81322457e-003f, -0.02749545f, -0.10181408f, + 0.13755716f, 0.14007211f, 0.13482562f, 0.27517235f, 0.34251109f, + 0.07639657f, 0.07268607f, 0.19823882f, 0.16135791f, -0.04186463f, + -0.12784107f, -0.09846287f, 0.03169041f, 0.10974082f, -0.15051922f, + -0.08916726f, -0.07138767f, -0.04153349f, 6.25418453e-003f, + 
0.01266654f, 0.10533249f, 0.12749144f, 0.15148053f, 0.01498513f, + 0.06305949f, -0.01247123f, -0.08778401f, -0.08551880f, -0.11955146f, + -0.08493572f, -0.02901620f, -0.02394859f, -0.13427313f, -0.11053200f, + -0.14413260f, -0.15203285f, 0.03972760f, -3.72127310e-004f, + -0.04200919f, 0.06105104f, 0.01904975f, -0.01106191f, + -7.27445772e-003f, -0.01520341f, 1.10228511e-003f, -0.04949187f, + -0.08013099f, 5.72071038e-003f, 0.08415454f, -0.06523152f, 0.03664081f, + -0.02673042f, -0.12066154f, -0.03702074f, 0.06006580f, 0.01628682f, + -6.17772620e-003f, 0.08192339f, -3.41629819e-003f, 0.02870512f, + 0.05807141f, 0.04959986f, 0.04618251f, -0.04901629f, -0.10579574f, + 0.02274442f, 0.12070961f, 2.23597488e-003f, 0.09831765f, -0.03019848f, + -0.11181970f, -0.04961075f, 0.02498928f, -0.03714991f, -0.01619653f, + 0.02643486f, -7.62964319e-003f, -0.02882290f, -0.06242594f, + -0.08439861f, 0.07220893f, 0.07263952f, 0.01561574f, 0.03091968f, + 0.01708712f, -0.03797151f, -3.18561122e-003f, 0.01624021f, + -0.02828573f, 0.11284444f, -1.32280716e-003f, -0.07784860f, + -0.07209100f, 0.03372242f, 0.12154529f, 0.02278104f, -0.05275500f, + -0.01918484f, 0.12989293f, 0.05424401f, 0.02333086f, 0.04029022f, + 0.12392918f, 0.09495489f, 0.09190340f, 0.07935889f, 8.76816828e-003f, + 0.17148446f, -8.51302687e-003f, -0.08011249f, -0.06796283f, + 0.04884845f, 0.01112272f, -0.07835306f, -1.14811445e-003f, + -0.03440760f, 0.02845243f, 0.07695542f, -0.07069533f, -0.01151784f, + -8.53884313e-003f, -0.01662786f, -0.04163864f, 0.05400505f, + 0.02859163f, 0.02921852f, 0.05003135f, -6.85718050e-003f, -0.01632611f, + 0.07780217f, 0.04042810f, -0.01216440f, 3.60914599e-003f, -0.06322435f, + 0.09516726f, 0.12877031f, -9.69162490e-003f, 0.01031179f, 0.05180895f, + -9.34659224e-003f, -0.01644533f, -0.04849347f, -0.04343236f, + 0.10514783f, 0.08046635f, -0.04615205f, -0.03975486f, -0.01485525f, + 0.13096830f, -0.01517950f, -0.06571898f, -0.04016372f, 0.01849786f, + 0.02439670f, 0.08067258f, 
1.74824719e-003f, 0.07053747f, 0.08819518f, + -5.08352555e-003f, -0.06550863f, -0.08266170f, -0.07780605f, + 0.01453450f, -0.08756890f, 0.01096501f, -8.71319138e-003f, 0.10110464f, + 0.02420769f, -0.06708383f, 0.02007811f, 5.93133038e-003f, 0.05398923f, + 0.07538138f, 0.02049227f, 0.02242589f, 0.04011070f, -1.44875818e-003f, + -4.19115182e-003f, 0.06367654f, 0.02506934f, 0.02434536f, 0.05879405f, + -8.22952855e-003f, -0.01242441f, 0.04224926f, -0.01754923f, + 0.05958161f, 0.03818886f, -0.01830363f, -0.04308917f, -0.04422197f, + -0.02432721f, 0.02264866f, 2.03751423e-003f, 0.01197031f, 0.04439203f, + 0.12169247f, 0.03602713f, -0.02599251f, -1.98226492e-003f, 0.02046336f, + -0.02639058f, -1.91242550e-003f, -0.09334669f, -0.03595153f, + -9.88179818e-003f, -0.06848445f, -0.04666303f, -0.09955736f, + -0.04206430f, 0.02609075f, 9.09005292e-003f, -0.07138551f, + -4.22313227e-004f, 0.01766645f, 0.02756404f, 0.01308276f, 0.04052891f, + 0.02387515f, 0.05337298f, 0.02500631f, -0.04970853f, -0.12467445f, + 0.17604403f, 0.12256411f, -0.07512254f, 8.70451052e-003f, -0.05697548f, + -0.03626474f, -8.76623299e-003f, -0.01210897f, -0.09451522f, + 0.07490732f, -0.02008001f, -0.02681278f, -0.06463405f, -0.01517507f, + 7.33757764e-003f, 6.07147906e-003f, -0.09316964f, -0.04575328f, + 0.13261597f, 0.15424870f, -0.01655918f, -0.02772390f, -0.05243644f, + -0.02356456f, -0.02351753f, -0.10211615f, -0.12873036f, 0.14549787f, + 0.12519856f, 4.38762689e-003f, 0.02795992f, 0.05170322f, 0.09223596f, + 0.05890015f, 0.02376701f, -0.02777346f, 0.09506908f, 0.02328936f, + -0.02319928f, -0.03218696f, -0.01527841f, -0.01016694f, -0.02674719f, + 0.05137179f, 0.01980666f, 0.06544447f, -0.01746171f, 0.01026380f, + 0.01561806f, 7.97004555e-004f, 0.07601810f, 0.01907250f, -0.03083035f, + -0.05987392f, 0.09242783f, 0.14555025f, 0.01035827f, 0.03092401f, + -0.09562709f, -0.03802354f, 0.02531144f, 0.03079449f, -0.07100715f, + 0.03330721f, -2.69116857e-003f, 0.03167490f, 0.05744999f, 0.03259895f, + 
1.91266940e-003f, 0.03194578f, 0.07389776f, 0.02198060f, 0.07633314f, + 0.03293105f, -0.09103648f, 0.04718142f, 0.06102672f, -0.01003063f, + 5.85481385e-003f, -0.01522574f, 0.02323526f, 0.10584345f, + 4.35879454e-003f, 0.06107873f, 0.05868603f, -0.03115531f, 0.01214679f, + 0.08567052f, 3.93926632e-003f, -0.02521488f, -1.88425183e-003f, + 0.02038053f, -6.26854831e-004f, 0.04897438f, -0.04280585f, + -0.04819689f, -0.04812867f, -0.01451186f, 0.05101469f, + -9.01125465e-003f, -0.03333859f, 0.03917955f, 0.04196448f, 0.04292135f, + 0.02809529f, 0.02999715f, 0.04081348f, 9.10039060e-003f, 0.09703232f, + 0.10379741f, 0.02348725f, -4.72756615e-003f, 0.01027325f, 0.10402658f, + 0.12071823f, 0.09817299f, -0.02612033f, 0.03638414f, 0.05896405f, + 0.04865025f, 0.04793910f, -0.03882321f, -0.02962117f, -0.01222268f, + 0.04071597f, 0.01922777f, -0.02287866f, 0.03328381f, 0.01859092f, + 0.09024994f, 0.03804455f, -0.01424510f, 0.01953739f, 0.02509617f, + -0.03390914f, -0.05663941f, -0.01641979f, 0.05848591f, 0.04639670f, + 0.02092116f, 0.12911791f, 0.19918139f, 0.07739855f, -7.25806039e-003f, + 0.04074838f, 0.03183993f, 1.39251316e-003f, -0.01428625f, 0.01865480f, + 0.08529541f, 0.13547510f, 0.11189661f, 0.03998901f, 0.09575938f, + -0.02631102f, -0.03458253f, -0.04749985f, -0.06070716f, + 4.71884012e-003f, 0.06445789f, -0.02450038f, -0.05483776f, + -0.04657237f, -0.02030717f, -0.03480766f, -0.09397731f, -0.06399718f, + -0.01804585f, 5.62348310e-003f, -6.64811488e-003f, -0.06517869f, + 6.96210237e-003f, -0.01860148f, -0.04245830f, -0.05850367f, + -3.24417115e-003f, 0.07700698f, 0.11290991f, 0.09923030f, -0.02970599f, + 0.05592411f, 0.04813979f, -0.09811195f, -0.09357996f, -0.03276114f, + 0.05218338f, 0.04141375f, 3.92977800e-003f, -0.05047480f, 0.15960084f, + 0.04612800f, -0.03114098f, -0.04650044f, -0.03249795f, -0.02425641f, + -0.04311355f, 0.04307659f, -0.09401883f, -0.04742785f, -0.01254499f, + -0.06598741f, 3.41369561e-003f, -0.05620445f, -7.28127593e-003f, + -0.05998361f, 
-0.03274450f, -0.07376868f, 3.19015374e-003f, + -0.07733069f, 0.05815864f, -0.02471071f, 0.03850617f, 0.13838784f, + 0.15399861f, 0.01731321f, -0.01477586f, 0.10393341f, 0.05159833f, + -0.01945555f, -0.03427503f, -0.04867341f, 0.09237480f, 0.10732719f, + 0.06071450f, -0.01355071f, 0.01844356f, -0.03480803f, -0.03796671f, + 2.15628621e-004f, -0.05440186f, 0.01889855f, -0.01443413f, + -0.02607902f, -0.02938001f, 0.02720689f, -0.06228397f, -0.02970936f, + -0.03426210f, -0.10280876f, -0.06739304f, -0.05227850f, 0.03360292f, + -0.11278441f, -0.06966180f, -0.13937433f, 9.10932291e-003f, + 2.52020749e-004f, -4.07359656e-003f, 0.12310639f, 0.09343060f, + 0.07302511f, 0.03222093f, 0.07532879f, 0.03792387f, -0.04985180f, + 0.01804602f, 0.02694195f, 0.13481498f, 0.04601225f, 0.04106982f, + 0.08511057f, 0.12314661f, 0.01320830f, 0.05044121f, -5.52943908e-003f, + -0.08992624f, -0.02249301f, -0.08181777f, 0.06165213f, -0.03256603f, + -0.01068920f, -0.01323473f, -0.11970232f, -0.04616347f, -0.12088681f, + -0.06762606f, -0.08676834f, -0.06434575f, 0.01772529f, 0.03469615f, + -0.10926618f, 0.03013873f, 0.14030397f, 0.16130108f, 0.17985588f, + 0.11281928f, 0.10530639f, 0.08905948f, 0.07733764f, 0.06695238f, + 0.02142088f, 0.06438877f, 0.09794453f, 0.05745072f, 0.02788557f, + 0.02632830f, 0.07985807f, 4.24902979e-003f, 8.47890321e-003f, + -0.02679466f, -5.28812688e-003f, -0.02162580f, -0.07490715f, + -0.08251337f, -0.02056576f, -0.01026194f, -1.15492963e-003f, + -5.75720915e-004f, -0.07210591f, -0.07320981f, -0.04883312f, + -0.10897151f, -0.07477258f, -0.08867134f, -0.09222437f, -0.10924666f, + -0.10430276f, 0.07953499f, 0.02767959f, 0.11393359f, 0.18779543f, + 0.03313421f, 0.02143700f, 0.05852016f, -2.12067598e-003f, + -3.76984011e-003f, 0.02774167f, -0.03124610f, 0.01465141f, 0.01616004f, + -0.01391913f, -0.04404102f, -0.05444227f, -0.14684731f, -0.15016587f, + 0.04509468f, 1.29563001e-003f, 0.01398350f, 0.05610404f, -0.04868806f, + -0.04776716f, -8.16873740e-003f, 
-2.30126386e-003f, -0.02286313f, + 0.11983398f, -0.04703261f, -0.08814441f, -0.07585249f, -0.10799607f, + -0.03232087f, 0.01509786f, -0.04843464f, -0.03967846f, 0.09589416f, + 0.01352560f, -0.01458119f, 0.01050829f, -0.03038946f, 0.01608388f, + 1.11975556e-003f, -0.01250656f, 2.86211423e-003f, 0.04333691f, + -0.14603497f, -0.01946543f, -0.02327525f, -0.01973944f, 0.07944400f, + -0.02224544f, -0.06701808f, 0.03476532f, 0.11505594f, -0.02712801f, + -0.01665113f, 0.06315716f, -0.08205860f, 0.07431999f, 0.04915778f, + -0.04468752f, -0.01490402f, 0.07400476f, -0.11650901f, 0.05102430f, + 0.04559118f, -0.05916039f, 0.08840760f, -0.01587902f, -0.14890194f, + 0.07857784f, 0.04710254f, -0.05381983f, -0.07331945f, -0.03604643f, + 0.15611970f, 0.07649943f, -0.05959348f, -0.02776607f, 0.11098688f, + 0.03758875f, -0.04446875f, 0.04933187f, 0.01345535f, 0.06921103f, + 0.07364785f, 0.05518956f, 0.02899585f, 0.09375840f, 0.10518434f, + -0.04420241f, 0.01915282f, -3.56386811e-003f, 0.14586878f, 0.10286101f, + -0.04360626f, -0.12723237f, 0.09076386f, 0.11119842f, -0.06035013f, + 0.09674817f, 0.08938243f, 0.07065924f, 0.02603180f, 5.84815582e-003f, + -0.05922065f, 0.12360309f, 3.59695964e-003f, 2.99844006e-003f, + 0.03697936f, 0.02043072f, 0.04168725f, 0.01025975f, -0.01359980f, + -0.01600920f, 0.02581056f, 0.02329250f, 2.98100687e-003f, 0.01629762f, + 0.06652115f, 0.05855627f, 0.01237463f, -0.01297135f, 0.01761587f, + 0.05090865f, 0.06549342f, -0.04425945f, 2.43203156e-003f, + 3.07327788e-003f, 0.06678630f, -0.04303836f, 0.01082393f, -0.06476044f, + 0.04077786f, 0.12441979f, 0.08237778f, 0.07424165f, 0.04065890f, + 0.06905543f, 0.09556347f, 0.12724875f, -0.02132082f, 0.08514154f, + -0.04175328f, -0.02666954f, 0.01897836f, 0.03317382f, 9.45465732e-003f, + -0.01238974f, -0.04242500f, -0.01419479f, -0.03545213f, -0.02440874f, + 0.08684119f, 0.04212951f, 0.02462858f, -0.01104825f, -5.01706870e-003f, + 0.02968982f, 0.02597476f, -0.01568939f, 0.04514892f, 0.06974549f, + 0.08670278f, 
0.06828108f, 0.10238872f, 0.05405957f, 0.06548470f, + -0.03763957f, 0.01366090f, 0.07069602f, 0.05363748f, 0.04798120f, + 0.11706422f, 0.05466456f, -0.01869259f, 0.06344382f, 0.03106543f, + 0.08432506f, -0.02061096f, 0.03821088f, -6.92190882e-003f, + 6.40467042e-003f, -0.01271779f, 6.89014705e-005f, 0.04541415f, + -0.01899539f, -0.05020239f, 0.03000903f, 0.01090422f, 4.52452758e-003f, + 0.02573632f, -0.02388454f, -0.04200457f, 1.72783900e-003f, + -0.05978370f, -0.02720562f, 0.06573715f, 0.01154317f, 0.01265615f, + 0.07375994f, -9.19828378e-003f, -0.04914120f, 0.02124831f, 0.06455322f, + 0.04372910f, -0.03310043f, 0.03605788f, -6.78055827e-003f, + 9.36202332e-003f, 0.01747596f, -0.06406314f, -0.06812935f, 0.08080816f, + -0.02778088f, 0.02735260f, 0.06393493f, 0.06652229f, 0.05676993f, + 0.08640018f, -7.59188086e-003f, -0.02012847f, -0.04741159f, + -0.01657069f, -0.01624399f, 0.05547778f, -2.33309763e-003f, + 0.01120033f, 0.06141156f, -0.06285004f, -0.08732341f, -0.09313398f, + -0.04267832f, 5.57443965e-003f, 0.04809862f, 0.01773641f, + 5.37361018e-003f, 0.14842421f, -0.06298012f, -0.02935147f, 0.11443478f, + -0.05034208f, 5.65494271e-003f, 0.02076526f, -0.04577984f, + -0.04735741f, 0.02961071f, -0.09307127f, -0.04417921f, -0.04990027f, + -0.03940028f, 0.01306016f, 0.06267900f, 0.03758737f, 0.08460117f, + 0.13858789f, 0.04862388f, -0.06319809f, -0.05655516f, 0.01885816f, + -0.03285607f, 0.03371567f, -0.07040928f, -0.04514049f, 0.01392166f, + 0.08184422f, -0.07230316f, 0.02386871f, 0.02184591f, 0.02605764f, + -0.01033954f, 9.29878280e-003f, 7.67351175e-003f, 0.15189242f, + 0.02069071f, -0.09738296f, -0.08894105f, -0.07768748f, 0.02332268f, + -0.01778995f, -0.03258888f, -0.08180822f, -0.08492987f, 0.02290156f, + -0.11368170f, -0.03554465f, -0.04533844f, -0.02861580f, 0.06782424f, + 0.01113123f, 0.02453644f, 0.12721945f, 0.08084814f, -0.03607795f, + 0.01109122f, 0.04803548f, -0.03489929f, 0.03399536f, -0.05682014f, + 8.59533902e-003f, -4.27904585e-003f, 0.03230887f, 
-0.01300198f, + -0.01038137f, -0.07930113f, 8.33097473e-003f, 0.02296994f, + -0.01306500f, -0.01881626f, 0.04413369f, 0.05729880f, -0.03761553f, + 0.01942326f, 1.64540811e-003f, -0.03811319f, 0.04190650f, -0.14978096f, + -0.04514487f, 0.01209545f, -5.46460645e-003f, -0.01647195f, + 7.63064111e-003f, -0.07494587f, 0.08415288f, 0.10020141f, -0.01228561f, + 0.06553826f, 0.04554005f, 0.07890417f, 0.03041138f, 0.01752007f, + 0.09208256f, -3.74419295e-004f, 0.10549527f, 0.04686913f, 0.01894833f, + -0.02651412f, -4.34682379e-003f, 5.44942822e-003f, 0.01444484f, + 0.05882156f, -0.03336544f, 0.04603891f, -0.10432546f, 0.01923928f, + 0.01842845f, -0.01712168f, -0.02222766f, 0.04693324f, -0.06202956f, + -0.01422159f, 0.08732220f, -0.07706107f, 0.02661049f, -0.04300238f, + -0.03092422f, -0.03552184f, -0.01886088f, -0.04979934f, 0.03906401f, + 0.04608644f, 0.04966111f, 0.04275464f, -0.04621769f, -0.02653212f, + 8.57011229e-003f, 0.03839684f, 0.05818764f, 0.03880796f, + -2.76100676e-004f, 0.03076511f, -0.03266929f, -0.05374557f, + 0.04986527f, -9.45429131e-003f, 0.03582499f, -2.64564669e-003f, + -1.07461517e-003f, 0.02962313f, -0.01483363f, 0.03060869f, 0.02448327f, + 0.01845641f, 0.03282966f, -0.03534438f, -0.01084059f, -0.01119136f, + -1.85360224e-003f, -5.94652840e-004f, -0.04451817f, 2.98327743e-003f, + 0.06272484f, -0.02152076f, -3.05971340e-003f, -0.05070828f, + 0.01531762f, 0.01282815f, 0.05167150f, 9.46266949e-003f, + -3.34558333e-003f, 0.11442288f, -0.03906701f, -2.67325155e-003f, + 0.03069184f, -0.01134165f, 0.02949462f, 0.02879886f, 0.03855566f, + -0.03450781f, 0.09142872f, -0.02156654f, 0.06075062f, -0.06220816f, + 0.01944680f, 6.68372354e-003f, -0.06656796f, 8.70784000e-003f, + 0.03456013f, 0.02434320f, -0.13236357f, -0.04177035f, -0.02069627f, + 0.01068112f, 0.01505432f, -0.07517391f, -3.83571628e-003f, + -0.06298508f, -0.02881260f, -0.13101046f, -0.07221562f, + -5.79945277e-003f, -8.57300125e-003f, 0.03782469f, 0.02762164f, + 0.04942456f, -0.02936396f, 
0.09597211f, 0.01921411f, 0.06101191f, + -0.04787507f, -0.01379578f, -7.40224449e-003f, -0.02220136f, + -0.01313756f, 7.77558051e-003f, 0.12296968f, 0.02939998f, 0.03594062f, + -0.07788624f, -0.01133144f, 3.99316690e-004f, -0.06090347f, + -0.01122066f, -4.68682544e-003f, 0.07633100f, -0.06748922f, + -0.05640298f, -0.05265681f, -0.01139122f, -0.01624347f, -0.04715714f, + -0.01099092f, 0.01048561f, 3.28499987e-003f, -0.05810167f, + -0.07699911f, -0.03330683f, 0.04185145f, 0.03478536f, 0.02275165f, + 0.02304766f, 6.66040834e-003f, 0.10968148f, -5.93013782e-003f, + -0.04858336f, -0.04203213f, -0.09316786f, -6.13074889e-003f, + -0.02544625f, 0.01366201f, 9.18555818e-003f, -0.01846578f, + -0.05622401f, -0.03989377f, -0.07810296f, 6.91275718e-003f, + 0.05957597f, -0.03901334f, 0.01572002f, -0.01193903f, + -6.89400872e-003f, -0.03093356f, -0.04136098f, -0.01562869f, + -0.04604580f, 0.02865234f, -0.08678447f, -0.03232484f, -0.05364593f, + -0.01445016f, -0.07003860f, -0.08669746f, -0.04520775f, 0.04274122f, + 0.03117515f, 0.08175703f, 0.01081109f, 0.06379741f, 0.06199206f, + 0.02865988f, 0.02360346f, 0.06725410f, -0.03248780f, -9.37702879e-003f, + 0.08265898f, -0.02245839f, 0.05125763f, -0.01862395f, 0.01973453f, + -0.01994494f, -0.10770868f, 0.03180375f, 3.23935156e-003f, + -0.02142080f, -0.04256190f, 0.04760900f, 0.04282863f, 0.05635953f, + -0.01870849f, 0.05540622f, -0.03042666f, 0.01455277f, -0.06630179f, + -0.05843807f, -0.03739681f, -0.09739155f, -0.03220233f, -0.05620182f, + -0.10381401f, 0.07400211f, 4.20676917e-003f, 0.03258535f, + 2.14308966e-003f, 0.05121966f, -0.01274337f, 0.02384761f, 0.06335578f, + -0.07905591f, 0.08375625f, -0.07898903f, -0.06508528f, -0.02498444f, + 0.06535810f, 0.03970535f, 0.04895468f, -0.01169566f, -0.03980601f, + 0.05682293f, 0.05925463f, -0.01165808f, -0.07936699f, -0.04208954f, + 0.01333987f, 0.09051196f, 0.10098671f, -0.03974256f, 0.01238771f, + -0.07501741f, -0.03655440f, -0.04301528f, 0.09216860f, + 4.63579083e-004f, 0.02851115f, 
0.02142735f, 1.28244064e-004f, + 0.02879687f, -0.08554889f, -0.04838862f, 0.08135369f, -0.05756533f, + 0.01413900f, 0.03451880f, -0.06619488f, -0.03053130f, 0.02961676f, + -0.07384635f, 0.01135692f, 0.05283910f, -0.07778034f, -0.02107482f, + -0.05511716f, -0.13473752f, 0.03030157f, 0.06722020f, -0.06218817f, + -0.05826827f, 0.06254654f, 0.02895772f, -0.01664000f, -0.03620280f, + -0.01612278f, -1.46097376e-003f, 0.14013411f, -8.96181818e-003f, + -0.03250246f, 3.38630192e-003f, 2.64779478e-003f, 0.03359732f, + -0.02411991f, -0.04229729f, 0.10666174f, -6.66579151f + }; + return vector(detector, detector + sizeof(detector) / sizeof(detector[0])); } /* Returns the nearest upper power of two, works only for @@ -1554,7 +1613,7 @@ static int power_2up(unsigned int n) } void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int block_stride_y, - int nblocks_win_x, int nblocks_win_y) + int nblocks_win_x, int nblocks_win_y) { cnbins = nbins; cblock_stride_x = block_stride_x; @@ -1576,12 +1635,12 @@ void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int b } void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int block_stride_y, - int height, int width, const cv::ocl::oclMat& grad, - const cv::ocl::oclMat& qangle, float sigma, cv::ocl::oclMat& block_hists) + int height, int width, const cv::ocl::oclMat &grad, + const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists) { Context *clCxt = Context::getContext(); - string kernelName = "compute_hists_kernel"; - vector< pair > args; + string kernelName = "compute_hists_kernel"; + vector< pair > args; int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y; @@ -1617,11 +1676,11 @@ void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int bloc } void cv::ocl::device::hog::normalize_hists(int nbins, 
int block_stride_x, int block_stride_y, - int height, int width, cv::ocl::oclMat& block_hists, float threshold) + int height, int width, cv::ocl::oclMat &block_hists, float threshold) { Context *clCxt = Context::getContext(); - string kernelName = "normalize_hists_kernel"; - vector< pair > args; + string kernelName = "normalize_hists_kernel"; + vector< pair > args; int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y; int nthreads = power_2up(block_hist_size); @@ -1645,13 +1704,13 @@ void cv::ocl::device::hog::normalize_hists(int nbins, int block_stride_x, int bl } void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int block_stride_y, - int block_stride_x, int win_stride_y, int win_stride_x, int height, - int width, const cv::ocl::oclMat& block_hists, const cv::ocl::oclMat& coefs, float free_coef, - float threshold, cv::ocl::oclMat& labels) + int block_stride_x, int win_stride_y, int win_stride_x, int height, + int width, const cv::ocl::oclMat &block_hists, const cv::ocl::oclMat &coefs, float free_coef, + float threshold, cv::ocl::oclMat &labels) { Context *clCxt = Context::getContext(); - string kernelName = "classify_hists_kernel"; - vector< pair > args; + string kernelName = "classify_hists_kernel"; + vector< pair > args; int win_block_stride_x = win_stride_x / block_stride_x; int win_block_stride_y = win_stride_y / block_stride_y; @@ -1679,12 +1738,12 @@ void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int blo } void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, - int win_stride_y, int win_stride_x, int height, int width, - const cv::ocl::oclMat& block_hists, cv::ocl::oclMat& descriptors) + int win_stride_y, int win_stride_x, int height, int width, + const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors) { Context *clCxt = Context::getContext(); - string kernelName = "extract_descrs_by_rows_kernel"; - vector< pair > args; 
+ string kernelName = "extract_descrs_by_rows_kernel"; + vector< pair > args; int win_block_stride_x = win_stride_x / block_stride_x; int win_block_stride_y = win_stride_y / block_stride_y; @@ -1710,12 +1769,12 @@ void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, } void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, - int win_stride_y, int win_stride_x, int height, int width, - const cv::ocl::oclMat& block_hists, cv::ocl::oclMat& descriptors) + int win_stride_y, int win_stride_x, int height, int width, + const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors) { Context *clCxt = Context::getContext(); - string kernelName = "extract_descrs_by_cols_kernel"; - vector< pair > args; + string kernelName = "extract_descrs_by_cols_kernel"; + vector< pair > args; int win_block_stride_x = win_stride_x / block_stride_x; int win_block_stride_y = win_stride_y / block_stride_y; @@ -1746,12 +1805,12 @@ static inline int divUp(int total, int grain) return (total + grain - 1) / grain; } -void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat& img, - float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma) +void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img, + float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma) { Context *clCxt = Context::getContext(); - string kernelName = "compute_gradients_8UC1_kernel"; - vector< pair > args; + string kernelName = "compute_gradients_8UC1_kernel"; + vector< pair > args; size_t localThreads[3] = { NTHREADS, 1, 1 }; size_t globalThreads[3] = { width, height, 1 }; @@ -1775,16 +1834,16 @@ void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const c openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); } -void 
cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat& img, - float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma) +void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img, + float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma) { Context *clCxt = Context::getContext(); - string kernelName = "compute_gradients_8UC4_kernel"; - vector< pair > args; + string kernelName = "compute_gradients_8UC4_kernel"; + vector< pair > args; size_t localThreads[3] = { NTHREADS, 1, 1 }; size_t globalThreads[3] = { width, height, 1 }; - + char correctGamma = (correct_gamma) ? 1 : 0; int img_step = img.step >> 2; int grad_quadstep = grad.step >> 3; diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index 06721b0..62ea42e 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -77,7 +77,10 @@ void cv::ocl::resize(const oclMat &, oclMat &, Size, double, double, int) { throw_nogpu(); } -void cv::ocl::remap(const oclMat&, oclMat&, oclMat&, oclMat&, int, int ,const Scalar&) { throw_nogpu(); } +void cv::ocl::remap(const oclMat &, oclMat &, oclMat &, oclMat &, int, int , const Scalar &) +{ + throw_nogpu(); +} void cv::ocl::copyMakeBorder(const oclMat &, oclMat &, int, int, int, int, const Scalar &) { @@ -103,7 +106,7 @@ void cv::ocl::bilateralFilter(const oclMat &, oclMat &, int, double, double, int { throw_nogpu(); } -void cv::ocl::convolve(const oclMat&, const oclMat&, oclMat&) +void cv::ocl::convolve(const oclMat &, const oclMat &, oclMat &) { throw_nogpu(); } @@ -130,7 +133,7 @@ namespace cv extern const char *imgproc_bilateral; extern const char *imgproc_calcHarris; extern const char *imgproc_calcMinEigenVal; - extern const char *imgproc_convolve; + extern const char *imgproc_convolve; ////////////////////////////////////OpenCL call wrappers//////////////////////////// template struct index_and_sizeof; 
@@ -196,7 +199,7 @@ namespace cv args.push_back( make_pair(sizeof(cl_uchar), (void *)&thresh_uchar)); args.push_back( make_pair(sizeof(cl_uchar), (void *)&max_val)); args.push_back( make_pair(sizeof(cl_int), (void *)&type)); - openCLExecuteKernel(clCxt, &imgproc_threshold, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); + openCLExecuteKernel(clCxt, &imgproc_threshold, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth()); } void threshold_32f(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type) @@ -233,7 +236,7 @@ namespace cv args.push_back( make_pair(sizeof(cl_float), (void *)&thresh_f)); args.push_back( make_pair(sizeof(cl_float), (void *)&max_val)); args.push_back( make_pair(sizeof(cl_int), (void *)&type)); - openCLExecuteKernel(clCxt, &imgproc_threshold, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); + openCLExecuteKernel(clCxt, &imgproc_threshold, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth()); } @@ -252,17 +255,17 @@ namespace cv return thresh; } - //////////////////////////////////////////////////////////////////////////////////////////// - /////////////////////////////// remap ////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////// remap ////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////// - void remap( const oclMat& src, oclMat& dst, oclMat& map1, oclMat& map2, int interpolation, int borderType, const Scalar& borderValue ) + void remap( const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int borderType, const Scalar &borderValue ) { Context *clCxt = src.clCxt; - CV_Assert(interpolation == 
INTER_LINEAR || interpolation == INTER_NEAREST - || interpolation == INTER_CUBIC || interpolation== INTER_LANCZOS4); - CV_Assert((map1.type() == CV_16SC2 && !map2.data) || (map1.type()== CV_32FC2 && !map2.data) || (map1.type() == CV_32FC1 && map2.type() == CV_32FC1)); - CV_Assert(!map2.data || map2.size()== map1.size()); + CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST + || interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4); + CV_Assert((map1.type() == CV_16SC2 && !map2.data) || (map1.type() == CV_32FC2 && !map2.data) || (map1.type() == CV_32FC1 && map2.type() == CV_32FC1)); + CV_Assert(!map2.data || map2.size() == map1.size()); CV_Assert(dst.size() == map1.size()); dst.create(map1.size(), src.type()); @@ -285,7 +288,7 @@ namespace cv kernelName = "remapNNSConstant"; } - else if(map1.type() == CV_32FC1 && map2.type() == CV_32FC1) + else if(map1.type() == CV_32FC1 && map2.type() == CV_32FC1) { if(interpolation == INTER_LINEAR && borderType == BORDER_CONSTANT) kernelName = "remapLNF1Constant"; @@ -293,37 +296,37 @@ namespace cv kernelName = "remapNNF1Constant"; } - int channels = dst.channels(); + int channels = dst.oclchannels(); int depth = dst.depth(); - int type = src.type(); - size_t blkSizeX = 16, blkSizeY = 16; - size_t glbSizeX; + int type = src.type(); + size_t blkSizeX = 16, blkSizeY = 16; + size_t glbSizeX; int cols = dst.cols; - if(src.type() == CV_8UC1) + if(src.type() == CV_8UC1) { - cols = (dst.cols + dst.offset%4 + 3)/4; - glbSizeX = cols %blkSizeX==0 ? cols : (cols/blkSizeX+1)*blkSizeX; - + cols = (dst.cols + dst.offset % 4 + 3) / 4; + glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX; + } - else if(src.type() == CV_8UC4 || src.type() == CV_32FC1) + else if(src.type() == CV_8UC3 || src.type() == CV_8UC4 || src.type() == CV_32FC1) { - cols = (dst.cols + (dst.offset>>2)%4 + 3)/4; - glbSizeX = cols %blkSizeX==0 ? 
cols : (cols/blkSizeX+1)*blkSizeX; + cols = (dst.cols + (dst.offset >> 2) % 4 + 3) / 4; + glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX; } else { - glbSizeX = dst.cols%blkSizeX==0 ? dst.cols : (dst.cols/blkSizeX+1)*blkSizeX; - + glbSizeX = dst.cols % blkSizeX == 0 ? dst.cols : (dst.cols / blkSizeX + 1) * blkSizeX; + } - size_t glbSizeY = dst.rows%blkSizeY==0 ? dst.rows : (dst.rows/blkSizeY+1)*blkSizeY; - size_t globalThreads[3] = {glbSizeX,glbSizeY,1}; - size_t localThreads[3] = {blkSizeX,blkSizeY,1}; + size_t glbSizeY = dst.rows % blkSizeY == 0 ? dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY; + size_t globalThreads[3] = {glbSizeX, glbSizeY, 1}; + size_t localThreads[3] = {blkSizeX, blkSizeY, 1}; /* ///////////////////////////// //using the image buffer ///////////////////////////// - + size_t image_row_pitch = 0; cl_int err1, err2, err3; cl_mem_flags flags1 = CL_MEM_READ_ONLY; @@ -366,8 +369,8 @@ namespace cv printf("Error code %d \n", err3); return; } - // clWaitForEvents(1, &BtoI_event); - + // clWaitForEvents(1, &BtoI_event); + cl_int ret; Mat test(src.rows, src.cols, CV_8UC1); memset(test.data, 0, src.rows*src.cols); @@ -391,66 +394,66 @@ namespace cv vector< pair > args; if(map1.channels() == 2) { - args.push_back( make_pair(sizeof(cl_mem),(void*)&dst.data)); - args.push_back( make_pair(sizeof(cl_mem),(void*)&src.data)); + args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data)); + args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data)); // args.push_back( make_pair(sizeof(cl_mem),(void*)&srcImage)); //imageBuffer - args.push_back( make_pair(sizeof(cl_mem),(void*)&map1.data)); - args.push_back( make_pair(sizeof(cl_int),(void*)&dst.offset)); - args.push_back( make_pair(sizeof(cl_int),(void*)&src.offset)); - args.push_back( make_pair(sizeof(cl_int),(void*)&map1.offset)); - args.push_back( make_pair(sizeof(cl_int),(void*)&dst.step)); - args.push_back( make_pair(sizeof(cl_int),(void*)&src.step)); - args.push_back( 
make_pair(sizeof(cl_int),(void*)&map1.step)); - args.push_back( make_pair(sizeof(cl_int),(void*)&src.cols)); - args.push_back( make_pair(sizeof(cl_int),(void*)&src.rows)); - args.push_back( make_pair(sizeof(cl_int),(void*)&dst.cols)); - args.push_back( make_pair(sizeof(cl_int),(void*)&dst.rows)); - args.push_back( make_pair(sizeof(cl_int),(void*)&map1.cols)); - args.push_back( make_pair(sizeof(cl_int),(void*)&map1.rows)); + args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data)); + args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset)); + args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset)); + args.push_back( make_pair(sizeof(cl_int), (void *)&map1.offset)); + args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step)); + args.push_back( make_pair(sizeof(cl_int), (void *)&src.step)); + args.push_back( make_pair(sizeof(cl_int), (void *)&map1.step)); + args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols)); + args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows)); + args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols)); + args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows)); + args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols)); + args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&cols)); if(src.clCxt -> impl -> double_support != 0) { - args.push_back( make_pair(sizeof(cl_double4),(void*)&borderValue)); + args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue)); } else { - float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]}; - args.push_back( make_pair(sizeof(cl_float4),(void*)&borderFloat)); + float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]}; + args.push_back( make_pair(sizeof(cl_float4), (void *)&borderFloat)); } } if(map1.channels() == 1) { - args.push_back( 
make_pair(sizeof(cl_mem),(void*)&dst.data)); - args.push_back( make_pair(sizeof(cl_mem),(void*)&src.data)); + args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data)); + args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data)); // args.push_back( make_pair(sizeof(cl_mem),(void*)&srcImage)); //imageBuffer - args.push_back( make_pair(sizeof(cl_mem),(void*)&map1.data)); - args.push_back( make_pair(sizeof(cl_mem),(void*)&map2.data)); - args.push_back( make_pair(sizeof(cl_int),(void*)&dst.offset)); - args.push_back( make_pair(sizeof(cl_int),(void*)&src.offset)); - args.push_back( make_pair(sizeof(cl_int),(void*)&map1.offset)); - args.push_back( make_pair(sizeof(cl_int),(void*)&dst.step)); - args.push_back( make_pair(sizeof(cl_int),(void*)&src.step)); - args.push_back( make_pair(sizeof(cl_int),(void*)&map1.step)); - args.push_back( make_pair(sizeof(cl_int),(void*)&src.cols)); - args.push_back( make_pair(sizeof(cl_int),(void*)&src.rows)); - args.push_back( make_pair(sizeof(cl_int),(void*)&dst.cols)); - args.push_back( make_pair(sizeof(cl_int),(void*)&dst.rows)); - args.push_back( make_pair(sizeof(cl_int),(void*)&map1.cols)); - args.push_back( make_pair(sizeof(cl_int),(void*)&map1.rows)); + args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data)); + args.push_back( make_pair(sizeof(cl_mem), (void *)&map2.data)); + args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset)); + args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset)); + args.push_back( make_pair(sizeof(cl_int), (void *)&map1.offset)); + args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step)); + args.push_back( make_pair(sizeof(cl_int), (void *)&src.step)); + args.push_back( make_pair(sizeof(cl_int), (void *)&map1.step)); + args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols)); + args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows)); + args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols)); + args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows)); 
+ args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols)); + args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows)); args.push_back( make_pair(sizeof(cl_int), (void *)&cols)); if(src.clCxt -> impl -> double_support != 0) { - args.push_back( make_pair(sizeof(cl_double4),(void*)&borderValue)); + args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue)); } else { - float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]}; - args.push_back( make_pair(sizeof(cl_float4),(void*)&borderFloat)); + float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]}; + args.push_back( make_pair(sizeof(cl_float4), (void *)&borderFloat)); } } - openCLExecuteKernel(clCxt,&imgproc_remap,kernelName,globalThreads,localThreads,args,src.channels(),src.depth()); - } - + openCLExecuteKernel(clCxt, &imgproc_remap, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth()); + } + //////////////////////////////////////////////////////////////////////////////////////////// // resize @@ -462,11 +465,11 @@ namespace cv float ify = 1. / fy; double ifx_d = 1. / fx; double ify_d = 1. 
/ fy; - int srcStep_in_pixel = src.step1() / src.channels(); - int srcoffset_in_pixel = src.offset / src.elemSize(); - int dstStep_in_pixel = dst.step1() / dst.channels(); - int dstoffset_in_pixel = dst.offset / dst.elemSize(); - //printf("%d %d\n",src.step1() , dst.elemSize()); + int srcStep_in_pixel = src.step1() / src.oclchannels(); + int srcoffset_in_pixel = src.offset / src.elemSize(); + int dstStep_in_pixel = dst.step1() / dst.oclchannels(); + int dstoffset_in_pixel = dst.offset / dst.elemSize(); + //printf("%d %d\n",src.step1() , dst.elemSize()); string kernelName; if(interpolation == INTER_LINEAR) kernelName = "resizeLN"; @@ -479,13 +482,13 @@ namespace cv if(src.type() == CV_8UC1) { size_t cols = (dst.cols + dst.offset % 4 + 3) / 4; - glbSizeX = cols % blkSizeX == 0 && cols != 0? cols : (cols / blkSizeX + 1) * blkSizeX; + glbSizeX = cols % blkSizeX == 0 && cols != 0 ? cols : (cols / blkSizeX + 1) * blkSizeX; } else { - glbSizeX = dst.cols % blkSizeX == 0 && dst.cols !=0? dst.cols : (dst.cols / blkSizeX + 1) * blkSizeX; + glbSizeX = dst.cols % blkSizeX == 0 && dst.cols != 0 ? dst.cols : (dst.cols / blkSizeX + 1) * blkSizeX; } - size_t glbSizeY = dst.rows % blkSizeY == 0 && dst.rows != 0? dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY; + size_t glbSizeY = dst.rows % blkSizeY == 0 && dst.rows != 0 ? 
dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY; size_t globalThreads[3] = {glbSizeX, glbSizeY, 1}; size_t localThreads[3] = {blkSizeX, blkSizeY, 1}; @@ -504,13 +507,13 @@ namespace cv args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows)); if(src.clCxt -> impl -> double_support != 0) { - args.push_back( make_pair(sizeof(cl_double), (void *)&ifx_d)); - args.push_back( make_pair(sizeof(cl_double), (void *)&ify_d)); + args.push_back( make_pair(sizeof(cl_double), (void *)&ifx_d)); + args.push_back( make_pair(sizeof(cl_double), (void *)&ify_d)); } else { - args.push_back( make_pair(sizeof(cl_float), (void *)&ifx)); - args.push_back( make_pair(sizeof(cl_float), (void *)&ify)); + args.push_back( make_pair(sizeof(cl_float), (void *)&ifx)); + args.push_back( make_pair(sizeof(cl_float), (void *)&ify)); } } else @@ -529,15 +532,15 @@ namespace cv args.push_back( make_pair(sizeof(cl_float), (void *)&ify)); } - openCLExecuteKernel(clCxt, &imgproc_resize, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); + openCLExecuteKernel(clCxt, &imgproc_resize, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth()); } void resize(const oclMat &src, oclMat &dst, Size dsize, double fx, double fy, int interpolation) { - CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4 - || src.type() == CV_32FC1 || src.type() == CV_32FC4); + CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3 || src.type() == CV_8UC4 + || src.type() == CV_32FC1 || src.type() == CV_32FC3 || src.type() == CV_32FC4); CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST); CV_Assert( src.size().area() > 0 ); CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) ); @@ -546,7 +549,7 @@ namespace cv { if(dsize.width != (int)(src.cols * fx) || dsize.height != (int)(src.rows * fy)) { - CV_Error(CV_StsUnmatchedSizes,"invalid dsize and fx, fy!"); + CV_Error(CV_StsUnmatchedSizes, "invalid dsize and fx, fy!"); } } if( dsize == Size() ) @@ -585,10 
+588,10 @@ namespace cv return medianFilter(src1, dst, m); } - int srcStep = src.step1() / src.channels(); - int dstStep = dst.step1() / dst.channels(); - int srcOffset = src.offset / src.channels() / src.elemSize1(); - int dstOffset = dst.offset / dst.channels() / dst.elemSize1(); + int srcStep = src.step1() / src.oclchannels(); + int dstStep = dst.step1() / dst.oclchannels(); + int srcOffset = src.offset / src.oclchannels() / src.elemSize1(); + int dstOffset = dst.offset / dst.oclchannels() / dst.elemSize1(); Context *clCxt = src.clCxt; string kernelName = "medianFilter"; @@ -610,12 +613,12 @@ namespace cv if(m == 3) { string kernelName = "medianFilter3"; - openCLExecuteKernel(clCxt, &imgproc_median, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); + openCLExecuteKernel(clCxt, &imgproc_median, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth()); } else if(m == 5) { string kernelName = "medianFilter5"; - openCLExecuteKernel(clCxt, &imgproc_median, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); + openCLExecuteKernel(clCxt, &imgproc_median, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth()); } else { @@ -623,7 +626,7 @@ namespace cv //string kernelName = "medianFilter"; //args.push_back( make_pair( sizeof(cl_int),(void*)&m)); - //openCLExecuteKernel(clCxt,&imgproc_median,kernelName,globalThreads,localThreads,args,src.channels(),-1); + //openCLExecuteKernel(clCxt,&imgproc_median,kernelName,globalThreads,localThreads,args,src.oclchannels(),-1); } } @@ -632,48 +635,49 @@ namespace cv // copyMakeBorder void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int bordertype, const Scalar &scalar) { - //CV_Assert(src.channels() != 2); + //CV_Assert(src.oclchannels() != 2); CV_Assert(top >= 0 && bottom >= 0 && left >= 0 && right >= 0); - if((dst.cols!=dst.wholecols) || (dst.rows!=dst.wholerows))//has roi - { - if(((bordertype 
& cv::BORDER_ISOLATED) == 0) && - (bordertype != cv::BORDER_CONSTANT) && - (bordertype != cv::BORDER_REPLICATE)) - { - CV_Error(CV_StsBadArg,"unsupported border type"); - } - } - bordertype &= ~cv::BORDER_ISOLATED; - if((bordertype == cv::BORDER_REFLECT) || (bordertype == cv::BORDER_WRAP)) - { - CV_Assert((src.cols>=left) && (src.cols>=right) && (src.rows >= top) && (src.rows >= bottom)); - } - if(bordertype == cv::BORDER_REFLECT_101) - { - CV_Assert((src.cols>left) && (src.cols>right) && (src.rows > top) && (src.rows > bottom)); - } + if((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi + { + if(((bordertype & cv::BORDER_ISOLATED) == 0) && + (bordertype != cv::BORDER_CONSTANT) && + (bordertype != cv::BORDER_REPLICATE)) + { + CV_Error(CV_StsBadArg, "unsupported border type"); + } + } + bordertype &= ~cv::BORDER_ISOLATED; + if((bordertype == cv::BORDER_REFLECT) || (bordertype == cv::BORDER_WRAP)) + { + CV_Assert((src.cols >= left) && (src.cols >= right) && (src.rows >= top) && (src.rows >= bottom)); + } + if(bordertype == cv::BORDER_REFLECT_101) + { + CV_Assert((src.cols > left) && (src.cols > right) && (src.rows > top) && (src.rows > bottom)); + } dst.create(src.rows + top + bottom, src.cols + left + right, src.type()); - int srcStep = src.step1() / src.channels(); - int dstStep = dst.step1() / dst.channels(); + int srcStep = src.step1() / src.oclchannels(); + int dstStep = dst.step1() / dst.oclchannels(); int srcOffset = src.offset / src.elemSize(); int dstOffset = dst.offset / dst.elemSize(); - int __bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101}; - const char* borderstr[]={"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"}; - int bordertype_index; - for(bordertype_index=0;bordertype_index > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); @@ -683,186 +687,186 @@ 
namespace cv args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep)); args.push_back( make_pair( sizeof(cl_int), (void *)&srcOffset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep)); + args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep)); args.push_back( make_pair( sizeof(cl_int), (void *)&dstOffset)); args.push_back( make_pair( sizeof(cl_int), (void *)&top)); args.push_back( make_pair( sizeof(cl_int), (void *)&left)); - char compile_option[64]; - union sc - { - cl_uchar4 uval; - cl_char4 cval; - cl_ushort4 usval; - cl_short4 shval; - cl_int4 ival; - cl_float4 fval; - cl_double4 dval; - }val; - switch(dst.depth()) - { - case CV_8U: - val.uval.s[0] = saturate_cast(scalar.val[0]); - val.uval.s[1] = saturate_cast(scalar.val[1]); - val.uval.s[2] = saturate_cast(scalar.val[2]); - val.uval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=uchar -D %s",borderstr[bordertype_index]); - args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] )); - if(((dst.offset & 3) ==0) && ((dst.cols & 3) == 0)) - { - kernelName = "copymakeborder_C1_D0"; - globalThreads[0] = (dst.cols/4 + localThreads[0]-1) / localThreads[0] * localThreads[0]; - } - break; - case 4: - sprintf(compile_option, "-D GENTYPE=uchar4 -D %s",borderstr[bordertype_index]); - args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } - break; - case CV_8S: - val.cval.s[0] = saturate_cast(scalar.val[0]); - val.cval.s[1] = saturate_cast(scalar.val[1]); - val.cval.s[2] = saturate_cast(scalar.val[2]); - val.cval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=char -D %s",borderstr[bordertype_index]); - args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] )); - break; - case 4: - 
sprintf(compile_option, "-D GENTYPE=char4 -D %s",borderstr[bordertype_index]); - args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } - break; - case CV_16U: - val.usval.s[0] = saturate_cast(scalar.val[0]); - val.usval.s[1] = saturate_cast(scalar.val[1]); - val.usval.s[2] = saturate_cast(scalar.val[2]); - val.usval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=ushort -D %s",borderstr[bordertype_index]); - args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=ushort4 -D %s",borderstr[bordertype_index]); - args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } - break; - case CV_16S: - val.shval.s[0] = saturate_cast(scalar.val[0]); - val.shval.s[1] = saturate_cast(scalar.val[1]); - val.shval.s[2] = saturate_cast(scalar.val[2]); - val.shval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=short -D %s",borderstr[bordertype_index]); - args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=short4 -D %s",borderstr[bordertype_index]); - args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } - break; - case CV_32S: - val.ival.s[0] = saturate_cast(scalar.val[0]); - val.ival.s[1] = saturate_cast(scalar.val[1]); - val.ival.s[2] = saturate_cast(scalar.val[2]); - val.ival.s[3] = saturate_cast(scalar.val[3]); - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=int -D %s",borderstr[bordertype_index]); - args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] )); - break; 
- case 2: - sprintf(compile_option, "-D GENTYPE=int2 -D %s",borderstr[bordertype_index]); - cl_int2 i2val; - i2val.s[0] = val.ival.s[0]; - i2val.s[1] = val.ival.s[1]; - args.push_back( make_pair( sizeof(cl_int2) , (void *)&i2val )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=int4 -D %s",borderstr[bordertype_index]); - args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } - break; - case CV_32F: - val.fval.s[0] = scalar.val[0]; - val.fval.s[1] = scalar.val[1]; - val.fval.s[2] = scalar.val[2]; - val.fval.s[3] = scalar.val[3]; - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=float -D %s",borderstr[bordertype_index]); - args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=float4 -D %s",borderstr[bordertype_index]); - args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } - break; - case CV_64F: - val.dval.s[0] = scalar.val[0]; - val.dval.s[1] = scalar.val[1]; - val.dval.s[2] = scalar.val[2]; - val.dval.s[3] = scalar.val[3]; - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=double -D %s",borderstr[bordertype_index]); - args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=double4 -D %s",borderstr[bordertype_index]); - args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unknown depth"); - } - - openCLExecuteKernel(src.clCxt, &imgproc_copymakeboder, kernelName, globalThreads, localThreads, args, -1, -1,compile_option); - //uchar* cputemp=new uchar[32*dst.wholerows]; - ////int* cpudata=new 
int[this->step*this->wholerows/sizeof(int)]; - //openCLSafeCall(clEnqueueReadBuffer(src.clCxt->impl->clCmdQueue, (cl_mem)dst.data, CL_TRUE, - // 0, 32*dst.wholerows, cputemp, 0, NULL, NULL)); - //for(int i=0;i(scalar.val[0]); + val.uval.s[1] = saturate_cast(scalar.val[1]); + val.uval.s[2] = saturate_cast(scalar.val[2]); + val.uval.s[3] = saturate_cast(scalar.val[3]); + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=uchar -D %s", borderstr[bordertype_index]); + args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] )); + if(((dst.offset & 3) == 0) && ((dst.cols & 3) == 0)) + { + kernelName = "copymakeborder_C1_D0"; + globalThreads[0] = (dst.cols / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0]; + } + break; + case 4: + sprintf(compile_option, "-D GENTYPE=uchar4 -D %s", borderstr[bordertype_index]); + args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } + break; + case CV_8S: + val.cval.s[0] = saturate_cast(scalar.val[0]); + val.cval.s[1] = saturate_cast(scalar.val[1]); + val.cval.s[2] = saturate_cast(scalar.val[2]); + val.cval.s[3] = saturate_cast(scalar.val[3]); + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=char -D %s", borderstr[bordertype_index]); + args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=char4 -D %s", borderstr[bordertype_index]); + args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } + break; + case CV_16U: + val.usval.s[0] = saturate_cast(scalar.val[0]); + val.usval.s[1] = saturate_cast(scalar.val[1]); + val.usval.s[2] = saturate_cast(scalar.val[2]); + val.usval.s[3] = saturate_cast(scalar.val[3]); + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=ushort 
-D %s", borderstr[bordertype_index]); + args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=ushort4 -D %s", borderstr[bordertype_index]); + args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } + break; + case CV_16S: + val.shval.s[0] = saturate_cast(scalar.val[0]); + val.shval.s[1] = saturate_cast(scalar.val[1]); + val.shval.s[2] = saturate_cast(scalar.val[2]); + val.shval.s[3] = saturate_cast(scalar.val[3]); + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=short -D %s", borderstr[bordertype_index]); + args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=short4 -D %s", borderstr[bordertype_index]); + args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } + break; + case CV_32S: + val.ival.s[0] = saturate_cast(scalar.val[0]); + val.ival.s[1] = saturate_cast(scalar.val[1]); + val.ival.s[2] = saturate_cast(scalar.val[2]); + val.ival.s[3] = saturate_cast(scalar.val[3]); + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=int -D %s", borderstr[bordertype_index]); + args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] )); + break; + case 2: + sprintf(compile_option, "-D GENTYPE=int2 -D %s", borderstr[bordertype_index]); + cl_int2 i2val; + i2val.s[0] = val.ival.s[0]; + i2val.s[1] = val.ival.s[1]; + args.push_back( make_pair( sizeof(cl_int2) , (void *)&i2val )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=int4 -D %s", borderstr[bordertype_index]); + args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } + break; + case CV_32F: + 
val.fval.s[0] = scalar.val[0]; + val.fval.s[1] = scalar.val[1]; + val.fval.s[2] = scalar.val[2]; + val.fval.s[3] = scalar.val[3]; + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=float -D %s", borderstr[bordertype_index]); + args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=float4 -D %s", borderstr[bordertype_index]); + args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } + break; + case CV_64F: + val.dval.s[0] = scalar.val[0]; + val.dval.s[1] = scalar.val[1]; + val.dval.s[2] = scalar.val[2]; + val.dval.s[3] = scalar.val[3]; + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=double -D %s", borderstr[bordertype_index]); + args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=double4 -D %s", borderstr[bordertype_index]); + args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unknown depth"); + } + + openCLExecuteKernel(src.clCxt, &imgproc_copymakeboder, kernelName, globalThreads, localThreads, args, -1, -1, compile_option); + //uchar* cputemp=new uchar[32*dst.wholerows]; + ////int* cpudata=new int[this->step*this->wholerows/sizeof(int)]; + //openCLSafeCall(clEnqueueReadBuffer(src.clCxt->impl->clCmdQueue, (cl_mem)dst.data, CL_TRUE, + // 0, 32*dst.wholerows, cputemp, 0, NULL, NULL)); + //for(int i=0;i impl -> double_support != 0) - { - cl_int st; - coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st ); - openCLVerifyCall(st); - openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0)); - 
}else{ - cl_int st; - for(int m=0;m<2;m++) - for(int n=0;n<3;n++) - { - float_coeffs[m][n]=coeffs[m][n]; - } - coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st ); - openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0)); - - } + if(src.clCxt -> impl -> double_support != 0) + { + cl_int st; + coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st ); + openCLVerifyCall(st); + openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0)); + } + else + { + cl_int st; + for(int m = 0; m < 2; m++) + for(int n = 0; n < 3; n++) + { + float_coeffs[m][n] = coeffs[m][n]; + } + coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st ); + openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0)); + + } //TODO: improve this kernel size_t blkSizeX = 16, blkSizeY = 16; size_t glbSizeX; @@ -993,39 +999,41 @@ namespace cv args.push_back(make_pair(sizeof(cl_mem), (void *)&coeffs_cm)); args.push_back(make_pair(sizeof(cl_int), (void *)&cols)); - openCLExecuteKernel(clCxt, &imgproc_warpAffine, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); + openCLExecuteKernel(clCxt, &imgproc_warpAffine, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth()); openCLSafeCall(clReleaseMemObject(coeffs_cm)); } void warpPerspective_gpu(const oclMat &src, oclMat &dst, double coeffs[3][3], int interpolation) { - CV_Assert( (src.channels() == dst.channels()) ); + CV_Assert( (src.oclchannels() == dst.oclchannels()) ); int srcStep = src.step1(); int dstStep = dst.step1(); - float float_coeffs[3][3]; - cl_mem coeffs_cm; + float float_coeffs[3][3]; + cl_mem coeffs_cm; Context *clCxt = src.clCxt; string 
s[3] = {"NN", "Linear", "Cubic"}; string kernelName = "warpPerspective" + s[interpolation]; - if(src.clCxt -> impl -> double_support != 0) - { - cl_int st; - coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st ); - openCLVerifyCall(st); - openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0)); - }else{ - cl_int st; - for(int m=0;m<3;m++) - for(int n=0;n<3;n++) - float_coeffs[m][n]=coeffs[m][n]; - - coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st ); - openCLVerifyCall(st); - openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0)); - } + if(src.clCxt -> impl -> double_support != 0) + { + cl_int st; + coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st ); + openCLVerifyCall(st); + openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0)); + } + else + { + cl_int st; + for(int m = 0; m < 3; m++) + for(int n = 0; n < 3; n++) + float_coeffs[m][n] = coeffs[m][n]; + + coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st ); + openCLVerifyCall(st); + openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0)); + } //TODO: improve this kernel size_t blkSizeX = 16, blkSizeY = 16; size_t glbSizeX; @@ -1061,7 +1069,7 @@ namespace cv args.push_back(make_pair(sizeof(cl_mem), (void *)&coeffs_cm)); args.push_back(make_pair(sizeof(cl_int), (void *)&cols)); - openCLExecuteKernel(clCxt, &imgproc_warpPerspective, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); + openCLExecuteKernel(clCxt, &imgproc_warpPerspective, kernelName, globalThreads, localThreads, args, 
src.oclchannels(), src.depth()); openCLSafeCall(clReleaseMemObject(coeffs_cm)); } } @@ -1070,7 +1078,7 @@ namespace cv { int interpolation = flags & INTER_MAX; - CV_Assert((src.depth() == CV_8U || src.depth() == CV_32F) && src.channels() != 2 && src.channels() != 3); + CV_Assert((src.depth() == CV_8U || src.depth() == CV_32F) && src.oclchannels() != 2 && src.oclchannels() != 3); CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC); dst.create(dsize, src.type()); @@ -1092,7 +1100,7 @@ namespace cv { int interpolation = flags & INTER_MAX; - CV_Assert((src.depth() == CV_8U || src.depth() == CV_32F) && src.channels() != 2 && src.channels() != 3); + CV_Assert((src.depth() == CV_8U || src.depth() == CV_32F) && src.oclchannels() != 2 && src.oclchannels() != 3); CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC); dst.create(dsize, src.type()); @@ -1119,9 +1127,9 @@ namespace cv void integral(const oclMat &src, oclMat &sum, oclMat &sqsum) { CV_Assert(src.type() == CV_8UC1); - if(src.clCxt->impl->double_support == 0 && src.depth() ==CV_64F) + if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F) { - CV_Error(CV_GpuNotSupported,"select device don't support double"); + CV_Error(CV_GpuNotSupported, "select device don't support double"); } int vlen = 4; int offset = src.offset / vlen; @@ -1213,10 +1221,13 @@ namespace cv if (ksize < 0) scale *= 2.; - if (src.depth() == CV_8U){ + if (src.depth() == CV_8U) + { scale *= 255.; scale = 1. / scale; - }else{ + } + else + { scale = 1. 
/ scale; } if (ksize > 0) @@ -1290,11 +1301,11 @@ namespace cv void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int borderType) { - if(src.clCxt->impl->double_support == 0 && src.depth() ==CV_64F) + if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F) { - CV_Error(CV_GpuNotSupported,"select device don't support double"); + CV_Error(CV_GpuNotSupported, "select device don't support double"); } - CV_Assert(src.cols >= blockSize/2 && src.rows >= blockSize/2); + CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2); oclMat Dx, Dy; CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT); extractCovData(src, Dx, Dy, blockSize, ksize, borderType); @@ -1304,11 +1315,11 @@ namespace cv void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType) { - if(src.clCxt->impl->double_support == 0 && src.depth() ==CV_64F) + if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F) { - CV_Error(CV_GpuNotSupported,"select device don't support double"); + CV_Error(CV_GpuNotSupported, "select device don't support double"); } - CV_Assert(src.cols >= blockSize/2 && src.rows >= blockSize/2); + CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2); oclMat Dx, Dy; CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT); extractCovData(src, Dx, Dy, blockSize, ksize, borderType); @@ -1355,7 +1366,7 @@ namespace cv if( src.empty() ) CV_Error( CV_StsBadArg, "The input image is empty" ); - if( src.depth() != CV_8U || src.channels() != 4 ) + if( src.depth() != CV_8U || src.oclchannels() != 4 ) CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" ); if(src.clCxt->impl->double_support == 0) @@ -1423,7 +1434,7 @@ namespace cv if( src.empty() ) CV_Error( 
CV_StsBadArg, "The input image is empty" ); - if( src.depth() != CV_8U || src.channels() != 4 ) + if( src.depth() != CV_8U || src.oclchannels() != 4 ) CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" ); if(src.clCxt->impl->double_support == 0) @@ -1472,47 +1483,47 @@ namespace cv int dataWidth_bits = 4; int mask = dataWidth - 1; - int cols = mat_src.cols * mat_src.channels(); + int cols = mat_src.cols * mat_src.oclchannels(); int src_offset = mat_src.offset; int hist_step = mat_sub_hist.step >> 2; int left_col = 0, right_col = 0; - if(cols >= dataWidth*2 -1) + if(cols >= dataWidth * 2 - 1) { - left_col = dataWidth - (src_offset & mask); - left_col &= mask; - src_offset += left_col; - cols -= left_col; - right_col = cols & mask; - cols -= right_col; + left_col = dataWidth - (src_offset & mask); + left_col &= mask; + src_offset += left_col; + cols -= left_col; + right_col = cols & mask; + cols -= right_col; } else { - left_col = cols; - right_col = 0; - cols = 0; - globalThreads[0] = 0; + left_col = cols; + right_col = 0; + cols = 0; + globalThreads[0] = 0; } vector > args; if(globalThreads[0] != 0) { - int tempcols = cols >> dataWidth_bits; - int inc_x = globalThreads[0] % tempcols; - int inc_y = globalThreads[0] / tempcols; - src_offset >>= dataWidth_bits; - int src_step = mat_src.step >> dataWidth_bits; - int datacount = tempcols * mat_src.rows; - args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src_step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_sub_hist.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&datacount)); - args.push_back( make_pair( sizeof(cl_int), (void *)&tempcols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&inc_x)); - args.push_back( make_pair( sizeof(cl_int), (void *)&inc_y)); - args.push_back( make_pair( sizeof(cl_int), (void 
*)&hist_step)); - openCLExecuteKernel(clCxt, &imgproc_histogram, kernelName, globalThreads, localThreads, args, -1, depth); + int tempcols = cols >> dataWidth_bits; + int inc_x = globalThreads[0] % tempcols; + int inc_y = globalThreads[0] / tempcols; + src_offset >>= dataWidth_bits; + int src_step = mat_src.step >> dataWidth_bits; + int datacount = tempcols * mat_src.rows; + args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src_step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_sub_hist.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&datacount)); + args.push_back( make_pair( sizeof(cl_int), (void *)&tempcols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&inc_x)); + args.push_back( make_pair( sizeof(cl_int), (void *)&inc_y)); + args.push_back( make_pair( sizeof(cl_int), (void *)&hist_step)); + openCLExecuteKernel(clCxt, &imgproc_histogram, kernelName, globalThreads, localThreads, args, -1, depth); } if(left_col != 0 || right_col != 0) { @@ -1522,7 +1533,7 @@ namespace cv localThreads[1] = 256; globalThreads[0] = left_col + right_col; globalThreads[1] = (mat_src.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1]; - + args.clear(); args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src.data)); args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src.step)); @@ -1586,93 +1597,101 @@ namespace cv LUT(mat_src, lut, mat_dst); } //////////////////////////////////bilateralFilter//////////////////////////////////////////////////// -static void -oclbilateralFilter_8u( const oclMat& src, oclMat& dst, int d, - double sigma_color, double sigma_space, - int borderType ) -{ - int cn = src.channels(); - int i, j, k, maxk, radius; - Size size = src.size(); - - CV_Assert( (src.type() == CV_8UC1 || src.download_channels == 3) && - src.type() == dst.type() && src.size() == dst.size() && - 
src.data != dst.data ); - - if( sigma_color <= 0 ) - sigma_color = 1; - if( sigma_space <= 0 ) - sigma_space = 1; - - double gauss_color_coeff = -0.5/(sigma_color*sigma_color); - double gauss_space_coeff = -0.5/(sigma_space*sigma_space); - - if( d <= 0 ) - radius = cvRound(sigma_space*1.5); - else - radius = d/2; - radius = MAX(radius, 1); - d = radius*2 + 1; - - oclMat temp; - copyMakeBorder( src, temp, radius, radius, radius, radius, borderType ); - - vector _color_weight(cn*256); - vector _space_weight(d*d); - vector _space_ofs(d*d); - float* color_weight = &_color_weight[0]; - float* space_weight = &_space_weight[0]; - int* space_ofs = &_space_ofs[0]; - - // initialize color-related bilateral filter coefficients - for( i = 0; i < 256*cn; i++ ) - color_weight[i] = (float)std::exp(i*i*gauss_color_coeff); - - // initialize space-related bilateral filter coefficients - for( i = -radius, maxk = 0; i <= radius; i++ ) - for( j = -radius; j <= radius; j++ ) + static void + oclbilateralFilter_8u( const oclMat &src, oclMat &dst, int d, + double sigma_color, double sigma_space, + int borderType ) { - double r = std::sqrt((double)i*i + (double)j*j); - if( r > radius ) - continue; - space_weight[maxk] = (float)std::exp(r*r*gauss_space_coeff); - space_ofs[maxk++] = (int)(i*temp.step + j*cn); + int cn = src.channels(); + int i, j, k, maxk, radius; + Size size = src.size(); + + CV_Assert( (src.channels() == 1 || src.channels() == 3) && + src.type() == dst.type() && src.size() == dst.size() && + src.data != dst.data ); + + if( sigma_color <= 0 ) + sigma_color = 1; + if( sigma_space <= 0 ) + sigma_space = 1; + + double gauss_color_coeff = -0.5 / (sigma_color * sigma_color); + double gauss_space_coeff = -0.5 / (sigma_space * sigma_space); + + if( d <= 0 ) + radius = cvRound(sigma_space * 1.5); + else + radius = d / 2; + radius = MAX(radius, 1); + d = radius * 2 + 1; + + oclMat temp; + copyMakeBorder( src, temp, radius, radius, radius, radius, borderType ); + + vector 
_color_weight(cn * 256); + vector _space_weight(d * d); + vector _space_ofs(d * d); + float *color_weight = &_color_weight[0]; + float *space_weight = &_space_weight[0]; + int *space_ofs = &_space_ofs[0]; + int dst_step_in_pixel = dst.step / dst.elemSize(); + int dst_offset_in_pixel = dst.offset / dst.elemSize(); + int temp_step_in_pixel = temp.step / temp.elemSize(); + // initialize color-related bilateral filter coefficients + for( i = 0; i < 256 * cn; i++ ) + color_weight[i] = (float)std::exp(i * i * gauss_color_coeff); + + // initialize space-related bilateral filter coefficients + for( i = -radius, maxk = 0; i <= radius; i++ ) + for( j = -radius; j <= radius; j++ ) + { + double r = std::sqrt((double)i * i + (double)j * j); + if( r > radius ) + continue; + space_weight[maxk] = (float)std::exp(r * r * gauss_space_coeff); + space_ofs[maxk++] = (int)(i * temp_step_in_pixel + j); + } + oclMat oclcolor_weight(1, cn * 256, CV_32FC1, color_weight); + oclMat oclspace_weight(1, d * d, CV_32FC1, space_weight); + oclMat oclspace_ofs(1, d * d, CV_32SC1, space_ofs); + + string kernelName = "bilateral"; + size_t localThreads[3] = { 16, 16, 1 }; + size_t globalThreads[3] = { (dst.cols + localThreads[0] - 1) / localThreads[0] *localThreads[0], + (dst.rows + localThreads[1] - 1) / localThreads[1] *localThreads[1], + 1 + }; + if((dst.type() == CV_8UC1) && ((dst.offset & 3) == 0) && ((dst.cols & 3) == 0)) + { + kernelName = "bilateral2"; + globalThreads[0] = (dst.cols / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0]; + } + vector > args; + args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&temp.data )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols )); + args.push_back( make_pair( sizeof(cl_int), (void *)&maxk )); + args.push_back( make_pair( sizeof(cl_int), (void *)&radius )); + args.push_back( make_pair( 
sizeof(cl_int), (void *)&dst_step_in_pixel )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset_in_pixel )); + args.push_back( make_pair( sizeof(cl_int), (void *)&temp_step_in_pixel )); + args.push_back( make_pair( sizeof(cl_int), (void *)&temp.rows )); + args.push_back( make_pair( sizeof(cl_int), (void *)&temp.cols )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&oclcolor_weight.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&oclspace_weight.data )); + args.push_back( make_pair( sizeof(cl_mem), (void *)&oclspace_ofs.data )); + openCLExecuteKernel(src.clCxt, &imgproc_bilateral, kernelName, globalThreads, localThreads, args, dst.oclchannels(), dst.depth()); } - oclMat oclcolor_weight(1,cn*256,CV_32FC1,color_weight); - oclMat oclspace_weight(1,d*d,CV_32FC1,space_weight); - oclMat oclspace_ofs(1,d*d,CV_32SC1,space_ofs); - - string kernelName = "bilateral"; - size_t localThreads[3] = { 16, 16, 1 }; - size_t globalThreads[3] = { (dst.cols+ localThreads[0]-1)/localThreads[0] * localThreads[0], - (dst.rows+ localThreads[1]-1)/localThreads[1]* localThreads[1], - 1}; - vector > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&temp.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&maxk )); - args.push_back( make_pair( sizeof(cl_int), (void *)&radius )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset )); - args.push_back( make_pair( sizeof(cl_int), (void *)&temp.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&temp.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&temp.cols )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&oclcolor_weight.data )); - args.push_back( make_pair( sizeof(cl_mem), (void 
*)&oclspace_weight.data )); - args.push_back( make_pair( sizeof(cl_mem), (void *)&oclspace_ofs.data )); - openCLExecuteKernel(src.clCxt, &imgproc_bilateral, kernelName, globalThreads, localThreads, args, -1, -1); -} void bilateralFilter(const oclMat &src, oclMat &dst, int radius, double sigmaclr, double sigmaspc, int borderType) { - dst.create( src.size(), src.type() ); - if( src.depth() == CV_8U ) - oclbilateralFilter_8u( src, dst, radius, sigmaclr, sigmaspc, borderType ); - else - CV_Error( CV_StsUnsupportedFormat, - "Bilateral filtering is only implemented for 8uimages" ); + dst.create( src.size(), src.type() ); + if( src.depth() == CV_8U ) + oclbilateralFilter_8u( src, dst, radius, sigmaclr, sigmaspc, borderType ); + else + CV_Error( CV_StsUnsupportedFormat, + "Bilateral filtering is only implemented for 8uimages" ); } } @@ -1682,32 +1701,33 @@ inline int divUp(int total, int grain) { return (total + grain - 1) / grain; } -void convolve_run(const oclMat &src, const oclMat &temp1,oclMat &dst,string kernelName,const char** kernelString) +void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, string kernelName, const char **kernelString) { CV_Assert(src.depth() == CV_32FC1); CV_Assert(temp1.depth() == CV_32F); - CV_Assert(temp1.cols <= 17 && temp1.rows <=17); + CV_Assert(temp1.cols <= 17 && temp1.rows <= 17); - dst.create(src.size(),src.type()); + dst.create(src.size(), src.type()); CV_Assert(src.cols == dst.cols && src.rows == dst.rows); CV_Assert(src.type() == dst.type()); Context *clCxt = src.clCxt; - int channels = dst.channels(); + int channels = dst.oclchannels(); int depth = dst.depth(); - size_t vector_length =1; - int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length-1); + size_t vector_length = 1; + int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1); int cols = divUp(dst.cols * channels + offset_cols, vector_length); int rows = dst.rows; size_t localThreads[3] = { 16, 16, 1 }; - 
size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(rows, localThreads[1]) * localThreads[1], - 1}; + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(rows, localThreads[1]) *localThreads[1], + 1 + }; - vector > args; + vector > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&temp1.data )); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); @@ -1721,14 +1741,14 @@ void convolve_run(const oclMat &src, const oclMat &temp1,oclMat &dst,string kern openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); } -void cv::ocl::convolve(const oclMat& x, const oclMat& t, oclMat& y) +void cv::ocl::convolve(const oclMat &x, const oclMat &t, oclMat &y) { CV_Assert(x.depth() == CV_32F); CV_Assert(t.depth() == CV_32F); CV_Assert(x.type() == y.type() && x.size() == y.size()); - y.create(x.size(),x.type()); + y.create(x.size(), x.type()); string kernelName = "convolve"; - + convolve_run(x, t, y, kernelName, &imgproc_convolve); } #endif /* !defined (HAVE_OPENCL) */ diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp index 6c3f94b..b7f1feb 100644 --- a/modules/ocl/src/initialization.cpp +++ b/modules/ocl/src/initialization.cpp @@ -77,31 +77,31 @@ namespace cv } void openCLMallocPitch(Context * /*clCxt*/, void ** /*dev_ptr*/, size_t * /*pitch*/, - size_t /*widthInBytes*/, size_t /*height*/) + size_t /*widthInBytes*/, size_t /*height*/) { throw_nogpu(); } void openCLMemcpy2D(Context * /*clCxt*/, void * /*dst*/, size_t /*dpitch*/, - const void * /*src*/, size_t /*spitch*/, - size_t /*width*/, size_t /*height*/, enum openCLMemcpyKind /*kind*/) + const void * /*src*/, size_t /*spitch*/, + size_t /*width*/, size_t /*height*/, enum openCLMemcpyKind /*kind*/) { throw_nogpu(); } void openCLCopyBuffer2D(Context * /*clCxt*/, void * /*dst*/, size_t /*dpitch*/, - const 
void * /*src*/, size_t /*spitch*/, - size_t /*width*/, size_t /*height*/, enum openCLMemcpyKind /*kind*/) + const void * /*src*/, size_t /*spitch*/, + size_t /*width*/, size_t /*height*/, enum openCLMemcpyKind /*kind*/) { throw_nogpu(); } - cl_mem openCLCreateBuffer(Context *,size_t, size_t) + cl_mem openCLCreateBuffer(Context *, size_t, size_t) { throw_nogpu(); } - void openCLReadBuffer(Context *, cl_mem, void*, size_t) + void openCLReadBuffer(Context *, cl_mem, void *, size_t) { throw_nogpu(); } @@ -112,19 +112,19 @@ namespace cv } cl_kernel openCLGetKernelFromSource(const Context * /*clCxt*/, - const char ** /*fileName*/, string /*kernelName*/) + const char ** /*fileName*/, string /*kernelName*/) { throw_nogpu(); } void openCLVerifyKernel(const Context * /*clCxt*/, cl_kernel /*kernel*/, size_t * /*blockSize*/, - size_t * /*globalThreads*/, size_t * /*localThreads*/) + size_t * /*globalThreads*/, size_t * /*localThreads*/) { throw_nogpu(); } cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value, - const size_t size) + const size_t size) { throw_nogpu(); } @@ -226,7 +226,7 @@ namespace cv int double_support; Impl() { - memset(extra_options,0,512); + memset(extra_options, 0, 512); } }; @@ -240,23 +240,23 @@ namespace cv cl_device_type _devicetype; switch(devicetype) { - case CVCL_DEVICE_TYPE_DEFAULT: - _devicetype = CL_DEVICE_TYPE_DEFAULT; - break; - case CVCL_DEVICE_TYPE_CPU: - _devicetype = CL_DEVICE_TYPE_CPU; - break; - case CVCL_DEVICE_TYPE_GPU: - _devicetype = CL_DEVICE_TYPE_GPU; - break; - case CVCL_DEVICE_TYPE_ACCELERATOR: - _devicetype = CL_DEVICE_TYPE_ACCELERATOR; - break; - case CVCL_DEVICE_TYPE_ALL: - _devicetype = CL_DEVICE_TYPE_ALL; - break; - default: - CV_Error(CV_GpuApiCallError,"Unkown device type"); + case CVCL_DEVICE_TYPE_DEFAULT: + _devicetype = CL_DEVICE_TYPE_DEFAULT; + break; + case CVCL_DEVICE_TYPE_CPU: + _devicetype = CL_DEVICE_TYPE_CPU; + break; + case CVCL_DEVICE_TYPE_GPU: + _devicetype = 
CL_DEVICE_TYPE_GPU; + break; + case CVCL_DEVICE_TYPE_ACCELERATOR: + _devicetype = CL_DEVICE_TYPE_ACCELERATOR; + break; + case CVCL_DEVICE_TYPE_ALL: + _devicetype = CL_DEVICE_TYPE_ALL; + break; + default: + CV_Error(CV_GpuApiCallError, "Unkown device type"); } int devcienums = 0; // Platform info @@ -288,6 +288,7 @@ namespace cv ocltmpinfo.impl->devices.push_back(devices[j]); openCLSafeCall(clGetDeviceInfo(devices[j], CL_DEVICE_NAME, 256, deviceName, NULL)); ocltmpinfo.impl->devName.push_back(std::string(deviceName)); + ocltmpinfo.DeviceName.push_back(std::string(deviceName)); } delete[] devices; oclinfo.push_back(ocltmpinfo); @@ -314,19 +315,19 @@ namespace cv openCLVerifyCall(status); //create the command queue using the first device of the list oclinfo.impl->clCmdQueue = clCreateCommandQueue(oclinfo.impl->oclcontext, oclinfo.impl->devices[devnum], - CL_QUEUE_PROFILING_ENABLE, &status); + CL_QUEUE_PROFILING_ENABLE, &status); openCLVerifyCall(status); //get device information openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_WORK_GROUP_SIZE, - sizeof(size_t), (void *)&oclinfo.impl->maxWorkGroupSize, NULL)); + sizeof(size_t), (void *)&oclinfo.impl->maxWorkGroupSize, NULL)); openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, - sizeof(cl_uint), (void *)&oclinfo.impl->maxDimensions, NULL)); + sizeof(cl_uint), (void *)&oclinfo.impl->maxDimensions, NULL)); oclinfo.impl->maxWorkItemSizes = new size_t[oclinfo.impl->maxDimensions]; openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_WORK_ITEM_SIZES, - sizeof(size_t)*oclinfo.impl->maxDimensions, (void *)oclinfo.impl->maxWorkItemSizes, NULL)); + sizeof(size_t)*oclinfo.impl->maxDimensions, (void *)oclinfo.impl->maxWorkItemSizes, NULL)); openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_COMPUTE_UNITS, - sizeof(cl_uint), (void *)&oclinfo.impl->maxComputeUnits, NULL)); + sizeof(cl_uint), (void 
*)&oclinfo.impl->maxComputeUnits, NULL)); //initialize extra options for compilation. Currently only fp64 is included. //Assume 4KB is enough to store all possible extensions. @@ -334,9 +335,9 @@ namespace cv char extends_set[EXT_LEN]; size_t extends_size; openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_EXTENSIONS, - EXT_LEN, (void *)extends_set, &extends_size)); + EXT_LEN, (void *)extends_set, &extends_size)); CV_Assert(extends_size < EXT_LEN); - extends_set[EXT_LEN-1] = 0; + extends_set[EXT_LEN - 1] = 0; //oclinfo.extra_options = NULL; int fp64_khr = string(extends_set).find("cl_khr_fp64"); @@ -347,86 +348,90 @@ namespace cv } Context::setContext(oclinfo); } - void* getoclContext() - { - return &(Context::getContext()->impl->clContext); - } - void* getoclCommandQueue() - { - return &(Context::getContext()->impl->clCmdQueue); - } + void *getoclContext() + + { + + return &(Context::getContext()->impl->clContext); + + } + + void *getoclCommandQueue() + { + return &(Context::getContext()->impl->clCmdQueue); + } void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size) { cl_int status; status = clEnqueueReadBuffer(clCxt->impl->clCmdQueue, dst_buffer, CL_TRUE, 0, - size, host_buffer, 0, NULL, NULL); + size, host_buffer, 0, NULL, NULL); openCLVerifyCall(status); } cl_mem openCLCreateBuffer(Context *clCxt, size_t flag , size_t size) { cl_int status; - cl_mem buffer = clCreateBuffer(clCxt->impl->clContext,(cl_mem_flags)flag, size, NULL, &status); + cl_mem buffer = clCreateBuffer(clCxt->impl->clContext, (cl_mem_flags)flag, size, NULL, &status); openCLVerifyCall(status); return buffer; } void openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch, - size_t widthInBytes, size_t height) + size_t widthInBytes, size_t height) { cl_int status; *dev_ptr = clCreateBuffer(clCxt->impl->clContext, CL_MEM_READ_WRITE, - widthInBytes * height, 0, &status); + widthInBytes * height, 0, &status); openCLVerifyCall(status); 
*pitch = widthInBytes; } void openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch, - const void *src, size_t spitch, - size_t width, size_t height, enum openCLMemcpyKind kind, int channels) + const void *src, size_t spitch, + size_t width, size_t height, enum openCLMemcpyKind kind, int channels) { size_t buffer_origin[3] = {0, 0, 0}; size_t host_origin[3] = {0, 0, 0}; size_t region[3] = {width, height, 1}; if(kind == clMemcpyHostToDevice) { - if(dpitch == width || channels==3 || height == 1) - { - openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)dst, CL_TRUE, - 0, width*height, src, 0, NULL, NULL)); - } - else - { - openCLSafeCall(clEnqueueWriteBufferRect(clCxt->impl->clCmdQueue, (cl_mem)dst, CL_TRUE, - buffer_origin, host_origin, region, dpitch, 0, spitch, 0, src, 0, 0, 0)); - } + if(dpitch == width || channels == 3 || height == 1) + { + openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)dst, CL_TRUE, + 0, width * height, src, 0, NULL, NULL)); + } + else + { + openCLSafeCall(clEnqueueWriteBufferRect(clCxt->impl->clCmdQueue, (cl_mem)dst, CL_TRUE, + buffer_origin, host_origin, region, dpitch, 0, spitch, 0, src, 0, 0, 0)); + } } else if(kind == clMemcpyDeviceToHost) { - if(spitch == width || channels==3 || height == 1) - { - openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)src, CL_TRUE, - 0, width*height, dst, 0, NULL, NULL)); - } - else - { - openCLSafeCall(clEnqueueReadBufferRect(clCxt->impl->clCmdQueue, (cl_mem)src, CL_TRUE, - buffer_origin, host_origin, region, spitch, 0, dpitch, 0, dst, 0, 0, 0)); - } + if(spitch == width || channels == 3 || height == 1) + { + openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)src, CL_TRUE, + 0, width * height, dst, 0, NULL, NULL)); + } + else + { + openCLSafeCall(clEnqueueReadBufferRect(clCxt->impl->clCmdQueue, (cl_mem)src, CL_TRUE, + buffer_origin, host_origin, region, spitch, 0, dpitch, 0, dst, 0, 0, 0)); + } } } void openCLCopyBuffer2D(Context 
*clCxt, void *dst, size_t dpitch, int dst_offset, - const void *src, size_t spitch, - size_t width, size_t height, int src_offset, enum openCLMemcpyKind kind) + const void *src, size_t spitch, + size_t width, size_t height, int src_offset, enum openCLMemcpyKind kind) { size_t src_origin[3] = {src_offset % spitch, src_offset / spitch, 0}; size_t dst_origin[3] = {dst_offset % dpitch, dst_offset / dpitch, 0}; size_t region[3] = {width, height, 1}; openCLSafeCall(clEnqueueCopyBufferRect(clCxt->impl->clCmdQueue, (cl_mem)src, (cl_mem)dst, src_origin, dst_origin, - region, spitch, 0, dpitch, 0, 0, 0, 0)); + region, spitch, 0, dpitch, 0, 0, 0, 0)); } void openCLFree(void *devPtr) @@ -438,11 +443,11 @@ namespace cv return openCLGetKernelFromSource(clCxt, source, kernelName, NULL); } - + void setBinpath(const char *path) { - Context *clcxt = Context::getContext(); - clcxt->impl->Binpath = path; + Context *clcxt = Context::getContext(); + clcxt->impl->Binpath = path; } int savetofile(const Context *clcxt, cl_program &program, const char *fileName) { @@ -453,16 +458,16 @@ namespace cv size_t *binarySizes = (size_t *)malloc( sizeof(size_t) * numDevices ); openCLSafeCall(clGetProgramInfo(program, - CL_PROGRAM_BINARY_SIZES, - sizeof(size_t) * numDevices, - binarySizes, NULL)); + CL_PROGRAM_BINARY_SIZES, + sizeof(size_t) * numDevices, + binarySizes, NULL)); size_t i = 0; //copy over all of the generated binaries. 
char **binaries = (char **)malloc( sizeof(char *) * numDevices ); if(binaries == NULL) { - CV_Error(CV_StsNoMem,"Failed to allocate host memory.(binaries)\r\n"); + CV_Error(CV_StsNoMem, "Failed to allocate host memory.(binaries)\r\n"); } for(i = 0; i < numDevices; i++) @@ -472,7 +477,7 @@ namespace cv binaries[i] = (char *)malloc( sizeof(char) * binarySizes[i]); if(binaries[i] == NULL) { - CV_Error(CV_StsNoMem,"Failed to allocate host memory.(binaries[i])\r\n"); + CV_Error(CV_StsNoMem, "Failed to allocate host memory.(binaries[i])\r\n"); } } else @@ -481,10 +486,10 @@ namespace cv } } openCLSafeCall(clGetProgramInfo(program, - CL_PROGRAM_BINARIES, - sizeof(char *) * numDevices, - binaries, - NULL)); + CL_PROGRAM_BINARIES, + sizeof(char *) * numDevices, + binaries, + NULL)); //dump out each binary into its own separate file. for(i = 0; i < numDevices; i++) @@ -493,10 +498,10 @@ namespace cv { char deviceName[1024]; openCLSafeCall(clGetDeviceInfo(devices[i], - CL_DEVICE_NAME, - sizeof(deviceName), - deviceName, - NULL)); + CL_DEVICE_NAME, + sizeof(deviceName), + deviceName, + NULL)); printf( "%s binary kernel: %s\n", deviceName, fileName); FILE *fp = fopen(fileName, "wb+"); @@ -516,7 +521,7 @@ namespace cv else { printf("Skipping %s since there is no binary data to write!\n", - fileName); + fileName); } } free(binarySizes); @@ -526,24 +531,24 @@ namespace cv cl_kernel openCLGetKernelFromSource(const Context *clCxt, const char **source, string kernelName, - const char *build_options) + const char *build_options) { cl_kernel kernel; cl_program program ; cl_int status = 0; stringstream src_sign; string srcsign; - string filename; + string filename; CV_Assert(programCache != NULL); if(NULL != build_options) - { + { src_sign << (int64)(*source) << clCxt->impl->clContext << "_" << build_options; - } + } else - { - src_sign << (int64)(*source) << clCxt->impl->clContext; - } + { + src_sign << (int64)(*source) << clCxt->impl->clContext; + } srcsign = src_sign.str(); program = 
NULL; @@ -554,31 +559,31 @@ namespace cv //config build programs char all_build_options[1024]; memset(all_build_options, 0, 1024); - char zeromem[512]={0}; - if(0!=memcmp(clCxt -> impl->extra_options, zeromem,512)) + char zeromem[512] = {0}; + if(0 != memcmp(clCxt -> impl->extra_options, zeromem, 512)) strcat(all_build_options, clCxt -> impl->extra_options); strcat(all_build_options, " "); if(build_options != NULL) strcat(all_build_options, build_options); - if(all_build_options != NULL) - { - filename = clCxt->impl->Binpath + kernelName + "_" + clCxt->impl->devName + all_build_options + ".clb"; - } - else - { - filename = clCxt->impl->Binpath + kernelName + "_" + clCxt->impl->devName + ".clb"; - } + if(all_build_options != NULL) + { + filename = clCxt->impl->Binpath + kernelName + "_" + clCxt->impl->devName + all_build_options + ".clb"; + } + else + { + filename = clCxt->impl->Binpath + kernelName + "_" + clCxt->impl->devName + ".clb"; + } FILE *fp; fp = fopen(filename.c_str(), "rb"); if(fp == NULL || clCxt->impl->Binpath.size() == 0) //we should genetate a binary file for the first time. 
{ program = clCreateProgramWithSource( - clCxt->impl->clContext, 1, source, NULL, &status); + clCxt->impl->clContext, 1, source, NULL, &status); openCLVerifyCall(status); status = clBuildProgram(program, 1, &(clCxt->impl->devices[0]), all_build_options, NULL, NULL); - if(status == CL_SUCCESS && clCxt->impl->Binpath.size()) - savetofile(clCxt, program, filename.c_str()); + if(status == CL_SUCCESS && clCxt->impl->Binpath.size()) + savetofile(clCxt, program, filename.c_str()); } else { @@ -590,12 +595,12 @@ namespace cv fclose(fp); cl_int status = 0; program = clCreateProgramWithBinary(clCxt->impl->clContext, - 1, - &(clCxt->impl->devices[0]), - (const size_t *)&binarySize, - (const unsigned char **)&binary, - NULL, - &status); + 1, + &(clCxt->impl->devices[0]), + (const size_t *)&binarySize, + (const unsigned char **)&binary, + NULL, + &status); openCLVerifyCall(status); status = clBuildProgram(program, 1, &(clCxt->impl->devices[0]), all_build_options, NULL, NULL); } @@ -608,15 +613,15 @@ namespace cv char *buildLog = NULL; size_t buildLogSize = 0; logStatus = clGetProgramBuildInfo(program, - clCxt->impl->devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize, - buildLog, &buildLogSize); + clCxt->impl->devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize, + buildLog, &buildLogSize); if(logStatus != CL_SUCCESS) cout << "Failed to build the program and get the build info." 
<< endl; buildLog = new char[buildLogSize]; CV_DbgAssert(!!buildLog); memset(buildLog, 0, buildLogSize); openCLSafeCall(clGetProgramBuildInfo(program, clCxt->impl->devices[0], - CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL)); + CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL)); cout << "\n\t\t\tBUILD LOG\n"; cout << buildLog << endl; delete buildLog; @@ -626,8 +631,8 @@ namespace cv //Cache the binary for future use if build_options is null if( (programCache->cacheSize += 1) < programCache->MAX_PROG_CACHE_SIZE) programCache->addProgram(srcsign, program); - else - cout << "Warning: code cache has been full.\n"; + else + cout << "Warning: code cache has been full.\n"; } kernel = clCreateKernel(program, kernelName.c_str(), &status); openCLVerifyCall(status); @@ -635,16 +640,16 @@ namespace cv } void openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *blockSize, - size_t *globalThreads, size_t *localThreads) + size_t *globalThreads, size_t *localThreads) { size_t kernelWorkGroupSize; openCLSafeCall(clGetKernelWorkGroupInfo(kernel, clCxt->impl->devices[0], - CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0)); + CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0)); CV_DbgAssert( (localThreads[0] <= clCxt->impl->maxWorkItemSizes[0]) && - (localThreads[1] <= clCxt->impl->maxWorkItemSizes[1]) && - (localThreads[2] <= clCxt->impl->maxWorkItemSizes[2]) && - ((localThreads[0] * localThreads[1] * localThreads[2]) <= kernelWorkGroupSize) && - (localThreads[0] * localThreads[1] * localThreads[2]) <= clCxt->impl->maxWorkGroupSize); + (localThreads[1] <= clCxt->impl->maxWorkItemSizes[1]) && + (localThreads[2] <= clCxt->impl->maxWorkItemSizes[2]) && + ((localThreads[0] * localThreads[1] * localThreads[2]) <= kernelWorkGroupSize) && + (localThreads[0] * localThreads[1] * localThreads[2]) <= clCxt->impl->maxWorkGroupSize); } #ifdef PRINT_KERNEL_RUN_TIME @@ -652,8 +657,8 @@ namespace cv static double total_kernel_time = 0; 
#endif void openCLExecuteKernel_(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], - size_t localThreads[3], vector< pair > &args, int channels, - int depth, const char *build_options) + size_t localThreads[3], vector< pair > &args, int channels, + int depth, const char *build_options) { //construct kernel name //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number @@ -667,13 +672,13 @@ namespace cv cl_kernel kernel; kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options); - + if ( localThreads != NULL) - { + { globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0]; globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1]; globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2]; - + size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2]; cv::ocl::openCLVerifyKernel(clCxt, kernel, &blockSize, globalThreads, localThreads); } @@ -682,11 +687,11 @@ namespace cv #ifndef PRINT_KERNEL_RUN_TIME openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads, - localThreads, 0, NULL, NULL)); + localThreads, 0, NULL, NULL)); #else cl_event event = NULL; openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads, - localThreads, 0, NULL, &event)); + localThreads, 0, NULL, &event)); cl_ulong start_time, end_time, queue_time; double execute_time = 0; @@ -694,13 +699,13 @@ namespace cv openCLSafeCall(clWaitForEvents(1, &event)); openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, - sizeof(cl_ulong), &start_time, 0)); + sizeof(cl_ulong), &start_time, 0)); openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, - sizeof(cl_ulong), &end_time, 0)); + sizeof(cl_ulong), &end_time, 0)); openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, - sizeof(cl_ulong), &queue_time, 
0)); + sizeof(cl_ulong), &queue_time, 0)); execute_time = (double)(end_time - start_time) / (1000 * 1000); total_time = (double)(end_time - queue_time) / (1000 * 1000); @@ -719,20 +724,20 @@ namespace cv } void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, - size_t globalThreads[3], size_t localThreads[3], - vector< pair > &args, int channels, int depth) + size_t globalThreads[3], size_t localThreads[3], + vector< pair > &args, int channels, int depth) { openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, - channels, depth, NULL); + channels, depth, NULL); } void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, - size_t globalThreads[3], size_t localThreads[3], - vector< pair > &args, int channels, int depth, const char *build_options) + size_t globalThreads[3], size_t localThreads[3], + vector< pair > &args, int channels, int depth, const char *build_options) { #ifndef PRINT_KERNEL_RUN_TIME openCLExecuteKernel_(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, - build_options); + build_options); #else string data_type[] = { "uchar", "char", "ushort", "short", "int", "float", "double"}; cout << endl; @@ -752,7 +757,7 @@ namespace cv int i = 0; for(i = 0; i < RUN_TIMES; i++) openCLExecuteKernel_(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, - build_options); + build_options); cout << "average kernel excute time: " << total_execute_time / RUN_TIMES << endl; // "ms" << endl; cout << "average kernel total time: " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl; @@ -760,7 +765,7 @@ namespace cv } cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value, - const size_t size) + const size_t size) { int status; cl_mem con_struct; @@ -769,7 +774,7 @@ namespace cv openCLSafeCall(status); openCLSafeCall(clEnqueueWriteBuffer(command_queue, con_struct, 1, 0, size, - value, 0, 0, 0)); + 
value, 0, 0, 0)); return con_struct; @@ -801,7 +806,7 @@ namespace cv clcxt->impl->clContext = oclinfo.impl->oclcontext; clcxt->impl->clCmdQueue = oclinfo.impl->clCmdQueue; clcxt->impl->devices = &oclinfo.impl->devices[oclinfo.impl->devnum]; - clcxt->impl->devName = oclinfo.impl->devName[oclinfo.impl->devnum]; + clcxt->impl->devName = oclinfo.impl->devName[oclinfo.impl->devnum]; clcxt->impl->maxDimensions = oclinfo.impl->maxDimensions; clcxt->impl->maxWorkGroupSize = oclinfo.impl->maxWorkGroupSize; clcxt->impl->maxWorkItemSizes = oclinfo.impl->maxWorkItemSizes; @@ -873,6 +878,7 @@ namespace cv //} impl->devices.clear(); impl->devName.clear(); + DeviceName.clear(); } Info::~Info() { @@ -895,6 +901,7 @@ namespace cv { impl->devices.push_back(m.impl->devices[i]); impl->devName.push_back(m.impl->devName[i]); + DeviceName.push_back(m.DeviceName[i]); } return *this; } diff --git a/modules/ocl/src/interpolate_frames.cpp b/modules/ocl/src/interpolate_frames.cpp new file mode 100644 index 0000000..443d43e --- /dev/null +++ b/modules/ocl/src/interpolate_frames.cpp @@ -0,0 +1,315 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Comuter Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include +#include "precomp.hpp" + +using namespace std; +using namespace cv; +using namespace cv::ocl; + + +#if !defined (HAVE_OPENCL) +void cv::ocl::interpolateFrames(const oclMat &frame0, const oclMat &frame1, + const oclMat &fu, const oclMat &fv, + const oclMat &bu, const oclMat &bv, + float pos, oclMat &newFrame, oclMat &buf) +{ + throw_nogpu(); +} +#else + +namespace cv +{ + namespace ocl + { + ///////////////////////////OpenCL kernel strings/////////////////////////// + extern const char *interpolate_frames; + + namespace interpolate + { + //The following are ported from NPP_staging.cu + // As it is not valid to do pointer offset operations on host for default oclMat's native cl_mem pointer, + // we may have to do this on kernel + void memsetKernel(float val, oclMat &img, int height, int offset); + void normalizeKernel(oclMat &buffer, int height, int factor_offset, int dst_offset); + void forwardWarpKernel(const oclMat &src, oclMat &buffer, const oclMat &u, const oclMat &v, const float time_scale, + int b_offset, int d_offset); // buffer, dst offset + + //OpenCL conversion of nppiStVectorWarp_PSF2x2_32f_C1 + void vectorWarp(const oclMat &src, const oclMat &u, const oclMat &v, + oclMat &buffer, int buf_offset, float timeScale, int dst_offset); + //OpenCL conversion of BlendFrames + void blendFrames(const oclMat &frame0, const oclMat &frame1, const oclMat &buffer, + float pos, oclMat &newFrame, cl_mem &, cl_mem &); + + // bind a buffer to an image + void bindImgTex(const oclMat &img, cl_mem &tex); + } + } +} + +void cv::ocl::interpolateFrames(const oclMat &frame0, const oclMat &frame1, + const oclMat &fu, const oclMat &fv, + const oclMat &bu, const oclMat &bv, + float pos, oclMat &newFrame, oclMat &buf) +{ + CV_Assert(frame0.type() == CV_32FC1); + CV_Assert(frame1.size() == frame0.size() && frame1.type() == frame0.type()); + CV_Assert(fu.size() == frame0.size() && fu.type() == frame0.type()); + CV_Assert(fv.size() == frame0.size() && fv.type() == 
frame0.type()); + CV_Assert(bu.size() == frame0.size() && bu.type() == frame0.type()); + CV_Assert(bv.size() == frame0.size() && bv.type() == frame0.type()); + + newFrame.create(frame0.size(), frame0.type()); + + buf.create(6 * frame0.rows, frame0.cols, CV_32FC1); + buf.setTo(Scalar::all(0)); + + size_t step = frame0.step; + + CV_Assert(frame1.step == step && fu.step == step && fv.step == step && bu.step == step && bv.step == step && newFrame.step == step && buf.step == step); + cl_mem tex_src0 = 0, tex_src1 = 0; + + // warp flow + using namespace interpolate; + + bindImgTex(frame0, tex_src0); + bindImgTex(frame1, tex_src1); + + // CUDA Offsets + enum + { + cov0 = 0, + cov1, + fwdU, + fwdV, + bwdU, + bwdV + }; + + vectorWarp(fu, fu, fv, buf, cov0, pos, fwdU); + vectorWarp(fv, fu, fv, buf, cov0, pos, fwdV); + vectorWarp(bu, bu, bv, buf, cov1, 1.0f - pos, bwdU); + vectorWarp(bv, bu, bv, buf, cov1, 1.0f - pos, bwdU); + + blendFrames(frame0, frame1, buf, pos, newFrame, tex_src0, tex_src1); + + openCLFree(tex_src0); + openCLFree(tex_src1); +} + +void interpolate::memsetKernel(float val, oclMat &img, int height, int offset) +{ + Context *clCxt = Context::getContext(); + string kernelName = "memsetKernel"; + vector< pair > args; + int step = img.step / sizeof(float); + offset = step * height * offset; + + args.push_back( make_pair( sizeof(cl_float), (void *)&val)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&img.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&height)); + args.push_back( make_pair( sizeof(cl_int), (void *)&step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&offset)); + + size_t globalThreads[3] = {img.cols, height, 1}; + size_t localThreads[3] = {16, 16, 1}; + openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1); +} +void interpolate::normalizeKernel(oclMat &buffer, int height, int factor_offset, int 
dst_offset) +{ + Context *clCxt = Context::getContext(); + string kernelName = "normalizeKernel"; + vector< pair > args; + int step = buffer.step / sizeof(float); + factor_offset = step * height * factor_offset; + dst_offset = step * height * dst_offset; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&buffer.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&buffer.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&height)); + args.push_back( make_pair( sizeof(cl_int), (void *)&step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&factor_offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset)); + + size_t globalThreads[3] = {buffer.cols, height, 1}; + size_t localThreads[3] = {16, 16, 1}; + openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void interpolate::forwardWarpKernel(const oclMat &src, oclMat &buffer, const oclMat &u, const oclMat &v, const float time_scale, + int b_offset, int d_offset) +{ + Context *clCxt = Context::getContext(); + string kernelName = "forwardWarpKernel"; + vector< pair > args; + int f_step = u.step / sizeof(float); // flow step + int b_step = buffer.step / sizeof(float); + + b_offset = b_step * src.rows * b_offset; + d_offset = b_step * src.rows * d_offset; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buffer.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&u.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&v.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&f_step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&b_step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&b_offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&d_offset)); + args.push_back( 
make_pair( sizeof(cl_float), (void *)&time_scale)); + + size_t globalThreads[3] = {src.cols, src.rows, 1}; + size_t localThreads[3] = {16, 16, 1}; + openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void interpolate::vectorWarp(const oclMat &src, const oclMat &u, const oclMat &v, + oclMat &buffer, int b_offset, float timeScale, int d_offset) +{ + memsetKernel(0, buffer, src.rows, b_offset); + forwardWarpKernel(src, buffer, u, v, timeScale, b_offset, d_offset); + normalizeKernel(buffer, src.rows, b_offset, d_offset); +} + +void interpolate::blendFrames(const oclMat &frame0, const oclMat &frame1, const oclMat &buffer, float pos, oclMat &newFrame, cl_mem &tex_src0, cl_mem &tex_src1) +{ + int step = buffer.step / sizeof(float); + + Context *clCxt = Context::getContext(); + string kernelName = "blendFramesKernel"; + vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&tex_src0)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&tex_src1)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buffer.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&newFrame.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&frame0.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&frame0.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&step)); + args.push_back( make_pair( sizeof(cl_float), (void *)&pos)); + + size_t globalThreads[3] = {frame0.cols, frame0.rows, 1}; + size_t localThreads[3] = {16, 16, 1}; + openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void interpolate::bindImgTex(const oclMat &img, cl_mem &texture) +{ + cl_image_format format; + int err; + int depth = img.depth(); + int channels = img.channels(); + + switch(depth) + { + case CV_8U: + format.image_channel_data_type = CL_UNSIGNED_INT8; + break; + case CV_32S: + format.image_channel_data_type = CL_UNSIGNED_INT32; + break; + 
case CV_32F: + format.image_channel_data_type = CL_FLOAT; + break; + default: + throw std::exception(); + break; + } + switch(channels) + { + case 1: + format.image_channel_order = CL_R; + break; + case 3: + format.image_channel_order = CL_RGB; + break; + case 4: + format.image_channel_order = CL_RGBA; + break; + default: + throw std::exception(); + break; + } + if(texture) + { + openCLFree(texture); + } + +#if CL_VERSION_1_2 + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = img.step / img.elemSize(); + desc.image_height = img.rows; + desc.image_depth = 0; + desc.image_array_size = 1; + desc.image_row_pitch = 0; + desc.image_slice_pitch = 0; + desc.buffer = NULL; + desc.num_mip_levels = 0; + desc.num_samples = 0; + texture = clCreateImage(Context::getContext()->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err); +#else + texture = clCreateImage2D( + Context::getContext()->impl->clContext, + CL_MEM_READ_WRITE, + &format, + img.step / img.elemSize(), + img.rows, + 0, + NULL, + &err); +#endif + size_t origin[] = { 0, 0, 0 }; + size_t region[] = { img.step / img.elemSize(), img.rows, 1 }; + clEnqueueCopyBufferToImage(img.clCxt->impl->clCmdQueue, (cl_mem)img.data, texture, 0, origin, region, 0, NULL, 0); + openCLSafeCall(err); +} +#endif//(HAVE_OPENCL) + diff --git a/modules/ocl/src/kernels/arithm_absdiff.cl b/modules/ocl/src/kernels/arithm_absdiff.cl index 6e17d52..6824fd8 100644 --- a/modules/ocl/src/kernels/arithm_absdiff.cl +++ b/modules/ocl/src/kernels/arithm_absdiff.cl @@ -70,9 +70,22 @@ __kernel void arithm_absdiff_D0 (__global uchar *src1, int src1_step, int src1_o int dst_start = mad24(y, dst_step, dst_offset); int dst_end = mad24(y, dst_step, dst_offset + dst_step1); int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - - uchar4 src1_data = vload4(0, src1 + src1_index); - uchar4 src2_data = vload4(0, src2 + src2_index); + int src1_index_fix = src1_index < 0 ? 
0 : src1_index; + int src2_index_fix = src2_index < 0 ? 0 : src2_index; + uchar4 src1_data = vload4(0, src1 + src1_index_fix); + uchar4 src2_data = vload4(0, src2 + src2_index_fix); + if(src1_index < 0) + { + uchar4 tmp; + tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; + src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; + } + if(src2_index < 0) + { + uchar4 tmp; + tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; + src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; + } uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); uchar4 tmp_data = abs_diff(src1_data, src2_data); @@ -242,9 +255,15 @@ __kernel void arithm_s_absdiff_C1_D0 (__global uchar *src1, int src1_step, int int dst_start = mad24(y, dst_step, dst_offset); int dst_end = mad24(y, dst_step, dst_offset + dst_step1); int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - - uchar4 src1_data = vload4(0, src1 + src1_index); + int src1_index_fix = src1_index < 0 ? 0 : src1_index; + uchar4 src1_data = vload4(0, src1 + src1_index_fix); int4 src2_data = (int4)(src2.x, src2.x, src2.x, src2.x); + if(src1_index < 0) + { + uchar4 tmp; + tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; + src1_data.xyzw = (src1_index == -1) ? 
src1_data.wxyz:tmp.xyzw; + } uchar4 data = *((__global uchar4 *)(dst + dst_index)); uchar4 tmp_data = convert_uchar4_sat(abs_diff(convert_int4_sat(src1_data), src2_data)); diff --git a/modules/ocl/src/kernels/arithm_add.cl b/modules/ocl/src/kernels/arithm_add.cl index 3d5b13f..5870119 100644 --- a/modules/ocl/src/kernels/arithm_add.cl +++ b/modules/ocl/src/kernels/arithm_add.cl @@ -71,10 +71,22 @@ __kernel void arithm_add_D0 (__global uchar *src1, int src1_step, int src1_offse int dst_start = mad24(y, dst_step, dst_offset); int dst_end = mad24(y, dst_step, dst_offset + dst_step1); int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - - uchar4 src1_data = vload4(0, src1 + src1_index); - uchar4 src2_data = vload4(0, src2 + src2_index); - + int src1_index_fix = src1_index < 0 ? 0 : src1_index; + int src2_index_fix = src2_index < 0 ? 0 : src2_index; + uchar4 src1_data = vload4(0, src1 + src1_index_fix); + uchar4 src2_data = vload4(0, src2 + src2_index_fix); + if(src1_index < 0) + { + uchar4 tmp; + tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; + src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; + } + if(src2_index < 0) + { + uchar4 tmp; + tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; + src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; + } uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); short4 tmp = convert_short4_sat(src1_data) + convert_short4_sat(src2_data); uchar4 tmp_data = convert_uchar4_sat(tmp); @@ -248,11 +260,31 @@ __kernel void arithm_add_with_mask_C1_D0 (__global uchar *src1, int src1_step, i int dst_start = mad24(y, dst_step, dst_offset); int dst_end = mad24(y, dst_step, dst_offset + dst_step1); int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - - uchar4 src1_data = vload4(0, src1 + src1_index); - uchar4 src2_data = vload4(0, src2 + src2_index); - uchar4 mask_data = vload4(0, mask + mask_index); - + int src1_index_fix = src1_index < 0 ? 
0 : src1_index; + int src2_index_fix = src2_index < 0 ? 0 : src2_index; + int mask_index_fix = mask_index < 0 ? 0 : mask_index; + uchar4 src1_data = vload4(0, src1 + src1_index_fix); + uchar4 src2_data = vload4(0, src2 + src2_index_fix); + uchar4 mask_data = vload4(0, mask + mask_index_fix); + if(src1_index < 0) + { + uchar4 tmp; + tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; + src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; + } + if(src2_index < 0) + { + uchar4 tmp; + tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; + src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; + } + if(mask_index < 0) + { + uchar4 tmp; + tmp.xyzw = (mask_index == -2) ? mask_data.zwxy:mask_data.yzwx; + mask_data.xyzw = (mask_index == -1) ? mask_data.wxyz:tmp.xyzw; + } + uchar4 data = *((__global uchar4 *)(dst + dst_index)); short4 tmp = convert_short4_sat(src1_data) + convert_short4_sat(src2_data); uchar4 tmp_data = convert_uchar4_sat(tmp); diff --git a/modules/ocl/src/kernels/arithm_add_scalar.cl b/modules/ocl/src/kernels/arithm_add_scalar.cl index 4fa5e68..cdcff00 100644 --- a/modules/ocl/src/kernels/arithm_add_scalar.cl +++ b/modules/ocl/src/kernels/arithm_add_scalar.cl @@ -65,10 +65,16 @@ __kernel void arithm_s_add_C1_D0 (__global uchar *src1, int src1_step, int src int dst_start = mad24(y, dst_step, dst_offset); int dst_end = mad24(y, dst_step, dst_offset + dst_step1); int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - - uchar4 src1_data = vload4(0, src1 + src1_index); + int src1_index_fix = src1_index < 0 ? 0 : src1_index; + uchar4 src1_data = vload4(0, src1 + src1_index_fix); int4 src2_data = (int4)(src2.x, src2.x, src2.x, src2.x); - + if(src1_index < 0) + { + uchar4 tmp; + tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; + src1_data.xyzw = (src1_index == -1) ? 
src1_data.wxyz:tmp.xyzw; + } + uchar4 data = *((__global uchar4 *)(dst + dst_index)); int4 tmp = convert_int4_sat(src1_data) + src2_data; uchar4 tmp_data = convert_uchar4_sat(tmp); diff --git a/modules/ocl/src/kernels/arithm_add_scalar_mask.cl b/modules/ocl/src/kernels/arithm_add_scalar_mask.cl index 9e41d2c..a8fb247 100644 --- a/modules/ocl/src/kernels/arithm_add_scalar_mask.cl +++ b/modules/ocl/src/kernels/arithm_add_scalar_mask.cl @@ -68,10 +68,23 @@ __kernel void arithm_s_add_with_mask_C1_D0 (__global uchar *src1, int src1_ste int dst_start = mad24(y, dst_step, dst_offset); int dst_end = mad24(y, dst_step, dst_offset + dst_step1); int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - - uchar4 src1_data = vload4(0, src1 + src1_index); + int src1_index_fix = src1_index < 0 ? 0 : src1_index; + int mask_index_fix = mask_index < 0 ? 0 : mask_index; + uchar4 src1_data = vload4(0, src1 + src1_index_fix); int4 src2_data = (int4)(src2.x, src2.x, src2.x, src2.x); - uchar4 mask_data = vload4(0, mask + mask_index); + uchar4 mask_data = vload4(0, mask + mask_index_fix); + if(src1_index < 0) + { + uchar4 tmp; + tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; + src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; + } + if(mask_index < 0) + { + uchar4 tmp; + tmp.xyzw = (mask_index == -2) ? mask_data.zwxy:mask_data.yzwx; + mask_data.xyzw = (mask_index == -1) ? 
mask_data.wxyz:tmp.xyzw; + } uchar4 data = *((__global uchar4 *)(dst + dst_index)); int4 tmp = convert_int4_sat(src1_data) + src2_data; diff --git a/modules/ocl/src/kernels/arithm_flip.cl b/modules/ocl/src/kernels/arithm_flip.cl index 0e12021..26ea481 100644 --- a/modules/ocl/src/kernels/arithm_flip.cl +++ b/modules/ocl/src/kernels/arithm_flip.cl @@ -71,9 +71,22 @@ __kernel void arithm_flip_rows_D0 (__global uchar *src, int src_step, int src_of int dst_end_1 = mad24(rows - y - 1, dst_step, dst_offset + dst_step1); int dst_index_0 = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); int dst_index_1 = mad24(rows - y - 1, dst_step, dst_offset + x & (int)0xfffffffc); - - uchar4 src_data_0 = vload4(0, src + src_index_0); - uchar4 src_data_1 = vload4(0, src + src_index_1); + int src1_index_fix = src_index_0 < 0 ? 0 : src_index_0; + int src2_index_fix = src_index_1 < 0 ? 0 : src_index_1; + uchar4 src_data_0 = vload4(0, src + src1_index_fix); + uchar4 src_data_1 = vload4(0, src + src2_index_fix); + if(src_index_0 < 0) + { + uchar4 tmp; + tmp.xyzw = (src_index_0 == -2) ? src_data_0.zwxy:src_data_0.yzwx; + src_data_0.xyzw = (src_index_0 == -1) ? src_data_0.wxyz:tmp.xyzw; + } + if(src_index_1 < 0) + { + uchar4 tmp; + tmp.xyzw = (src_index_1 == -2) ? src_data_1.zwxy:src_data_1.yzwx; + src_data_1.xyzw = (src_index_1 == -1) ? src_data_1.wxyz:tmp.xyzw; + } uchar4 dst_data_0 = *((__global uchar4 *)(dst + dst_index_0)); uchar4 dst_data_1 = *((__global uchar4 *)(dst + dst_index_1)); diff --git a/modules/ocl/src/kernels/build_warps.cl b/modules/ocl/src/kernels/build_warps.cl new file mode 100644 index 0000000..4bf16c0 --- /dev/null +++ b/modules/ocl/src/kernels/build_warps.cl @@ -0,0 +1,237 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. 
+// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +__kernel /* plane warp: work-item (du, dv) writes one source coordinate pair into map_x / map_y */ + void buildWarpPlaneMaps + ( + __global float * map_x, + __global float * map_y, + __constant float * KRT, + int tl_u, + int tl_v, + int cols, + int rows, + int step_x, + int step_y, + float scale + ) +{ + int du = get_global_id(0); + int dv = get_global_id(1); + step_x /= sizeof(float); /* steps are used as float-element strides below (assumes the host passes byte steps - TODO confirm) */ + step_y /= sizeof(float); + + __constant float * ck_rinv = KRT; /* first 9 floats: 3x3 matrix, read row-major below */ + __constant float * ct = KRT + 9; /* next 3 floats: vector ct[0..2] */ + + if (du < cols && dv < rows) + { + float u = tl_u + du; + float v = tl_v + dv; + float x, y; + + float x_ = u / scale - ct[0]; + float y_ = v / scale - ct[1]; + + float z; + x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * (1 - ct[2]); + y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * (1 - ct[2]); + z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * (1 - ct[2]); + + x /= z; /* no z > 0 guard here, unlike buildWarpCylindricalMaps below */ + y /= z; + + map_x[dv * step_x + du] = x; + map_y[dv * step_y + du] = y; + } +} + +__kernel /* cylindrical warp: projects (sin(u), v, cos(u)) through the 3x3 matrix ck_rinv */ + void buildWarpCylindricalMaps + ( + __global float * map_x, + __global float * map_y, + __constant float * ck_rinv, + int tl_u, + int tl_v, + int cols, + int rows, + int step_x, + int step_y, + float scale + ) +{ + int du = get_global_id(0); + int dv = get_global_id(1); + step_x /= sizeof(float); + step_y /= sizeof(float); + + if (du < cols && dv < rows) + { + float u = tl_u + du; + float v = tl_v + dv; + float x, y; + + u /= scale; + float x_ = sin(u); + float y_ = v / scale; + float z_ = cos(u); + + float z; + x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_; + y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_; + z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_; + + if (z > 0) { x /= z; y /= z; } + else x = y = -1; /* non-positive z: store -1 for both coordinates */ + + map_x[dv * step_x + du] = x; + map_y[dv * step_y + du] = y; + } +} + +__kernel /* spherical warp: same parameter list as the cylindrical kernel */ + void buildWarpSphericalMaps + ( + __global float * map_x, + __global float * map_y, + __constant float * ck_rinv, + int tl_u, + int tl_v, + int cols, + int rows, + int step_x, + int step_y, + float scale + ) +{ + int du = get_global_id(0); + int dv = get_global_id(1); 
+ step_x /= sizeof(float); + step_y /= sizeof(float); + + if (du < cols && dv < rows) + { + float u = tl_u + du; + float v = tl_v + dv; + float x, y; + + v /= scale; + u /= scale; + + float sinv = sin(v); + float x_ = sinv * sin(u); + float y_ = - cos(v); + float z_ = sinv * cos(u); + + float z; + x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_; + y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_; + z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_; + + if (z > 0) { x /= z; y /= z; } + else x = y = -1; + + map_x[dv * step_x + du] = x; + map_y[dv * step_y + du] = y; + } +} + +__kernel /* affine warp: work-item (x, y) evaluates the six coefficients c_warpMat[0..5] and stores the result in xmap / ymap */ + void buildWarpAffineMaps + ( + __global float * xmap, + __global float * ymap, + __constant float * c_warpMat, + int cols, + int rows, + int step_x, + int step_y + ) +{ + int x = get_global_id(0); + int y = get_global_id(1); + step_x /= sizeof(float); /* steps are used as float-element strides below */ + step_y /= sizeof(float); + + if (x < cols && y < rows) + { + const float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]; + const float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]; + + xmap[y * step_x + x] = xcoo; /* outputs are the xmap / ymap parameters of this kernel */ + ymap[y * step_y + x] = ycoo; + } +} + +__kernel /* perspective warp: like the affine kernel, but scaled by 1 / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]) */ + void buildWarpPerspectiveMaps + ( + __global float * xmap, + __global float * ymap, + __constant float * c_warpMat, + int cols, + int rows, + int step_x, + int step_y + ) +{ + int x = get_global_id(0); + int y = get_global_id(1); + step_x /= sizeof(float); + step_y /= sizeof(float); + + if (x < cols && y < rows) + { + const float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]); + + const float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]); + const float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]); + + xmap[y * step_x + x] = xcoo; /* outputs are the xmap / ymap parameters of this kernel */ + ymap[y * step_y + x] = ycoo; + } +} + diff --git a/modules/ocl/src/kernels/filtering_boxFilter.cl b/modules/ocl/src/kernels/filtering_boxFilter.cl index 1d6770d..763cd03 100644 --- a/modules/ocl/src/kernels/filtering_boxFilter.cl +++ 
b/modules/ocl/src/kernels/filtering_boxFilter.cl @@ -254,7 +254,8 @@ __kernel void boxFilter_C4_D0(__global const uchar4 * restrict src, __global uch //ss = convert_uint4(src[cur_addr]); int cur_col = clamp(startX + col, 0, src_whole_cols); - ss = convert_uint4(src[(startY+i)*(src_step>>2) + cur_col]); + if(con) + ss = convert_uint4(src[(startY+i)*(src_step>>2) + cur_col]); data[i] = con ? ss : 0; } @@ -269,6 +270,7 @@ __kernel void boxFilter_C4_D0(__global const uchar4 * restrict src, __global uch selected_col = ADDR_L(startX+col, 0, src_whole_cols); selected_col = ADDR_R(startX+col, src_whole_cols, selected_col); + data[i] = convert_uint4(src[selected_row * (src_step>>2) + selected_col]); } @@ -334,11 +336,12 @@ __kernel void boxFilter_C1_D5(__global const float *restrict src, __global float for(int i=0; i < ksY+1; i++) { con = startX+col >= 0 && startX+col < src_whole_cols && startY+i >= 0 && startY+i < src_whole_rows; - // int cur_addr = clamp((startY+i)*(src_step>>2)+(startX+col),0,end_addr); - // ss = src[cur_addr]; - + //int cur_addr = clamp((startY+i)*(src_step>>2)+(startX+col),0,end_addr); + //ss = src[cur_addr]; + int cur_col = clamp(startX + col, 0, src_whole_cols); - ss = src[(startY+i)*(src_step>>2) + cur_col]; + //ss = src[(startY+i)*(src_step>>2) + cur_col]; + ss = (startY+i)=0&&cur_col>=0&&cur_col>2) + cur_col]:0; data[i] = con ? ss : 0.f; } @@ -422,7 +425,8 @@ __kernel void boxFilter_C4_D5(__global const float4 *restrict src, __global floa //ss = src[cur_addr]; int cur_col = clamp(startX + col, 0, src_whole_cols); - ss = src[(startY+i)*(src_step>>4) + cur_col]; + //ss = src[(startY+i)*(src_step>>4) + cur_col]; + ss = (startY+i)=0&&cur_col>=0&&cur_col>4) + cur_col]:0; data[i] = con ? 
ss : (float4)(0.0,0.0,0.0,0.0); } diff --git a/modules/ocl/src/kernels/imgproc_bilateral.cl b/modules/ocl/src/kernels/imgproc_bilateral.cl index 5bb9379..0433e20 100644 --- a/modules/ocl/src/kernels/imgproc_bilateral.cl +++ b/modules/ocl/src/kernels/imgproc_bilateral.cl @@ -31,84 +31,8 @@ // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. -// -// - - -//#pragma OPENCL EXTENSION cl_amd_printf :enable -__kernel -void bilateral4(__global uchar4 *dst, - __global uchar4 *src, - int rows, - int cols, - int channels, - int radius, - int wholerows, - int wholecols, - int src_step, - int dst_step, - int src_offset, - int dst_offset, - __constant float *sigClr, - __constant float *sigSpc) -{ - uint lidx = get_local_id(0); - uint lidy = get_local_id(1); - - uint gdx = get_global_id(0); - uint gdy = get_global_id(1); - - uint gidx = gdx >=cols?cols-1:gdx; - uint gidy = gdy >=rows?rows-1:gdy; - - uchar4 p,q,tmp; - - float4 pf = 0,pq = 0,pd = 0; - float wt =0; - - int r = radius; - int ij = 0; - int ct = 0; - - uint index_src = src_offset/4 + gidy*src_step/4 + gidx; - uint index_dst = dst_offset/4 + gidy*dst_step/4 + gidx; - p = src[index_src]; - - uint gx,gy; - uint src_index,dst_index; - - for(int ii = -r;ii mul24(radius,radius)) continue; - gx = gidx + jj; - gy = gidy + ii; - - src_index = src_offset/4 + gy * src_step/4 + gx; - q = src[src_index]; - - - ct = abs(p.x-q.x)+abs(p.y-q.y)+abs(p.z-q.z); - wt =sigClr[ct]*sigSpc[(ii+radius)*(2*radius+1)+jj+radius]; - - pf.x += q.x*wt; - pf.y += q.y*wt; - pf.z += q.z*wt; -// pf.w += q.w*wt; - - pq += wt; - - } - } - - pd = pf/pq; - dst[index_dst] = convert_uchar4_rte(pd); -} - -__kernel void bilateral(__global uchar *dst, +__kernel void bilateral_C1_D0(__global uchar *dst, __global const uchar *src, const int dst_rows, const int dst_cols, @@ -128,8 +52,8 @@ 
__kernel void bilateral(__global uchar *dst, if((gidy=left_col) ? (gidx+cols) : gidx); - int src_index = src_offset + mad24(gidy, src_step, gidx); - barrier(CLK_LOCAL_MEM_FENCE); - int p = (int)src[src_index]; - p = gidy >= rows ? HISTOGRAM256_LOCAL_MEM_SIZE : p; - atomic_inc(subhist + p); + if(gidy= rows ? HISTOGRAM256_LOCAL_MEM_SIZE : p; + atomic_inc(subhist + p); + } barrier(CLK_LOCAL_MEM_FENCE); globalHist[mad24(rowIndex, hist_step, lidy)] += subhist[lidy]; diff --git a/modules/ocl/src/kernels/interpolate_frames.cl b/modules/ocl/src/kernels/interpolate_frames.cl new file mode 100644 index 0000000..005a55f --- /dev/null +++ b/modules/ocl/src/kernels/interpolate_frames.cl @@ -0,0 +1,252 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Peng Xiao, pengxiao@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. 
+// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable +#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable + +// Image read mode +__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR; + +// atomic add for 32bit floating point +inline void atomic_addf(volatile __global float *source, const float operand) { + union { + unsigned int intVal; + float floatVal; + } newVal; + union { + unsigned int intVal; + float floatVal; + } prevVal; + do { + prevVal.floatVal = *source; + newVal.floatVal = prevVal.floatVal + operand; + } while (atomic_cmpxchg((volatile __global unsigned int *)source, prevVal.intVal, newVal.intVal) != prevVal.intVal); +} + +__kernel void memsetKernel( + float val, + __global float * image, + int width, + int height, + int step, // in element + int offset + ) +{ + if(get_global_id(0) >= width || get_global_id(1) >= height) + { + return; + } + image += offset; + image[get_global_id(0) + get_global_id(1) * step] = 
val; +} + +__kernel void normalizeKernel( /* divides dst (buffer + d_offset) element-wise by factors (buffer + f_offset) */ + __global float * buffer, + int width, + int height, + int step, + int f_offset, + int d_offset + ) +{ + __global float * factors = buffer + f_offset; + __global float * dst = buffer + d_offset; + + int j = get_global_id(0); + int i = get_global_id(1); + + if(j >= width || i >= height) + { + return; + } + float scale = factors[step * i + j]; + float invScale = (scale == 0.0f) ? 1.0f : (1.0f / scale); /* a zero factor leaves the value unchanged */ + + dst[step * i + j] *= invScale; +} + +__kernel void forwardWarpKernel( /* work-item (j, i) adds src[i][j] into up to four target pixels of buffer + dst_offset and the matching weights into buffer + factor_offset */ + __global const float * src, + __global float * buffer, + __global const float * u, + __global const float * v, + const int w, + const int h, + const int flow_stride, + const int image_stride, + const int factor_offset, + const int dst_offset, + const float time_scale + ) +{ + int j = get_global_id(0); + int i = get_global_id(1); + + if (i >= h || j >= w) return; + + volatile __global float * normalization_factor = (volatile __global float *) buffer + factor_offset; + volatile __global float * dst = (volatile __global float *)buffer + dst_offset; + + int flow_row_offset = i * flow_stride; + int image_row_offset = i * image_stride; + + //bottom left corner of a target pixel + float cx = u[flow_row_offset + j] * time_scale + (float)j + 1.0f; + float cy = v[flow_row_offset + j] * time_scale + (float)i + 1.0f; + // pixel containing bottom left corner + float px; + float py; + float dx = modf(cx, &px); /* NOTE(review): modf truncates toward zero, so for cx in (-1, 0) px is 0 and dx is negative - confirm negative coordinates behave as intended */ + float dy = modf(cy, &py); + // target pixel integer coords + int tx; + int ty; + tx = (int) px; + ty = (int) py; + float value = src[image_row_offset + j]; + float weight; + // fill pixel containing bottom right corner + if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0))) /* skip targets outside the w x h image */ + { + weight = dx * dy; + atomic_addf(dst + ty * image_stride + tx, value * weight); + atomic_addf(normalization_factor + ty * image_stride + tx, weight); + } + + // fill pixel containing bottom left corner + tx -= 1; + if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0))) + { + weight 
= (1.0f - dx) * dy; + atomic_addf(dst + ty * image_stride + tx, value * weight); + atomic_addf(normalization_factor + ty * image_stride + tx, weight); + } + + // fill pixel containing upper left corner + ty -= 1; + if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0))) + { + weight = (1.0f - dx) * (1.0f - dy); + atomic_addf(dst + ty * image_stride + tx, value * weight); + atomic_addf(normalization_factor + ty * image_stride + tx, weight); + } + + // fill pixel containing upper right corner + tx += 1; + if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0))) + { + weight = dx * (1.0f - dy); + atomic_addf(dst + ty * image_stride + tx, value * weight); + atomic_addf(normalization_factor + ty * image_stride + tx, weight); + } +} + +// define buffer offsets +enum +{ + O0_OS = 0, + O1_OS, + U_OS, + V_OS, + UR_OS, + VR_OS +}; + +__kernel void blendFramesKernel( /* writes one blended sample per (ix, iy) into out, sampling tex_src0 / tex_src1 through the file-level sampler */ + image2d_t tex_src0, + image2d_t tex_src1, + __global float * buffer, + __global float * out, + int w, + int h, + int step, + float theta + ) +{ + __global float * u = buffer + h * step * U_OS; /* buffer is addressed as consecutive planes of h * step floats, selected by the *_OS enum */ + __global float * v = buffer + h * step * V_OS; + __global float * ur = buffer + h * step * UR_OS; + __global float * vr = buffer + h * step * VR_OS; + __global float * o0 = buffer + h * step * O0_OS; + __global float * o1 = buffer + h * step * O1_OS; + + int ix = get_global_id(0); + int iy = get_global_id(1); + + if(ix >= w || iy >= h) return; + + int pos = ix + step * iy; + + float _u = u[pos]; + float _v = v[pos]; + + float _ur = ur[pos]; /* _ur / _vr are loaded but not used in the rest of this kernel */ + float _vr = vr[pos]; + + float x = (float)ix + 0.5f; + float y = (float)iy + 0.5f; + bool b0 = o0[pos] > 1e-4f; /* b0 / b1: the o0 / o1 planes exceed a small threshold at this pixel */ + bool b1 = o1[pos] > 1e-4f; + + float2 coord0 = (float2)(x - _u * theta, y - _v * theta); + float2 coord1 = (float2)(x + _u * (1.0f - theta), y + _v * (1.0f - theta)); + + if (b0 && b1) + { + // pixel is visible on both frames + out[pos] = read_imagef(tex_src0, sampler, coord0).x * (1.0f - theta) + + read_imagef(tex_src1, sampler, coord1).x * theta; + } + else if (b0) + { + 
// visible on the first frame only + out[pos] = read_imagef(tex_src0, sampler, coord0).x; + } + else + { + // visible on the second frame only + out[pos] = read_imagef(tex_src1, sampler, coord1).x; + } +} diff --git a/modules/ocl/src/match_template.cpp b/modules/ocl/src/match_template.cpp index bf209fd..d5b017c 100644 --- a/modules/ocl/src/match_template.cpp +++ b/modules/ocl/src/match_template.cpp @@ -52,7 +52,10 @@ using namespace cv::ocl; using namespace std; #if !defined (HAVE_OPENCL) -void cv::ocl::matchTemplate(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); } +void cv::ocl::matchTemplate(const oclMat &, const oclMat &, oclMat &) +{ + throw_nogpu(); +} #else //helper routines namespace cv @@ -64,443 +67,430 @@ namespace cv } } -namespace cv { namespace ocl +namespace cv { - void matchTemplate_SQDIFF( - const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); + namespace ocl + { + void matchTemplate_SQDIFF( + const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf); - void matchTemplate_SQDIFF_NORMED( - const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); + void matchTemplate_SQDIFF_NORMED( + const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf); - void matchTemplate_CCORR( - const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); + void matchTemplate_CCORR( + const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf); - void matchTemplate_CCORR_NORMED( - const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); + void matchTemplate_CCORR_NORMED( + const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf); - void matchTemplate_CCOFF( - const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); + void matchTemplate_CCOFF( + const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf); - void 
matchTemplate_CCOFF_NORMED( - const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf); + void matchTemplate_CCOFF_NORMED( + const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf); - void matchTemplateNaive_SQDIFF( - const oclMat& image, const oclMat& templ, oclMat& result, int cn); + void matchTemplateNaive_SQDIFF( + const oclMat &image, const oclMat &templ, oclMat &result, int cn); - void matchTemplateNaive_CCORR( - const oclMat& image, const oclMat& templ, oclMat& result, int cn); + void matchTemplateNaive_CCORR( + const oclMat &image, const oclMat &templ, oclMat &result, int cn); - // Evaluates optimal template's area threshold. If - // template's area is less than the threshold, we use naive match - // template version, otherwise FFT-based (if available) - int getTemplateThreshold(int method, int depth) - { - switch (method) + // Evaluates optimal template's area threshold. If + // template's area is less than the threshold, we use naive match + // template version, otherwise FFT-based (if available) + int getTemplateThreshold(int method, int depth) { - case CV_TM_CCORR: - if (depth == CV_32F) return 250; - if (depth == CV_8U) return 300; - break; - case CV_TM_SQDIFF: - if (depth == CV_32F) return 0x7fffffff; // do naive SQDIFF for CV_32F - if (depth == CV_8U) return 300; - break; + switch (method) + { + case CV_TM_CCORR: + if (depth == CV_32F) return 250; + if (depth == CV_8U) return 300; + break; + case CV_TM_SQDIFF: + if (depth == CV_32F) return 0x7fffffff; // do naive SQDIFF for CV_32F + if (depth == CV_8U) return 300; + break; + } + CV_Error(CV_StsBadArg, "getTemplateThreshold: unsupported match template mode"); + return 0; } - CV_Error(CV_StsBadArg, "getTemplateThreshold: unsupported match template mode"); - return 0; - } - ////////////////////////////////////////////////////////////////////// - // SQDIFF - void matchTemplate_SQDIFF( - const oclMat& image, const oclMat& templ, oclMat& result, 
MatchTemplateBuf &) - { - result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F); - if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth())) + ////////////////////////////////////////////////////////////////////// + // SQDIFF + void matchTemplate_SQDIFF( + const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &) { - matchTemplateNaive_SQDIFF(image, templ, result, image.channels()); - return; + result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F); + if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth())) + { + matchTemplateNaive_SQDIFF(image, templ, result, image.channels()); + return; + } + else + { + // TODO + CV_Error(CV_StsBadArg, "Not supported yet for this size template"); + } } - else + + void matchTemplate_SQDIFF_NORMED( + const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf) { - // TODO - CV_Error(CV_StsBadArg, "Not supported yet for this size template"); - } - } + matchTemplate_CCORR(image, templ, result, buf); + buf.image_sums.resize(1); - void matchTemplate_SQDIFF_NORMED( - const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) - { - matchTemplate_CCORR(image,templ,result,buf); - buf.image_sums.resize(1); + integral(image.reshape(1), buf.image_sums[0]); - integral(image.reshape(1), buf.image_sums[0]); + unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0]; -#if SQRSUM_FIXED - unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0]; -#else - Mat sqr_mat = templ.reshape(1); - unsigned long long templ_sqsum = (unsigned long long)sum(sqr_mat.mul(sqr_mat))[0]; -#endif - - Context *clCxt = image.clCxt; - string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED"; - vector< pair > args; - - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); - 
args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); - - size_t globalThreads[3] = {result.cols, result.rows, 1}; - size_t localThreads[3] = {32, 8, 1}; - openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U); - } + Context *clCxt = image.clCxt; + string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED"; + vector< pair > args; - void matchTemplateNaive_SQDIFF( - const oclMat& image, const oclMat& templ, oclMat& result, int) - { - CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U ) - || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F) - ); - CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1); - CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1); - - Context *clCxt = image.clCxt; - string kernelName = "matchTemplate_Naive_SQDIFF"; - - vector< pair > args; - - args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); - 
args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); - - size_t globalThreads[3] = {result.cols, result.rows, 1}; - size_t localThreads[3] = {32, 8, 1}; - openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth()); - } + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); + args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); - ////////////////////////////////////////////////////////////////////// - // CCORR - void matchTemplate_CCORR( - const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) - { - result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F); - if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, 
image.depth())) + size_t globalThreads[3] = {result.cols, result.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U); + } + + void matchTemplateNaive_SQDIFF( + const oclMat &image, const oclMat &templ, oclMat &result, int) { - matchTemplateNaive_CCORR(image, templ, result, image.channels()); - return; + CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U ) + || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F) + ); + CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.oclchannels() == 4) && result.channels() == 1); + CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1); + + Context *clCxt = image.clCxt; + string kernelName = "matchTemplate_Naive_SQDIFF"; + + vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + + size_t globalThreads[3] = {result.cols, result.rows, 1}; 
+ size_t localThreads[3] = {32, 8, 1}; + openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth()); } - else + + ////////////////////////////////////////////////////////////////////// + // CCORR + void matchTemplate_CCORR( + const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf) { - CV_Error(CV_StsBadArg, "Not supported yet for this size template"); - if(image.depth() == CV_8U && templ.depth() == CV_8U) + result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F); + if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth())) { - image.convertTo(buf.imagef, CV_32F); - templ.convertTo(buf.templf, CV_32F); + matchTemplateNaive_CCORR(image, templ, result, image.channels()); + return; + } + else + { + CV_Error(CV_StsBadArg, "Not supported yet for this size template"); + if(image.depth() == CV_8U && templ.depth() == CV_8U) + { + image.convertTo(buf.imagef, CV_32F); + templ.convertTo(buf.templf, CV_32F); + } + CV_Assert(image.channels() == 1); + oclMat o_result(image.size(), CV_MAKETYPE(CV_32F, image.channels())); + filter2D(buf.imagef, o_result, CV_32F, buf.templf, Point(0, 0)); + result = o_result(Rect(0, 0, image.rows - templ.rows + 1, image.cols - templ.cols + 1)); } - CV_Assert(image.channels() == 1); - oclMat o_result(image.size(), CV_MAKETYPE(CV_32F, image.channels())); - filter2D(buf.imagef,o_result,CV_32F,buf.templf, Point(0,0)); - result = o_result(Rect(0,0,image.rows - templ.rows + 1, image.cols - templ.cols + 1)); } - } - void matchTemplate_CCORR_NORMED( - const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) - { - matchTemplate_CCORR(image,templ,result,buf); - buf.image_sums.resize(1); - buf.image_sqsums.resize(1); + void matchTemplate_CCORR_NORMED( + const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf) + { + matchTemplate_CCORR(image, templ, result, buf); + 
buf.image_sums.resize(1); + buf.image_sqsums.resize(1); - integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]); -#if SQRSUM_FIXED - unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0]; -#else - oclMat templ_c1 = templ.reshape(1); - multiply(templ_c1, templ_c1, templ_c1); - unsigned long long templ_sqsum = (unsigned long long)sum(templ_c1)[0]; -#endif - Context *clCxt = image.clCxt; - string kernelName = "normalizeKernel"; - vector< pair > args; - - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); - args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); - - size_t globalThreads[3] = {result.cols, result.rows, 1}; - size_t localThreads[3] = {32, 8, 1}; - openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U); - } + integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]); + + unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0]; + + Context *clCxt = image.clCxt; + string kernelName = "normalizeKernel"; + vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); + args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum)); + args.push_back( 
make_pair( sizeof(cl_int), (void *)&result.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + + size_t globalThreads[3] = {result.cols, result.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U); + } - void matchTemplateNaive_CCORR( - const oclMat& image, const oclMat& templ, oclMat& result, int) - { - CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U ) - || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F) - ); - CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1); - CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1); - - Context *clCxt = image.clCxt; - string kernelName = "matchTemplate_Naive_CCORR"; - - vector< pair > args; - - args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); - 
args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); - - size_t globalThreads[3] = {result.cols, result.rows, 1}; - size_t localThreads[3] = {32, 8, 1}; - openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth()); - } - ////////////////////////////////////////////////////////////////////// - // CCOFF - void matchTemplate_CCOFF( - const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) - { - CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U); - - matchTemplate_CCORR(image,templ,result,buf); - - Context *clCxt = image.clCxt; - string kernelName; - - kernelName = "matchTemplate_Prepared_CCOFF"; - size_t globalThreads[3] = {result.cols, result.rows, 1}; - size_t localThreads[3] = {32, 8, 1}; - - vector< pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); - // to be continued in the following section - if(image.channels() == 1) + void matchTemplateNaive_CCORR( + const oclMat &image, const oclMat &templ, oclMat &result, int) { - 
buf.image_sums.resize(1); - integral(image, buf.image_sums[0]); - - float templ_sum = 0; - templ_sum = (float)sum(templ)[0] / templ.size().area(); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); - args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) ); + CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U ) + || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F) + ); + CV_Assert(image.channels() == templ.channels() && (image.oclchannels() == 1 || image.oclchannels() == 4) && result.channels() == 1); + CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1); + + Context *clCxt = image.clCxt; + string kernelName = "matchTemplate_Naive_CCORR"; + + vector< pair > args; + + args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); 
+ + size_t globalThreads[3] = {result.cols, result.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth()); } - else + ////////////////////////////////////////////////////////////////////// + // CCOFF + void matchTemplate_CCOFF( + const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf) { - Vec4f templ_sum = Vec4f::all(0); - split(image,buf.images); - templ_sum = sum(templ) / templ.size().area(); - buf.image_sums.resize(buf.images.size()); - - - for(int i = 0; i < image.channels(); i ++) + CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U); + + matchTemplate_CCORR(image, templ, result, buf); + + Context *clCxt = image.clCxt; + string kernelName; + + kernelName = "matchTemplate_Prepared_CCOFF"; + size_t globalThreads[3] = {result.cols, result.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + + vector< pair > args; + args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + // to be continued in the following section + if(image.channels() == 1) { - integral(buf.images[i], buf.image_sums[i]); - } - switch(image.channels()) - { - case 4: + buf.image_sums.resize(1); + integral(image, buf.image_sums[0]); + + float templ_sum = 0; + templ_sum = (float)sum(templ)[0] / templ.size().area(); args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) 
); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) ); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) ); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) ); args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); - args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) ); - args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) ); - args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) ); - args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) ); - break; - default: - CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels"); - break; + args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum) ); } + else + { + Vec4f templ_sum = Vec4f::all(0); + split(image, buf.images); + templ_sum = sum(templ) / templ.size().area(); + buf.image_sums.resize(buf.images.size()); + + + for(int i = 0; i < image.channels(); i ++) + { + integral(buf.images[i], buf.image_sums[i]); + } + switch(image.oclchannels()) + { + case 4: + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); + args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) ); + args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[1]) ); + args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[2]) ); + args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[3]) ); + break; + default: + 
CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels"); + break; + } + } + openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth()); } - openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth()); - } - void matchTemplate_CCOFF_NORMED( - const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf) - { - image.convertTo(buf.imagef, CV_32F); - templ.convertTo(buf.templf, CV_32F); - - matchTemplate_CCORR(buf.imagef, buf.templf, result, buf); - float scale = 1.f/templ.size().area(); - - Context *clCxt = image.clCxt; - string kernelName; - - kernelName = "matchTemplate_Prepared_CCOFF_NORMED"; - size_t globalThreads[3] = {result.cols, result.rows, 1}; - size_t localThreads[3] = {32, 8, 1}; - - vector< pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); - args.push_back( make_pair( sizeof(cl_float),(void *)&scale) ); - // to be continued in the following section - if(image.channels() == 1) + void matchTemplate_CCOFF_NORMED( + const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf) { - buf.image_sums.resize(1); - buf.image_sqsums.resize(1); - integral(image, buf.image_sums[0], buf.image_sqsums[0]); - float templ_sum = 0; - float templ_sqsum = 0; - templ_sum = (float)sum(templ)[0]; -#if SQRSUM_FIXED - 
templ_sqsum = sqrSum(templ)[0]; -#else - oclMat templ_sqr = templ; - multiply(templ,templ, templ_sqr); - templ_sqsum = saturate_cast(sum(templ_sqr)[0]); -#endif //SQRSUM_FIXED - templ_sqsum -= scale * templ_sum * templ_sum; - templ_sum *= scale; - - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) ); - args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) ); - args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum) ); - } - else - { - Vec4f templ_sum = Vec4f::all(0); - Vec4f templ_sqsum = Vec4f::all(0); - - split(image,buf.images); - templ_sum = sum(templ); -#if SQRSUM_FIXED - templ_sqsum = sqrSum(templ); -#else - oclMat templ_sqr = templ; - multiply(templ,templ, templ_sqr); - templ_sqsum = sum(templ_sqr); -#endif //SQRSUM_FIXED - templ_sqsum -= scale * templ_sum * templ_sum; - - float templ_sqsum_sum = 0; - for(int i = 0; i < image.channels(); i ++) + image.convertTo(buf.imagef, CV_32F); + templ.convertTo(buf.templf, CV_32F); + + matchTemplate_CCORR(buf.imagef, buf.templf, result, buf); + float scale = 1.f / templ.size().area(); + + Context *clCxt = image.clCxt; + string kernelName; + + kernelName = "matchTemplate_Prepared_CCOFF_NORMED"; + size_t globalThreads[3] = {result.cols, result.rows, 1}; + size_t localThreads[3] = {32, 8, 1}; + + vector< pair > args; + args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) ); + args.push_back( make_pair( sizeof(cl_int), 
(void *)&templ.rows) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + args.push_back( make_pair( sizeof(cl_float), (void *)&scale) ); + // to be continued in the following section + if(image.channels() == 1) { - templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i]; - } - templ_sum *= scale; - buf.image_sums.resize(buf.images.size()); - buf.image_sqsums.resize(buf.images.size()); + buf.image_sums.resize(1); + buf.image_sqsums.resize(1); + integral(image, buf.image_sums[0], buf.image_sqsums[0]); + float templ_sum = 0; + float templ_sqsum = 0; + templ_sum = (float)sum(templ)[0]; - for(int i = 0; i < image.channels(); i ++) - { - integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]); - } + templ_sqsum = sqrSum(templ)[0]; + + templ_sqsum -= scale * templ_sum * templ_sum; + templ_sum *= scale; - switch(image.channels()) - { - case 4: args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) ); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) ); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) ); args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) ); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) ); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) ); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) ); 
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) ); args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) ); - args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) ); - args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) ); - args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) ); - args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) ); - args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum_sum) ); - break; - default: - CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels"); - break; + args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum) ); + args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sqsum) ); } + else + { + Vec4f templ_sum = Vec4f::all(0); + Vec4f templ_sqsum = Vec4f::all(0); + + split(image, buf.images); + templ_sum = sum(templ); + + templ_sqsum = sqrSum(templ); + + templ_sqsum -= scale * templ_sum * templ_sum; + + float templ_sqsum_sum = 0; + for(int i = 0; i < image.oclchannels(); i ++) + { + templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i]; + } + templ_sum *= scale; + buf.image_sums.resize(buf.images.size()); + buf.image_sqsums.resize(buf.images.size()); + + for(int i = 0; i < image.oclchannels(); i ++) + { + integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]); + } + + switch(image.oclchannels()) + { + case 4: + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); + args.push_back( make_pair( sizeof(cl_mem), (void 
*)&buf.image_sqsums[0].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) ); + args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) ); + args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) ); + args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) ); + args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[1]) ); + args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[2]) ); + args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[3]) ); + args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sqsum_sum) ); + break; + default: + CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels"); + break; + } + } + openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth()); } - openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth()); - } -}/*ocl*/} /*cv*/ + }/*ocl*/ +} /*cv*/ -void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method) +void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method) { MatchTemplateBuf buf; - matchTemplate(image,templ, result, method,buf); + matchTemplate(image, templ, result, method, buf); } -void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf) +void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf) { CV_Assert(image.type() == templ.type()); CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows); - typedef void (*Caller)(const oclMat&, const oclMat&, oclMat&, MatchTemplateBuf&); + 
typedef void (*Caller)(const oclMat &, const oclMat &, oclMat &, MatchTemplateBuf &); - const Caller callers[] = { - ::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED, - ::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED, + const Caller callers[] = + { + ::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED, + ::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED, ::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED }; diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp index f52af24..3317d68 100644 --- a/modules/ocl/src/matrix_operations.cpp +++ b/modules/ocl/src/matrix_operations.cpp @@ -45,7 +45,7 @@ #include "precomp.hpp" -#define ALIGN 32 +#define ALIGN 32 #define GPU_MATRIX_MALLOC_STEP(step) (((step) + ALIGN - 1) / ALIGN) * ALIGN using namespace cv; @@ -62,32 +62,32 @@ namespace cv { namespace ocl { - void oclMat::upload(const Mat& /*m*/) + void oclMat::upload(const Mat & /*m*/) { throw_nogpu(); } - void oclMat::download(cv::Mat& /*m*/) const + void oclMat::download(cv::Mat & /*m*/) const { throw_nogpu(); } - void oclMat::copyTo( oclMat& /*m*/ ) const + void oclMat::copyTo( oclMat & /*m*/ ) const { throw_nogpu(); } - void oclMat::copyTo( oclMat& /*m*/, const oclMat&/* mask */) const + void oclMat::copyTo( oclMat & /*m*/, const oclMat &/* mask */) const { throw_nogpu(); } - void oclMat::convertTo( oclMat& /*m*/, int /*rtype*/, double /*alpha*/, double /*beta*/ ) const + void oclMat::convertTo( oclMat & /*m*/, int /*rtype*/, double /*alpha*/, double /*beta*/ ) const { throw_nogpu(); } - oclMat &oclMat::operator = (const Scalar& /*s*/) + oclMat &oclMat::operator = (const Scalar & /*s*/) { throw_nogpu(); return *this; } - oclMat &oclMat::setTo(const Scalar& /*s*/, const oclMat& /*mask*/) + oclMat &oclMat::setTo(const Scalar & /*s*/, const oclMat & /*mask*/) { throw_nogpu(); return *this; @@ -120,7 +120,7 @@ namespace cv extern const char *operator_convertTo; extern const char *operator_setTo; extern const char 
*operator_setToM; - extern const char *convertC3C4; + extern const char *convertC3C4; } } @@ -128,11 +128,11 @@ namespace cv // convert_C3C4 void convert_C3C4(const cl_mem &src, oclMat &dst, int srcStep) { - int dstStep_in_pixel = dst.step1() / dst.channels(); - int pixel_end = dst.wholecols * dst.wholerows -1; + int dstStep_in_pixel = dst.step1() / dst.oclchannels(); + int pixel_end = dst.wholecols * dst.wholerows - 1; Context *clCxt = dst.clCxt; string kernelName = "convertC3C4"; - char compile_option[32]; + char compile_option[32]; switch(dst.depth()) { case 0: @@ -156,8 +156,8 @@ void convert_C3C4(const cl_mem &src, oclMat &dst, int srcStep) case 6: sprintf(compile_option, "-D GENTYPE4=double4"); break; - default: - CV_Error(CV_StsUnsupportedFormat,"unknown depth"); + default: + CV_Error(CV_StsUnsupportedFormat, "unknown depth"); } vector< pair > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src)); @@ -167,20 +167,20 @@ void convert_C3C4(const cl_mem &src, oclMat &dst, int srcStep) args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep_in_pixel)); args.push_back( make_pair( sizeof(cl_int), (void *)&pixel_end)); - size_t globalThreads[3] = {((dst.wholecols *dst.wholerows+3)/4 + 255) / 256 * 256, 1, 1}; + size_t globalThreads[3] = {((dst.wholecols * dst.wholerows + 3) / 4 + 255) / 256 * 256, 1, 1}; size_t localThreads[3] = {256, 1, 1}; - openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads, args, -1, -1,compile_option); + openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads, args, -1, -1, compile_option); } //////////////////////////////////////////////////////////////////////// // convert_C4C3 void convert_C4C3(const oclMat &src, cl_mem &dst, int dstStep) { - int srcStep_in_pixel = src.step1() / src.channels(); - int pixel_end = src.wholecols*src.wholerows -1; + int srcStep_in_pixel = src.step1() / src.oclchannels(); + int pixel_end = src.wholecols * src.wholerows - 1; Context *clCxt = 
src.clCxt; string kernelName = "convertC4C3"; - char compile_option[32]; + char compile_option[32]; switch(src.depth()) { case 0: @@ -204,8 +204,8 @@ void convert_C4C3(const oclMat &src, cl_mem &dst, int dstStep) case 6: sprintf(compile_option, "-D GENTYPE4=double4"); break; - default: - CV_Error(CV_StsUnsupportedFormat,"unknown depth"); + default: + CV_Error(CV_StsUnsupportedFormat, "unknown depth"); } vector< pair > args; @@ -216,10 +216,10 @@ void convert_C4C3(const oclMat &src, cl_mem &dst, int dstStep) args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep_in_pixel)); args.push_back( make_pair( sizeof(cl_int), (void *)&pixel_end)); - size_t globalThreads[3] = {((src.wholecols *src.wholerows+3)/4 + 255) / 256 * 256, 1, 1}; + size_t globalThreads[3] = {((src.wholecols * src.wholerows + 3) / 4 + 255) / 256 * 256, 1, 1}; size_t localThreads[3] = {256, 1, 1}; - openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads, args, -1, -1,compile_option); + openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads, args, -1, -1, compile_option); } void cv::ocl::oclMat::upload(const Mat &m) @@ -228,100 +228,100 @@ void cv::ocl::oclMat::upload(const Mat &m) Size wholeSize; Point ofs; m.locateROI(wholeSize, ofs); - int type = m.type(); - if(m.channels() == 3) - { - type = CV_MAKETYPE(m.depth(), 4); - } - create(wholeSize, type); + // int type = m.type(); + // if(m.oclchannels() == 3) + //{ + // type = CV_MAKETYPE(m.depth(), 4); + //} + create(wholeSize, m.type()); if(m.channels() == 3) { - int pitch = wholeSize.width * 3 * m.elemSize1(); - int tail_padding = m.elemSize1()*3072; - int err; - cl_mem temp = clCreateBuffer(clCxt->impl->clContext,CL_MEM_READ_WRITE, - (pitch*wholeSize.height+tail_padding-1)/tail_padding*tail_padding,0,&err); - openCLVerifyCall(err); - - openCLMemcpy2D(clCxt,temp,pitch,m.datastart,m.step,wholeSize.width*m.elemSize(),wholeSize.height,clMemcpyHostToDevice,3); - convert_C3C4(temp, *this, pitch); - 
//int* cputemp=new int[wholeSize.height*wholeSize.width * 3]; - //int* cpudata=new int[this->step*this->wholerows/sizeof(int)]; - //openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, temp, CL_TRUE, - // 0, wholeSize.height*wholeSize.width * 3* sizeof(int), cputemp, 0, NULL, NULL)); - //openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)data, CL_TRUE, - // 0, this->step*this->wholerows, cpudata, 0, NULL, NULL)); - //for(int i=0;istep/sizeof(int); - // for(int j=0;jimpl->clContext, CL_MEM_READ_WRITE, + (pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, 0, &err); + openCLVerifyCall(err); + + openCLMemcpy2D(clCxt, temp, pitch, m.datastart, m.step, wholeSize.width * m.elemSize(), wholeSize.height, clMemcpyHostToDevice, 3); + convert_C3C4(temp, *this, pitch); + //int* cputemp=new int[wholeSize.height*wholeSize.width * 3]; + //int* cpudata=new int[this->step*this->wholerows/sizeof(int)]; + //openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, temp, CL_TRUE, + // 0, wholeSize.height*wholeSize.width * 3* sizeof(int), cputemp, 0, NULL, NULL)); + //openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)data, CL_TRUE, + // 0, this->step*this->wholerows, cpudata, 0, NULL, NULL)); + //for(int i=0;istep/sizeof(int); + // for(int j=0;jempty()); - int t = type(); - if(download_channels == 3) - { - t = CV_MAKETYPE(depth(), 3); - } - m.create(wholerows, wholecols, t); - - if(download_channels == 3) + // int t = type(); + // if(download_channels == 3) + //{ + // t = CV_MAKETYPE(depth(), 3); + //} + m.create(wholerows, wholecols, type()); + + if(m.channels() == 3) { - int pitch = wholecols * 3 * m.elemSize1(); - int tail_padding = m.elemSize1()*3072; - int err; - cl_mem temp = clCreateBuffer(clCxt->impl->clContext,CL_MEM_READ_WRITE, - (pitch*wholerows+tail_padding-1)/tail_padding*tail_padding,0,&err); - openCLVerifyCall(err); - - convert_C4C3(*this, temp, pitch/m.elemSize1()); - 
openCLMemcpy2D(clCxt,m.data,m.step,temp,pitch,wholecols*m.elemSize(),wholerows,clMemcpyDeviceToHost,3); - //int* cputemp=new int[wholecols*wholerows * 3]; - //int* cpudata=new int[this->step*this->wholerows/sizeof(int)]; - //openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, temp, CL_TRUE, - // 0, wholecols*wholerows * 3* sizeof(int), cputemp, 0, NULL, NULL)); - //openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)data, CL_TRUE, - // 0, this->step*this->wholerows, cpudata, 0, NULL, NULL)); - //for(int i=0;istep/sizeof(int); - // for(int j=0;jimpl->clContext, CL_MEM_READ_WRITE, + (pitch * wholerows + tail_padding - 1) / tail_padding * tail_padding, 0, &err); + openCLVerifyCall(err); + + convert_C4C3(*this, temp, pitch / m.elemSize1()); + openCLMemcpy2D(clCxt, m.data, m.step, temp, pitch, wholecols * m.elemSize(), wholerows, clMemcpyDeviceToHost, 3); + //int* cputemp=new int[wholecols*wholerows * 3]; + //int* cpudata=new int[this->step*this->wholerows/sizeof(int)]; + //openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, temp, CL_TRUE, + // 0, wholecols*wholerows * 3* sizeof(int), cputemp, 0, NULL, NULL)); + //openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)data, CL_TRUE, + // 0, this->step*this->wholerows, cpudata, 0, NULL, NULL)); + //for(int i=0;istep/sizeof(int); + // for(int j=0;j > args; @@ -349,8 +349,8 @@ void copy_to_with_mask(const oclMat &src, oclMat &dst, const oclMat &mask, strin {"uchar3", "char3", "ushort3", "short3", "int3", "float3", "double3"}, {"uchar4", "char4", "ushort4", "short4", "int4", "float4", "double4"} }; - char compile_option[32]; - sprintf(compile_option, "-D GENTYPE=%s", string_types[dst.channels()-1][dst.depth()].c_str()); + char compile_option[32]; + sprintf(compile_option, "-D GENTYPE=%s", string_types[dst.oclchannels() - 1][dst.depth()].c_str()); size_t localThreads[3] = {16, 16, 1}; size_t globalThreads[3]; @@ -374,7 +374,7 @@ void copy_to_with_mask(const oclMat &src, oclMat 
&dst, const oclMat &mask, strin args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset )); openCLExecuteKernel(dst.clCxt , &operator_copyToM, kernelName, globalThreads, - localThreads, args, -1, -1,compile_option); + localThreads, args, -1, -1, compile_option); } void cv::ocl::oclMat::copyTo( oclMat &m ) const @@ -432,7 +432,7 @@ void convert_run(const oclMat &src, oclMat &dst, double alpha, double beta) args.push_back( make_pair( sizeof(cl_float) , (void *)&alpha_f )); args.push_back( make_pair( sizeof(cl_float) , (void *)&beta_f )); openCLExecuteKernel(dst.clCxt , &operator_convertTo, kernelName, globalThreads, - localThreads, args, dst.channels(), dst.depth()); + localThreads, args, dst.oclchannels(), dst.depth()); } void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double beta ) const { @@ -486,177 +486,177 @@ void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kern { globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0]; } - char compile_option[32]; - union sc - { - cl_uchar4 uval; - cl_char4 cval; - cl_ushort4 usval; - cl_short4 shval; - cl_int4 ival; - cl_float4 fval; - cl_double4 dval; - }val; + char compile_option[32]; + union sc + { + cl_uchar4 uval; + cl_char4 cval; + cl_ushort4 usval; + cl_short4 shval; + cl_int4 ival; + cl_float4 fval; + cl_double4 dval; + } val; switch(dst.depth()) { case CV_8U: - val.uval.s[0] = saturate_cast(scalar.val[0]); - val.uval.s[1] = saturate_cast(scalar.val[1]); - val.uval.s[2] = saturate_cast(scalar.val[2]); - val.uval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=uchar"); - args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=uchar4"); - args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported 
channels"); - } + val.uval.s[0] = saturate_cast(scalar.val[0]); + val.uval.s[1] = saturate_cast(scalar.val[1]); + val.uval.s[2] = saturate_cast(scalar.val[2]); + val.uval.s[3] = saturate_cast(scalar.val[3]); + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=uchar"); + args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=uchar4"); + args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } break; case CV_8S: - val.cval.s[0] = saturate_cast(scalar.val[0]); - val.cval.s[1] = saturate_cast(scalar.val[1]); - val.cval.s[2] = saturate_cast(scalar.val[2]); - val.cval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=char"); - args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=char4"); - args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } + val.cval.s[0] = saturate_cast(scalar.val[0]); + val.cval.s[1] = saturate_cast(scalar.val[1]); + val.cval.s[2] = saturate_cast(scalar.val[2]); + val.cval.s[3] = saturate_cast(scalar.val[3]); + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=char"); + args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=char4"); + args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } break; case CV_16U: - val.usval.s[0] = saturate_cast(scalar.val[0]); - val.usval.s[1] = saturate_cast(scalar.val[1]); - val.usval.s[2] = saturate_cast(scalar.val[2]); - val.usval.s[3] = saturate_cast(scalar.val[3]); - 
switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=ushort"); - args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=ushort4"); - args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } + val.usval.s[0] = saturate_cast(scalar.val[0]); + val.usval.s[1] = saturate_cast(scalar.val[1]); + val.usval.s[2] = saturate_cast(scalar.val[2]); + val.usval.s[3] = saturate_cast(scalar.val[3]); + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=ushort"); + args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=ushort4"); + args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } break; case CV_16S: - val.shval.s[0] = saturate_cast(scalar.val[0]); - val.shval.s[1] = saturate_cast(scalar.val[1]); - val.shval.s[2] = saturate_cast(scalar.val[2]); - val.shval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=short"); - args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=short4"); - args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } + val.shval.s[0] = saturate_cast(scalar.val[0]); + val.shval.s[1] = saturate_cast(scalar.val[1]); + val.shval.s[2] = saturate_cast(scalar.val[2]); + val.shval.s[3] = saturate_cast(scalar.val[3]); + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=short"); + args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D 
GENTYPE=short4"); + args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } break; case CV_32S: - val.ival.s[0] = saturate_cast(scalar.val[0]); - val.ival.s[1] = saturate_cast(scalar.val[1]); - val.ival.s[2] = saturate_cast(scalar.val[2]); - val.ival.s[3] = saturate_cast(scalar.val[3]); - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=int"); - args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] )); - break; - case 2: - sprintf(compile_option, "-D GENTYPE=int2"); - cl_int2 i2val; - i2val.s[0] = val.ival.s[0]; - i2val.s[1] = val.ival.s[1]; - args.push_back( make_pair( sizeof(cl_int2) , (void *)&i2val )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=int4"); - args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } + val.ival.s[0] = saturate_cast(scalar.val[0]); + val.ival.s[1] = saturate_cast(scalar.val[1]); + val.ival.s[2] = saturate_cast(scalar.val[2]); + val.ival.s[3] = saturate_cast(scalar.val[3]); + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=int"); + args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] )); + break; + case 2: + sprintf(compile_option, "-D GENTYPE=int2"); + cl_int2 i2val; + i2val.s[0] = val.ival.s[0]; + i2val.s[1] = val.ival.s[1]; + args.push_back( make_pair( sizeof(cl_int2) , (void *)&i2val )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=int4"); + args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } break; case CV_32F: - val.fval.s[0] = scalar.val[0]; - val.fval.s[1] = scalar.val[1]; - val.fval.s[2] = scalar.val[2]; - val.fval.s[3] = scalar.val[3]; - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=float"); - 
args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=float4"); - args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } + val.fval.s[0] = scalar.val[0]; + val.fval.s[1] = scalar.val[1]; + val.fval.s[2] = scalar.val[2]; + val.fval.s[3] = scalar.val[3]; + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=float"); + args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=float4"); + args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } break; case CV_64F: - val.dval.s[0] = scalar.val[0]; - val.dval.s[1] = scalar.val[1]; - val.dval.s[2] = scalar.val[2]; - val.dval.s[3] = scalar.val[3]; - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=double"); - args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=double4"); - args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } + val.dval.s[0] = scalar.val[0]; + val.dval.s[1] = scalar.val[1]; + val.dval.s[2] = scalar.val[2]; + val.dval.s[3] = scalar.val[3]; + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=double"); + args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=double4"); + args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } break; - default: - CV_Error(CV_StsUnsupportedFormat,"unknown depth"); + default: + CV_Error(CV_StsUnsupportedFormat, 
"unknown depth"); } #if CL_VERSION_1_2 - if(dst.offset==0 && dst.cols==dst.wholecols) - { - clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue,(cl_mem)dst.data,args[0].second,args[0].first,0,dst.step*dst.rows,0,NULL,NULL); - } - else - { - args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel)); - openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads, - localThreads, args, -1, -1,compile_option); - } + if(dst.offset == 0 && dst.cols == dst.wholecols) + { + clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue, (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL); + } + else + { + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel)); + openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads, + localThreads, args, -1, -1, compile_option); + } #else args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols )); @@ -664,7 +664,7 @@ void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, string kern args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel )); args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel)); openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads, - localThreads, args, -1, -1,compile_option); + localThreads, args, -1, -1, compile_option); #endif } @@ -678,154 +678,154 @@ void 
set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat & globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1]; globalThreads[2] = 1; int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize(); - char compile_option[32]; - union sc - { - cl_uchar4 uval; - cl_char4 cval; - cl_ushort4 usval; - cl_short4 shval; - cl_int4 ival; - cl_float4 fval; - cl_double4 dval; - }val; + char compile_option[32]; + union sc + { + cl_uchar4 uval; + cl_char4 cval; + cl_ushort4 usval; + cl_short4 shval; + cl_int4 ival; + cl_float4 fval; + cl_double4 dval; + } val; switch(dst.depth()) { case CV_8U: - val.uval.s[0] = saturate_cast(scalar.val[0]); - val.uval.s[1] = saturate_cast(scalar.val[1]); - val.uval.s[2] = saturate_cast(scalar.val[2]); - val.uval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=uchar"); - args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=uchar4"); - args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } + val.uval.s[0] = saturate_cast(scalar.val[0]); + val.uval.s[1] = saturate_cast(scalar.val[1]); + val.uval.s[2] = saturate_cast(scalar.val[2]); + val.uval.s[3] = saturate_cast(scalar.val[3]); + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=uchar"); + args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=uchar4"); + args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } break; case CV_8S: - val.cval.s[0] = saturate_cast(scalar.val[0]); - val.cval.s[1] = saturate_cast(scalar.val[1]); - val.cval.s[2] = saturate_cast(scalar.val[2]); - 
val.cval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=char"); - args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=char4"); - args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } + val.cval.s[0] = saturate_cast(scalar.val[0]); + val.cval.s[1] = saturate_cast(scalar.val[1]); + val.cval.s[2] = saturate_cast(scalar.val[2]); + val.cval.s[3] = saturate_cast(scalar.val[3]); + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=char"); + args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=char4"); + args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } break; case CV_16U: - val.usval.s[0] = saturate_cast(scalar.val[0]); - val.usval.s[1] = saturate_cast(scalar.val[1]); - val.usval.s[2] = saturate_cast(scalar.val[2]); - val.usval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=ushort"); - args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=ushort4"); - args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } + val.usval.s[0] = saturate_cast(scalar.val[0]); + val.usval.s[1] = saturate_cast(scalar.val[1]); + val.usval.s[2] = saturate_cast(scalar.val[2]); + val.usval.s[3] = saturate_cast(scalar.val[3]); + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=ushort"); + args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] )); + break; + case 4: + 
sprintf(compile_option, "-D GENTYPE=ushort4"); + args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } break; case CV_16S: - val.shval.s[0] = saturate_cast(scalar.val[0]); - val.shval.s[1] = saturate_cast(scalar.val[1]); - val.shval.s[2] = saturate_cast(scalar.val[2]); - val.shval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=short"); - args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=short4"); - args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } + val.shval.s[0] = saturate_cast(scalar.val[0]); + val.shval.s[1] = saturate_cast(scalar.val[1]); + val.shval.s[2] = saturate_cast(scalar.val[2]); + val.shval.s[3] = saturate_cast(scalar.val[3]); + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=short"); + args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=short4"); + args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } break; case CV_32S: - val.ival.s[0] = saturate_cast(scalar.val[0]); - val.ival.s[1] = saturate_cast(scalar.val[1]); - val.ival.s[2] = saturate_cast(scalar.val[2]); - val.ival.s[3] = saturate_cast(scalar.val[3]); - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=int"); - args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=int4"); - args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } + val.ival.s[0] = 
saturate_cast(scalar.val[0]); + val.ival.s[1] = saturate_cast(scalar.val[1]); + val.ival.s[2] = saturate_cast(scalar.val[2]); + val.ival.s[3] = saturate_cast(scalar.val[3]); + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=int"); + args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=int4"); + args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } break; case CV_32F: - val.fval.s[0] = scalar.val[0]; - val.fval.s[1] = scalar.val[1]; - val.fval.s[2] = scalar.val[2]; - val.fval.s[3] = scalar.val[3]; - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=float"); - args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=float4"); - args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } + val.fval.s[0] = scalar.val[0]; + val.fval.s[1] = scalar.val[1]; + val.fval.s[2] = scalar.val[2]; + val.fval.s[3] = scalar.val[3]; + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=float"); + args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=float4"); + args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } break; case CV_64F: - val.dval.s[0] = scalar.val[0]; - val.dval.s[1] = scalar.val[1]; - val.dval.s[2] = scalar.val[2]; - val.dval.s[3] = scalar.val[3]; - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=double"); - args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=double4"); - 
args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } + val.dval.s[0] = scalar.val[0]; + val.dval.s[1] = scalar.val[1]; + val.dval.s[2] = scalar.val[2]; + val.dval.s[3] = scalar.val[3]; + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=double"); + args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=double4"); + args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } break; - default: - CV_Error(CV_StsUnsupportedFormat,"unknown depth"); + default: + CV_Error(CV_StsUnsupportedFormat, "unknown depth"); } args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols )); @@ -836,7 +836,7 @@ void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat & args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step )); args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset )); openCLExecuteKernel(dst.clCxt , &operator_setToM, kernelName, globalThreads, - localThreads, args, -1, -1,compile_option); + localThreads, args, -1, -1, compile_option); } oclMat &cv::ocl::oclMat::setTo(const Scalar &scalar, const oclMat &mask) @@ -855,18 +855,18 @@ oclMat &cv::ocl::oclMat::setTo(const Scalar &scalar, const oclMat &mask) // (cl_mem)mem,1,0,sizeof(double)*4,s,0,0,0)); if (mask.empty()) { - if(type()==CV_8UC1) - { - set_to_withoutmask_run(*this, scalar, "set_to_without_mask_C1_D0"); - } - else - { - set_to_withoutmask_run(*this, scalar, "set_to_without_mask"); - } + if(type() == CV_8UC1) + { + set_to_withoutmask_run(*this, scalar, "set_to_without_mask_C1_D0"); + } + else + { + set_to_withoutmask_run(*this, scalar, "set_to_without_mask"); + } } else { - set_to_withmask_run(*this, scalar, 
mask, "set_to_with_mask"); + set_to_withmask_run(*this, scalar, mask, "set_to_with_mask"); } return *this; @@ -874,51 +874,92 @@ oclMat &cv::ocl::oclMat::setTo(const Scalar &scalar, const oclMat &mask) oclMat cv::ocl::oclMat::reshape(int new_cn, int new_rows) const { - if( new_rows != 0 && new_rows != rows) - { - CV_Error( CV_StsBadFunc, - "oclMat's number of rows can not be changed for current version" ); - } - - oclMat hdr = *this; - - int cn = channels(); - if (new_cn == 0) - new_cn = cn; - - int total_width = cols * cn; - - if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0) - new_rows = rows * total_width / new_cn; - - if (new_rows != 0 && new_rows != rows) - { - int total_size = total_width * rows; - - if (!isContinuous()) - CV_Error(CV_BadStep, "The matrix is not continuous, thus its number of rows can not be changed"); - - if ((unsigned)new_rows > (unsigned)total_size) - CV_Error(CV_StsOutOfRange, "Bad new number of rows"); - - total_width = total_size / new_rows; - - if (total_width * new_rows != total_size) - CV_Error(CV_StsBadArg, "The total number of matrix elements is not divisible by the new number of rows"); - - hdr.rows = new_rows; - hdr.step = total_width * elemSize1(); - } - - int new_width = total_width / new_cn; - - if (new_width * new_cn != total_width) - CV_Error(CV_BadNumChannels, "The total width is not divisible by the new number of channels"); - - hdr.cols = new_width; - hdr.wholecols = new_width; - hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT); - + if( new_rows != 0 && new_rows != rows) + + { + + CV_Error( CV_StsBadFunc, + + "oclMat's number of rows can not be changed for current version" ); + + } + + oclMat hdr = *this; + + int cn = oclchannels(); + + if (new_cn == 0) + + new_cn = cn; + + + + int total_width = cols * cn; + + + + if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0) + + new_rows = rows * total_width / new_cn; + + + + if (new_rows != 0 && new_rows != 
rows) + + { + + int total_size = total_width * rows; + + + + if (!isContinuous()) + + CV_Error(CV_BadStep, "The matrix is not continuous, thus its number of rows can not be changed"); + + + + if ((unsigned)new_rows > (unsigned)total_size) + + CV_Error(CV_StsOutOfRange, "Bad new number of rows"); + + + + total_width = total_size / new_rows; + + + + if (total_width * new_rows != total_size) + + CV_Error(CV_StsBadArg, "The total number of matrix elements is not divisible by the new number of rows"); + + + + hdr.rows = new_rows; + + hdr.step = total_width * elemSize1(); + + } + + + + int new_width = total_width / new_cn; + + + + if (new_width * new_cn != total_width) + + CV_Error(CV_BadNumChannels, "The total width is not divisible by the new number of channels"); + + + + hdr.cols = new_width; + + hdr.wholecols = new_width; + + hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT); + + + return hdr; } @@ -926,15 +967,13 @@ oclMat cv::ocl::oclMat::reshape(int new_cn, int new_rows) const void cv::ocl::oclMat::create(int _rows, int _cols, int _type) { clCxt = Context::getContext(); - //cout << "cv::ocl::oclMat::create()." 
<< endl; - /* core logic */ _type &= TYPE_MASK; - download_channels = CV_MAT_CN(_type); - if(download_channels==3) - { - _type = CV_MAKE_TYPE((CV_MAT_DEPTH(_type)),4); - } + //download_channels = CV_MAT_CN(_type); + //if(download_channels==3) + //{ + // _type = CV_MAKE_TYPE((CV_MAT_DEPTH(_type)),4); + //} if( rows == _rows && cols == _cols && type() == _type && data ) return; if( data ) @@ -953,7 +992,7 @@ void cv::ocl::oclMat::create(int _rows, int _cols, int _type) openCLMallocPitch(clCxt, &dev_ptr, &step, GPU_MATRIX_MALLOC_STEP(esz * cols), rows); //openCLMallocPitch(clCxt,&dev_ptr, &step, esz * cols, rows); - if (esz *cols == step) + if (esz * cols == step) flags |= Mat::CONTINUOUS_FLAG; int64 _nettosize = (int64)step * rows; @@ -979,7 +1018,6 @@ void cv::ocl::oclMat::release() step = rows = cols = 0; offset = wholerows = wholecols = 0; refcount = 0; - download_channels=0; } #endif /* !defined (HAVE_OPENCL) */ diff --git a/modules/ocl/src/mcwutil.cpp b/modules/ocl/src/mcwutil.cpp index 06078a0..c6096c3 100644 --- a/modules/ocl/src/mcwutil.cpp +++ b/modules/ocl/src/mcwutil.cpp @@ -63,8 +63,8 @@ namespace cv // provide additional methods for the user to interact with the command queue after a task is fired void openCLExecuteKernel_2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], - size_t localThreads[3], vector< pair > &args, int channels, - int depth, char *build_options, FLUSH_MODE finish_mode) + size_t localThreads[3], vector< pair > &args, int channels, + int depth, char *build_options, FLUSH_MODE finish_mode) { //construct kernel name //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number @@ -80,7 +80,7 @@ namespace cv kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options); if ( localThreads != NULL) - { + { globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0]; globalThreads[1] = divUp(globalThreads[1], localThreads[1]) 
* localThreads[1]; globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2]; @@ -92,7 +92,7 @@ namespace cv openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second)); openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads, - localThreads, 0, NULL, NULL)); + localThreads, 0, NULL, NULL)); switch(finish_mode) { @@ -109,19 +109,19 @@ namespace cv } void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, - size_t globalThreads[3], size_t localThreads[3], - vector< pair > &args, int channels, int depth, FLUSH_MODE finish_mode) + size_t globalThreads[3], size_t localThreads[3], + vector< pair > &args, int channels, int depth, FLUSH_MODE finish_mode) { openCLExecuteKernel2(clCxt, source, kernelName, globalThreads, localThreads, args, - channels, depth, NULL, finish_mode); + channels, depth, NULL, finish_mode); } void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, - size_t globalThreads[3], size_t localThreads[3], - vector< pair > &args, int channels, int depth, char *build_options, FLUSH_MODE finish_mode) + size_t globalThreads[3], size_t localThreads[3], + vector< pair > &args, int channels, int depth, char *build_options, FLUSH_MODE finish_mode) { openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, - build_options, finish_mode); + build_options, finish_mode); } }//namespace ocl diff --git a/modules/ocl/src/mcwutil.hpp b/modules/ocl/src/mcwutil.hpp index 67a0764..fe2b49a 100644 --- a/modules/ocl/src/mcwutil.hpp +++ b/modules/ocl/src/mcwutil.hpp @@ -63,10 +63,10 @@ namespace cv DISABLE }; void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], - size_t localThreads[3], vector< pair > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE); + size_t localThreads[3], vector< pair > &args, int channels, int depth, FLUSH_MODE 
finish_mode = DISABLE); void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], - size_t localThreads[3], vector< pair > &args, int channels, - int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE); + size_t localThreads[3], vector< pair > &args, int channels, + int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE); }//namespace ocl }//namespace cv diff --git a/modules/ocl/src/precomp.hpp b/modules/ocl/src/precomp.hpp index c919420..6dcb388 100644 --- a/modules/ocl/src/precomp.hpp +++ b/modules/ocl/src/precomp.hpp @@ -97,13 +97,13 @@ namespace cv size_t widthInBytes, size_t height); void openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch, const void *src, size_t spitch, - size_t width, size_t height, enum openCLMemcpyKind kind, int channels=-1); + size_t width, size_t height, enum openCLMemcpyKind kind, int channels = -1); void openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset, const void *src, size_t spitch, size_t width, size_t height, int src_offset, enum openCLMemcpyKind kind); void openCLFree(void *devPtr); - cl_mem openCLCreateBuffer(Context *clCxt,size_t flag, size_t size); - void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void* host_buffer, size_t size); + cl_mem openCLCreateBuffer(Context *clCxt, size_t flag, size_t size); + void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size); cl_kernel openCLGetKernelFromSource(const Context *clCxt, const char **source, string kernelName); cl_kernel openCLGetKernelFromSource(const Context *clCxt, @@ -113,8 +113,8 @@ namespace cv void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, vector< std::pair > &args, int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1); void openCLExecuteKernel_(Context *clCxt , const char **source, string kernelName, - size_t globalThreads[3], size_t 
localThreads[3], - vector< pair > &args, int channels, int depth, const char *build_options); + size_t globalThreads[3], size_t localThreads[3], + vector< pair > &args, int channels, int depth, const char *build_options); void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], size_t localThreads[3], vector< pair > &args, int channels, int depth); void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3], @@ -128,14 +128,14 @@ namespace cv //void openCLMemcpy2DWithNoPadding(cl_command_queue command_queue, cl_mem buffer, size_t size, size_t offset, void *ptr, // enum openCLMemcpyKind kind, cl_bool blocking_write); - int savetofile(const Context *clcxt, cl_program &program, const char *fileName); - struct Context::Impl - { + int savetofile(const Context *clcxt, cl_program &program, const char *fileName); + struct Context::Impl + { //Information of the OpenCL context cl_context clContext; cl_command_queue clCmdQueue; cl_device_id *devices; - string devName; + string devName; cl_uint maxDimensions; size_t maxWorkGroupSize; size_t *maxWorkItemSizes; @@ -143,8 +143,8 @@ namespace cv int double_support; //extra options to recognize vendor specific fp64 extensions char *extra_options; - string Binpath; - }; + string Binpath; + }; } } diff --git a/modules/ocl/src/pyrdown.cpp b/modules/ocl/src/pyrdown.cpp index d41931a..c05a7ae 100644 --- a/modules/ocl/src/pyrdown.cpp +++ b/modules/ocl/src/pyrdown.cpp @@ -17,7 +17,7 @@ // @Authors // Dachuan Zhao, dachuan@multicorewareinc.com // Yao Wang, yao@multicorewareinc.com -// +// // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -100,19 +100,17 @@ void pyrdown_run(const oclMat &src, const oclMat &dst) args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols)); - 
openCLExecuteKernel(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); + openCLExecuteKernel(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth()); } ////////////////////////////////////////////////////////////////////////////// // pyrDown -void cv::ocl::pyrDown(const oclMat& src, oclMat& dst) +void cv::ocl::pyrDown(const oclMat &src, oclMat &dst) { CV_Assert(src.depth() <= CV_32F && src.channels() <= 4); dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type()); - dst.download_channels=src.download_channels; - pyrdown_run(src, dst); } diff --git a/modules/ocl/src/pyrlk.cpp b/modules/ocl/src/pyrlk.cpp index 9c06e90..a701d61 100644 --- a/modules/ocl/src/pyrlk.cpp +++ b/modules/ocl/src/pyrlk.cpp @@ -48,8 +48,8 @@ using namespace cv::ocl; #if !defined (HAVE_OPENCL) -void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat&, const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat*) { } -void cv::ocl::PyrLKOpticalFlow::dense(const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat*) { } +void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &, const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat *) { } +void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat *) { } #else /* !defined (HAVE_OPENCL) */ @@ -83,7 +83,7 @@ struct int2 namespace { - void calcPatchSize(cv::Size winSize, int cn, dim3& block, dim3& patch, bool isDeviceArch11) + void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDeviceArch11) { winSize.width *= cn; @@ -144,7 +144,7 @@ void convert_run_cus(const oclMat &src, oclMat &dst, double alpha, double beta) args.push_back( make_pair( sizeof(cl_float) , (void *)&alpha_f )); args.push_back( make_pair( sizeof(cl_float) , (void *)&beta_f )); openCLExecuteKernel2(dst.clCxt , &operator_convertTo, kernelName, globalThreads, - localThreads, args, dst.channels(), dst.depth(), CLFLUSH); + localThreads, args, 
dst.oclchannels(), dst.depth(), CLFLUSH); } void convertTo( const oclMat &src, oclMat &m, int rtype, double alpha = 1, double beta = 0 ); void convertTo( const oclMat &src, oclMat &dst, int rtype, double alpha, double beta ) @@ -157,7 +157,7 @@ void convertTo( const oclMat &src, oclMat &dst, int rtype, double alpha, double if( rtype < 0 ) rtype = src.type(); else - rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.channels()); + rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.oclchannels()); int sdepth = src.depth(), ddepth = CV_MAT_DEPTH(rtype); if( sdepth == ddepth && noScale ) @@ -198,177 +198,177 @@ void set_to_withoutmask_run_cus(const oclMat &dst, const Scalar &scalar, string { globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0]; } - char compile_option[32]; - union sc - { - cl_uchar4 uval; - cl_char4 cval; - cl_ushort4 usval; - cl_short4 shval; - cl_int4 ival; - cl_float4 fval; - cl_double4 dval; - }val; + char compile_option[32]; + union sc + { + cl_uchar4 uval; + cl_char4 cval; + cl_ushort4 usval; + cl_short4 shval; + cl_int4 ival; + cl_float4 fval; + cl_double4 dval; + } val; switch(dst.depth()) { case 0: - val.uval.s[0] = saturate_cast(scalar.val[0]); - val.uval.s[1] = saturate_cast(scalar.val[1]); - val.uval.s[2] = saturate_cast(scalar.val[2]); - val.uval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=uchar"); - args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=uchar4"); - args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } + val.uval.s[0] = saturate_cast(scalar.val[0]); + val.uval.s[1] = saturate_cast(scalar.val[1]); + val.uval.s[2] = saturate_cast(scalar.val[2]); + val.uval.s[3] = saturate_cast(scalar.val[3]); + switch(dst.oclchannels()) + { + case 1: + 
sprintf(compile_option, "-D GENTYPE=uchar"); + args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=uchar4"); + args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } break; case 1: - val.cval.s[0] = saturate_cast(scalar.val[0]); - val.cval.s[1] = saturate_cast(scalar.val[1]); - val.cval.s[2] = saturate_cast(scalar.val[2]); - val.cval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=char"); - args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=char4"); - args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } + val.cval.s[0] = saturate_cast(scalar.val[0]); + val.cval.s[1] = saturate_cast(scalar.val[1]); + val.cval.s[2] = saturate_cast(scalar.val[2]); + val.cval.s[3] = saturate_cast(scalar.val[3]); + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=char"); + args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=char4"); + args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } break; case 2: - val.usval.s[0] = saturate_cast(scalar.val[0]); - val.usval.s[1] = saturate_cast(scalar.val[1]); - val.usval.s[2] = saturate_cast(scalar.val[2]); - val.usval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=ushort"); - args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=ushort4"); - args.push_back( make_pair( sizeof(cl_ushort4) , 
(void *)&val.usval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } + val.usval.s[0] = saturate_cast(scalar.val[0]); + val.usval.s[1] = saturate_cast(scalar.val[1]); + val.usval.s[2] = saturate_cast(scalar.val[2]); + val.usval.s[3] = saturate_cast(scalar.val[3]); + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=ushort"); + args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=ushort4"); + args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } break; case 3: - val.shval.s[0] = saturate_cast(scalar.val[0]); - val.shval.s[1] = saturate_cast(scalar.val[1]); - val.shval.s[2] = saturate_cast(scalar.val[2]); - val.shval.s[3] = saturate_cast(scalar.val[3]); - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=short"); - args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=short4"); - args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } + val.shval.s[0] = saturate_cast(scalar.val[0]); + val.shval.s[1] = saturate_cast(scalar.val[1]); + val.shval.s[2] = saturate_cast(scalar.val[2]); + val.shval.s[3] = saturate_cast(scalar.val[3]); + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=short"); + args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=short4"); + args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } break; case 4: - val.ival.s[0] = saturate_cast(scalar.val[0]); - val.ival.s[1] = saturate_cast(scalar.val[1]); - 
val.ival.s[2] = saturate_cast(scalar.val[2]); - val.ival.s[3] = saturate_cast(scalar.val[3]); - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=int"); - args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] )); - break; - case 2: - sprintf(compile_option, "-D GENTYPE=int2"); - cl_int2 i2val; - i2val.s[0] = val.ival.s[0]; - i2val.s[1] = val.ival.s[1]; - args.push_back( make_pair( sizeof(cl_int2) , (void *)&i2val )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=int4"); - args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } + val.ival.s[0] = saturate_cast(scalar.val[0]); + val.ival.s[1] = saturate_cast(scalar.val[1]); + val.ival.s[2] = saturate_cast(scalar.val[2]); + val.ival.s[3] = saturate_cast(scalar.val[3]); + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=int"); + args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] )); + break; + case 2: + sprintf(compile_option, "-D GENTYPE=int2"); + cl_int2 i2val; + i2val.s[0] = val.ival.s[0]; + i2val.s[1] = val.ival.s[1]; + args.push_back( make_pair( sizeof(cl_int2) , (void *)&i2val )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=int4"); + args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } break; case 5: - val.fval.s[0] = (float)scalar.val[0]; - val.fval.s[1] = (float)scalar.val[1]; - val.fval.s[2] = (float)scalar.val[2]; - val.fval.s[3] = (float)scalar.val[3]; - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=float"); - args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=float4"); - args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported 
channels"); - } + val.fval.s[0] = (float)scalar.val[0]; + val.fval.s[1] = (float)scalar.val[1]; + val.fval.s[2] = (float)scalar.val[2]; + val.fval.s[3] = (float)scalar.val[3]; + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=float"); + args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=float4"); + args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } break; case 6: - val.dval.s[0] = scalar.val[0]; - val.dval.s[1] = scalar.val[1]; - val.dval.s[2] = scalar.val[2]; - val.dval.s[3] = scalar.val[3]; - switch(dst.channels()) - { - case 1: - sprintf(compile_option, "-D GENTYPE=double"); - args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] )); - break; - case 4: - sprintf(compile_option, "-D GENTYPE=double4"); - args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval )); - break; - default: - CV_Error(CV_StsUnsupportedFormat,"unsupported channels"); - } + val.dval.s[0] = scalar.val[0]; + val.dval.s[1] = scalar.val[1]; + val.dval.s[2] = scalar.val[2]; + val.dval.s[3] = scalar.val[3]; + switch(dst.oclchannels()) + { + case 1: + sprintf(compile_option, "-D GENTYPE=double"); + args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] )); + break; + case 4: + sprintf(compile_option, "-D GENTYPE=double4"); + args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval )); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "unsupported channels"); + } break; - default: - CV_Error(CV_StsUnsupportedFormat,"unknown depth"); + default: + CV_Error(CV_StsUnsupportedFormat, "unknown depth"); } #if CL_VERSION_1_2 - if(dst.offset==0 && dst.cols==dst.wholecols) - { - clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue,(cl_mem)dst.data,args[0].second,args[0].first,0,dst.step*dst.rows,0,NULL,NULL); - } - else - { - args.push_back( 
make_pair( sizeof(cl_mem) , (void *)&dst.data )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel )); - args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel)); + if(dst.offset == 0 && dst.cols == dst.wholecols) + { + clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue, (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL); + } + else + { + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel )); + args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel)); openCLExecuteKernel2(dst.clCxt , &operator_setTo, kernelName, globalThreads, - localThreads, args, -1, -1,compile_option, CLFLUSH); - } + localThreads, args, -1, -1, compile_option, CLFLUSH); + } #else args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data )); args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols )); @@ -376,7 +376,7 @@ void set_to_withoutmask_run_cus(const oclMat &dst, const Scalar &scalar, string args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel )); args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel)); openCLExecuteKernel2(dst.clCxt , &operator_setTo, kernelName, globalThreads, - localThreads, args, -1, -1,compile_option, CLFLUSH); + localThreads, args, -1, -1, compile_option, CLFLUSH); #endif } @@ -385,30 +385,30 @@ oclMat &setTo(oclMat &src, const Scalar &scalar) CV_Assert( src.depth() >= 0 && src.depth() <= 6 ); CV_DbgAssert( !src.empty()); - if(src.type()==CV_8UC1) - { - set_to_withoutmask_run_cus(src, scalar, "set_to_without_mask_C1_D0"); - } - else - { - set_to_withoutmask_run_cus(src, scalar, 
"set_to_without_mask"); - } + if(src.type() == CV_8UC1) + { + set_to_withoutmask_run_cus(src, scalar, "set_to_without_mask_C1_D0"); + } + else + { + set_to_withoutmask_run_cus(src, scalar, "set_to_without_mask"); + } return src; } void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString, void *_scalar) { - if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F) + if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F) { - CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n"); + CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; } //dst.create(src1.size(), src1.type()); //CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols && // src1.rows == src2.rows && src2.rows == dst.rows); - CV_Assert(src1.cols == dst.cols && + CV_Assert(src1.cols == dst.cols && src1.rows == dst.rows); CV_Assert(src1.type() == dst.type()); @@ -429,11 +429,11 @@ void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, const ch //int cols = divUp(dst.cols * channels + offset_cols, vector_length); size_t localThreads[3] = { 16, 16, 1 }; - //size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - // divUp(dst.rows, localThreads[1]) * localThreads[1], - // 1 - // }; - size_t globalThreads[3] = { src1.cols, + //size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], + // divUp(dst.rows, localThreads[1]) * localThreads[1], + // 1 + // }; + size_t globalThreads[3] = { src1.cols, src1.rows, 1 }; @@ -455,8 +455,8 @@ void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, const ch //if(_scalar != NULL) //{ - float scalar1 = *((float *)_scalar); - args.push_back( make_pair( sizeof(float), (float *)&scalar1 )); + float scalar1 = *((float *)_scalar); + args.push_back( make_pair( sizeof(float), (float *)&scalar1 )); //} openCLExecuteKernel2(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, 
src1.depth(), CLFLUSH); @@ -489,10 +489,10 @@ void pyrdown_run_cus(const oclMat &src, const oclMat &dst) args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols)); - openCLExecuteKernel2(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.channels(), src.depth(), CLFLUSH); + openCLExecuteKernel2(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth(), CLFLUSH); } -void pyrDown_cus(const oclMat& src, oclMat& dst) +void pyrDown_cus(const oclMat &src, oclMat &dst) { CV_Assert(src.depth() <= CV_32F && src.channels() <= 4); @@ -549,7 +549,7 @@ void pyrDown_cus(const oclMat& src, oclMat& dst) // //void callT(const oclMat& src, oclMat& dst, MultiplyScalar op, int mask) //{ -// if (!isAligned(src.data, 4 * sizeof(double)) || !isAligned(src.step, 4 * sizeof(double)) || +// if (!isAligned(src.data, 4 * sizeof(double)) || !isAligned(src.step, 4 * sizeof(double)) || // !isAligned(dst.data, 4 * sizeof(double)) || !isAligned(dst.step, 4 * sizeof(double))) // { // callF(src, dst, op, mask); @@ -606,94 +606,94 @@ void pyrDown_cus(const oclMat& src, oclMat& dst) // //} //} -cl_mem bindTexture(const oclMat& mat, int depth, int channels) +cl_mem bindTexture(const oclMat &mat, int depth, int channels) { - cl_mem texture; + cl_mem texture; cl_image_format format; int err; - if(depth == 0) - { - format.image_channel_data_type = CL_UNSIGNED_INT8; - } - else if(depth == 5) - { - format.image_channel_data_type = CL_FLOAT; - } - if(channels == 1) - { - format.image_channel_order = CL_R; - } - else if(channels == 3) - { - format.image_channel_order = CL_RGB; - } - else if(channels == 4) - { - format.image_channel_order = CL_RGBA; - } + if(depth == 0) + { + format.image_channel_data_type = CL_UNSIGNED_INT8; + } + else if(depth == 5) + { + format.image_channel_data_type = CL_FLOAT; + } + if(channels == 1) + { + format.image_channel_order = CL_R; + } + else if(channels 
== 3) + { + format.image_channel_order = CL_RGB; + } + else if(channels == 4) + { + format.image_channel_order = CL_RGBA; + } #if CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; - desc.image_width = mat.step / mat.elemSize(); + desc.image_width = mat.step / mat.elemSize(); desc.image_height = mat.rows; desc.image_depth = NULL; desc.image_array_size = 1; desc.image_row_pitch = 0; - desc.image_slice_pitch= 0; + desc.image_slice_pitch = 0; desc.buffer = NULL; desc.num_mip_levels = 0; desc.num_samples = 0; - texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err); + texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err); #else texture = clCreateImage2D( - mat.clCxt->impl->clContext, - CL_MEM_READ_WRITE, - &format, - mat.step / mat.elemSize(), - mat.rows, - 0, - NULL, - &err); + mat.clCxt->impl->clContext, + CL_MEM_READ_WRITE, + &format, + mat.step / mat.elemSize(), + mat.rows, + 0, + NULL, + &err); #endif - size_t origin[] = { 0, 0, 0 }; - size_t region[] = { mat.step / mat.elemSize(), mat.rows, 1 }; - clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, texture, 0, origin, region, 0, NULL, 0); + size_t origin[] = { 0, 0, 0 }; + size_t region[] = { mat.step / mat.elemSize(), mat.rows, 1 }; + clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, texture, 0, origin, region, 0, NULL, 0); openCLSafeCall(err); - return texture; + return texture; } void releaseTexture(cl_mem texture) { - openCLFree(texture); + openCLFree(texture); } -void lkSparse_run(oclMat& I, oclMat& J, - const oclMat& prevPts, oclMat& nextPts, oclMat& status, oclMat* err, bool GET_MIN_EIGENVALS, int ptcount, - int level, /*dim3 block, */dim3 patch, Size winSize, int iters) +void lkSparse_run(oclMat &I, oclMat &J, + const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat *err, bool GET_MIN_EIGENVALS, int ptcount, + int level, /*dim3 block, 
*/dim3 patch, Size winSize, int iters) { Context *clCxt = I.clCxt; string kernelName = "lkSparse"; - size_t localThreads[3] = { 8, 32, 1 }; + size_t localThreads[3] = { 8, 32, 1 }; size_t globalThreads[3] = { 8 * ptcount, 32, 1}; - int cn = I.channels(); + int cn = I.oclchannels(); - bool calcErr; + bool calcErr; if (err) { - calcErr = true; + calcErr = true; } else { - calcErr = false; + calcErr = false; } - calcErr = true; + calcErr = true; - cl_mem ITex = bindTexture(I, I.depth(), cn); - cl_mem JTex = bindTexture(J, J.depth(), cn); + cl_mem ITex = bindTexture(I, I.depth(), cn); + cl_mem JTex = bindTexture(J, J.depth(), cn); vector > args; @@ -718,13 +718,13 @@ void lkSparse_run(oclMat& I, oclMat& J, args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr )); args.push_back( make_pair( sizeof(cl_char), (void *)&GET_MIN_EIGENVALS )); - openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.channels(), I.depth(), CLFLUSH); - - releaseTexture(ITex); - releaseTexture(JTex); + openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH); + + releaseTexture(ITex); + releaseTexture(JTex); } -void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat& prevImg, const oclMat& nextImg, const oclMat& prevPts, oclMat& nextPts, oclMat& status, oclMat* err) +void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat *err) { if (prevPts.empty()) { @@ -738,10 +738,10 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat& prevImg, const oclMat& next iters = std::min(std::max(iters, 0), 100); - const int cn = prevImg.channels(); + const int cn = prevImg.oclchannels(); dim3 block, patch; - calcPatchSize(winSize, cn, block, patch, isDeviceArch11_); + calcPatchSize(winSize, cn, block, patch, isDeviceArch11_); CV_Assert(derivLambda >= 0); CV_Assert(maxLevel >= 0 && winSize.width > 2 && winSize.height > 2); @@ 
-756,9 +756,9 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat& prevImg, const oclMat& next oclMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1); oclMat temp2 = nextPts.reshape(1); - //oclMat scalar(temp1.rows, temp1.cols, temp1.type(), Scalar(1.0f / (1 << maxLevel) / 2.0f)); - multiply_cus(temp1, temp2, 1.0f / (1 << maxLevel) / 2.0f); - //::multiply(temp1, 1.0f / (1 << maxLevel) / 2.0f, temp2); + //oclMat scalar(temp1.rows, temp1.cols, temp1.type(), Scalar(1.0f / (1 << maxLevel) / 2.0f)); + multiply_cus(temp1, temp2, 1.0f / (1 << maxLevel) / 2.0f); + //::multiply(temp1, 1.0f / (1 << maxLevel) / 2.0f, temp2); ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status); //status.setTo(Scalar::all(1)); @@ -781,12 +781,12 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat& prevImg, const oclMat& next } else { - //oclMat buf_; - // cvtColor(prevImg, buf_, COLOR_BGR2BGRA); - // buf_.convertTo(prevPyr_[0], CV_32F); + //oclMat buf_; + // cvtColor(prevImg, buf_, COLOR_BGR2BGRA); + // buf_.convertTo(prevPyr_[0], CV_32F); - // cvtColor(nextImg, buf_, COLOR_BGR2BGRA); - // buf_.convertTo(nextPyr_[0], CV_32F); + // cvtColor(nextImg, buf_, COLOR_BGR2BGRA); + // buf_.convertTo(nextPyr_[0], CV_32F); } for (int level = 1; level <= maxLevel; ++level) @@ -799,16 +799,16 @@ void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat& prevImg, const oclMat& next for (int level = maxLevel; level >= 0; level--) { - lkSparse_run(prevPyr_[level], nextPyr_[level], - prevPts, nextPts, status, level == 0 && err ? err : 0, getMinEigenVals, prevPts.cols, - level, /*block, */patch, winSize, iters); + lkSparse_run(prevPyr_[level], nextPyr_[level], + prevPts, nextPts, status, level == 0 && err ? 
err : 0, getMinEigenVals, prevPts.cols, + level, /*block, */patch, winSize, iters); } - clFinish(prevImg.clCxt->impl->clCmdQueue); + clFinish(prevImg.clCxt->impl->clCmdQueue); } -void lkDense_run(oclMat& I, oclMat& J, oclMat& u, oclMat& v, - oclMat& prevU, oclMat& prevV, oclMat* err, Size winSize, int iters) +void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v, + oclMat &prevU, oclMat &prevV, oclMat *err, Size winSize, int iters) { Context *clCxt = I.clCxt; @@ -817,22 +817,22 @@ void lkDense_run(oclMat& I, oclMat& J, oclMat& u, oclMat& v, size_t localThreads[3] = { 16, 16, 1 }; size_t globalThreads[3] = { I.cols, I.rows, 1}; - int cn = I.channels(); + int cn = I.oclchannels(); - bool calcErr; + bool calcErr; if (err) { - calcErr = true; + calcErr = true; } else { - calcErr = false; + calcErr = false; } - cl_mem ITex = bindTexture(I, I.depth(), cn); - cl_mem JTex = bindTexture(J, J.depth(), cn); + cl_mem ITex = bindTexture(I, I.depth(), cn); + cl_mem JTex = bindTexture(J, J.depth(), cn); - //int2 halfWin = {(winSize.width - 1) / 2, (winSize.height - 1) / 2}; + //int2 halfWin = {(winSize.width - 1) / 2, (winSize.height - 1) / 2}; //const int patchWidth = 16 + 2 * halfWin.x; //const int patchHeight = 16 + 2 * halfWin.y; //size_t smem_size = 3 * patchWidth * patchHeight * sizeof(int); @@ -854,18 +854,18 @@ void lkDense_run(oclMat& I, oclMat& J, oclMat& u, oclMat& v, args.push_back( make_pair( sizeof(cl_int), (void *)&I.cols )); //args.push_back( make_pair( sizeof(cl_mem), (void *)&(*err).data )); //args.push_back( make_pair( sizeof(cl_int), (void *)&(*err).step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.width )); - args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.height )); + args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.width )); + args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.height )); args.push_back( make_pair( sizeof(cl_int), (void *)&iters )); args.push_back( make_pair( sizeof(cl_char), (void 
*)&calcErr )); - openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.channels(), I.depth(), CLFLUSH); - - releaseTexture(ITex); - releaseTexture(JTex); + openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH); + + releaseTexture(ITex); + releaseTexture(JTex); } -void cv::ocl::PyrLKOpticalFlow::dense(const oclMat& prevImg, const oclMat& nextImg, oclMat& u, oclMat& v, oclMat* err) +void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err) { CV_Assert(prevImg.type() == CV_8UC1); CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type()); @@ -894,7 +894,7 @@ void cv::ocl::PyrLKOpticalFlow::dense(const oclMat& prevImg, const oclMat& nextI uPyr_[1].setTo(Scalar::all(0)); vPyr_[1].setTo(Scalar::all(0)); - Size winSize2i(winSize.width, winSize.height); + Size winSize2i(winSize.width, winSize.height); int idx = 0; @@ -903,7 +903,7 @@ void cv::ocl::PyrLKOpticalFlow::dense(const oclMat& prevImg, const oclMat& nextI int idx2 = (idx + 1) & 1; lkDense_run(prevPyr_[level], nextPyr_[level], uPyr_[idx], vPyr_[idx], uPyr_[idx2], vPyr_[idx2], - level == 0 ? err : 0, winSize2i, iters); + level == 0 ? 
err : 0, winSize2i, iters); if (level > 0) idx = idx2; diff --git a/modules/ocl/src/pyrup.cpp b/modules/ocl/src/pyrup.cpp index 0190faa..ebd3535 100644 --- a/modules/ocl/src/pyrup.cpp +++ b/modules/ocl/src/pyrup.cpp @@ -17,7 +17,7 @@ // @Authors // Zhang Chunpeng chunpeng@multicorewareinc.com // Yao Wang, yao@multicorewareinc.com -// +// // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -55,36 +55,43 @@ using namespace cv::ocl; using namespace std; #ifndef HAVE_OPENCL -void cv::ocl::pyrUp(const oclMat&, GpuMat&, oclMat&) { throw_nogpu(); } +void cv::ocl::pyrUp(const oclMat &, GpuMat &, oclMat &) +{ + throw_nogpu(); +} #else -namespace cv { namespace ocl -{ - extern const char *pyr_up; - void pyrUp(const cv::ocl::oclMat& src,cv::ocl::oclMat& dst) - { - dst.create(src.rows * 2, src.cols * 2, src.type()); - dst.download_channels=src.download_channels; - Context *clCxt = src.clCxt; - - const std::string kernelName = "pyrUp"; - - std::vector< pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&src.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step)); - - size_t globalThreads[3] = {dst.cols, dst.rows, 1}; - size_t localThreads[3] = {16, 16, 1}; - - openCLExecuteKernel(clCxt, &pyr_up, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); - } -}}; +namespace cv +{ + namespace ocl + { + 
extern const char *pyr_up; + void pyrUp(const cv::ocl::oclMat &src, cv::ocl::oclMat &dst) + { + dst.create(src.rows * 2, src.cols * 2, src.type()); + + Context *clCxt = src.clCxt; + + const std::string kernelName = "pyrUp"; + + std::vector< pair > args; + args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset)); + args.push_back( make_pair( sizeof(cl_int), (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step)); + + size_t globalThreads[3] = {dst.cols, dst.rows, 1}; + size_t localThreads[3] = {16, 16, 1}; + + + openCLExecuteKernel(clCxt, &pyr_up, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth()); + } + } +}; #endif // HAVE_OPENCL \ No newline at end of file diff --git a/modules/ocl/src/split_merge.cpp b/modules/ocl/src/split_merge.cpp index 61ea73a..e15b06e 100644 --- a/modules/ocl/src/split_merge.cpp +++ b/modules/ocl/src/split_merge.cpp @@ -114,7 +114,7 @@ namespace cv void merge_vector_run_no_roi(const oclMat *mat_src, size_t n, oclMat &mat_dst) { Context *clCxt = mat_dst.clCxt; - int channels = mat_dst.channels(); + int channels = mat_dst.oclchannels(); int depth = mat_dst.depth(); string kernelName = "merge_vector"; @@ -125,11 +125,11 @@ namespace cv {4, 4, 2, 2, 1, 1, 1} }; - size_t index = indexes[channels-1][mat_dst.depth()]; + size_t index = indexes[channels - 1][mat_dst.depth()]; int cols = divUp(mat_dst.cols, index); size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - 
divUp(mat_dst.rows, localThreads[1]) * localThreads[1], + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(mat_dst.rows, localThreads[1]) *localThreads[1], 1 }; @@ -158,14 +158,14 @@ namespace cv void merge_vector_run(const oclMat *mat_src, size_t n, oclMat &mat_dst) { - if(mat_dst.clCxt -> impl -> double_support ==0 && mat_dst.type() == CV_64F) + if(mat_dst.clCxt -> impl -> double_support == 0 && mat_dst.type() == CV_64F) { - CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n"); + CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; } Context *clCxt = mat_dst.clCxt; - int channels = mat_dst.channels(); + int channels = mat_dst.oclchannels(); int depth = mat_dst.depth(); string kernelName = "merge_vector"; @@ -176,15 +176,15 @@ namespace cv {1, 1, 1, 1, 1, 1, 1} }; - size_t vector_length = vector_lengths[channels-1][depth]; + size_t vector_length = vector_lengths[channels - 1][depth]; int offset_cols = (mat_dst.offset / mat_dst.elemSize()) & (vector_length - 1); int cols = divUp(mat_dst.cols + offset_cols, vector_length); size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(mat_dst.rows, localThreads[1]) * localThreads[1], - 1 - }; + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(mat_dst.rows, localThreads[1]) *localThreads[1], + 1 + }; int dst_step1 = mat_dst.cols * mat_dst.elemSize(); vector > args; @@ -206,7 +206,7 @@ namespace cv // if channel == 3, then the matrix will convert to channel =4 //if(n == 3) - // args.push_back( make_pair( sizeof(cl_int), (void *)&offset_cols)); + // args.push_back( make_pair( sizeof(cl_int), (void *)&offset_cols)); if(n == 3) { @@ -214,7 +214,7 @@ namespace cv args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[2].step)); args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[2].offset)); } - else if( n== 4) + else if( n == 4) { 
args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[3].data)); args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[3].step)); @@ -243,7 +243,7 @@ namespace cv CV_Assert(depth == mat_src[i].depth()); CV_Assert(size == mat_src[i].size()); - total_channels += mat_src[i].channels(); + total_channels += mat_src[i].oclchannels(); } CV_Assert(total_channels <= 4); @@ -263,7 +263,7 @@ namespace cv void split_vector_run_no_roi(const oclMat &mat_src, oclMat *mat_dst) { Context *clCxt = mat_src.clCxt; - int channels = mat_src.channels(); + int channels = mat_src.oclchannels(); int depth = mat_src.depth(); string kernelName = "split_vector"; @@ -274,13 +274,13 @@ namespace cv {4, 4, 2, 2, 1, 1, 1} }; - size_t index = indexes[channels-1][mat_dst[0].depth()]; + size_t index = indexes[channels - 1][mat_dst[0].depth()]; int cols = divUp(mat_src.cols, index); size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(mat_src.rows, localThreads[1]) * localThreads[1], - 1 - }; + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(mat_src.rows, localThreads[1]) *localThreads[1], + 1 + }; vector > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src.data)); @@ -307,14 +307,14 @@ namespace cv void split_vector_run(const oclMat &mat_src, oclMat *mat_dst) { - if(mat_src.clCxt -> impl -> double_support ==0 && mat_src.type() == CV_64F) + if(mat_src.clCxt -> impl -> double_support == 0 && mat_src.type() == CV_64F) { - CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n"); + CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n"); return; } Context *clCxt = mat_src.clCxt; - int channels = mat_src.channels(); + int channels = mat_src.oclchannels(); int depth = mat_src.depth(); string kernelName = "split_vector"; @@ -325,7 +325,7 @@ namespace cv {4, 4, 2, 2, 1, 1, 1} }; - size_t vector_length = 
vector_lengths[channels-1][mat_dst[0].depth()]; + size_t vector_length = vector_lengths[channels - 1][mat_dst[0].depth()]; int max_offset_cols = 0; for(int i = 0; i < channels; i++) @@ -339,8 +339,8 @@ namespace cv : divUp(mat_src.cols + max_offset_cols, vector_length); size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0], - divUp(mat_src.rows, localThreads[1]) * localThreads[1], 1 + size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0], + divUp(mat_src.rows, localThreads[1]) *localThreads[1], 1 }; int dst_step1 = mat_dst[0].cols * mat_dst[0].elemSize(); @@ -379,7 +379,7 @@ namespace cv CV_Assert(mat_dst); int depth = mat_src.depth(); - int num_channels = mat_src.channels(); + int num_channels = mat_src.oclchannels(); Size size = mat_src.size(); if(num_channels == 1) @@ -413,8 +413,8 @@ void cv::ocl::split(const oclMat &src, oclMat *dst) } void cv::ocl::split(const oclMat &src, vector &dst) { - dst.resize(src.channels()); - if(src.channels() > 0) + dst.resize(src.oclchannels()); + if(src.oclchannels() > 0) split_merge::split(src, &dst[0]); } #endif /* !defined (HAVE_OPENCL) */ diff --git a/modules/ocl/src/surf.cpp b/modules/ocl/src/surf.cpp index a59ae7c..17ab88d 100644 --- a/modules/ocl/src/surf.cpp +++ b/modules/ocl/src/surf.cpp @@ -44,7 +44,7 @@ //M*/ #include #include "precomp.hpp" -#include "opencv2/highgui/highgui.hpp" +//#include "opencv2/highgui/highgui.hpp" using namespace cv; using namespace cv::ocl; @@ -52,25 +52,65 @@ using namespace std; #if !defined (HAVE_OPENCL) -cv::ocl::SURF_OCL::SURF_OCL() { throw_nogpu(); } -cv::ocl::SURF_OCL::SURF_OCL(double, int, int, bool, float, bool) { throw_nogpu(); } -int cv::ocl::SURF_OCL::descriptorSize() const { throw_nogpu(); return 0;} -void cv::ocl::SURF_OCL::uploadKeypoints(const vector&, oclMat&) { throw_nogpu(); } -void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat&, vector&) { throw_nogpu(); } -void 
cv::ocl::SURF_OCL::downloadDescriptors(const oclMat&, vector&) { throw_nogpu(); } -void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); } -void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, oclMat&, oclMat&, bool) { throw_nogpu(); } -void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, vector&) { throw_nogpu(); } -void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, vector&, oclMat&, bool) { throw_nogpu(); } -void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, vector&, vector&, bool) { throw_nogpu(); } -void cv::ocl::SURF_OCL::releaseMemory() { throw_nogpu(); } +cv::ocl::SURF_OCL::SURF_OCL() +{ + throw_nogpu(); +} +cv::ocl::SURF_OCL::SURF_OCL(double, int, int, bool, float, bool) +{ + throw_nogpu(); +} +int cv::ocl::SURF_OCL::descriptorSize() const +{ + throw_nogpu(); + return 0; +} +void cv::ocl::SURF_OCL::uploadKeypoints(const vector &, oclMat &) +{ + throw_nogpu(); +} +void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat &, vector &) +{ + throw_nogpu(); +} +void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat &, vector &) +{ + throw_nogpu(); +} +void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, oclMat &) +{ + throw_nogpu(); +} +void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, oclMat &, oclMat &, bool) +{ + throw_nogpu(); +} +void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, vector &) +{ + throw_nogpu(); +} +void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, vector &, oclMat &, bool) +{ + throw_nogpu(); +} +void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, vector &, vector &, bool) +{ + throw_nogpu(); +} +void cv::ocl::SURF_OCL::releaseMemory() +{ + throw_nogpu(); +} #else /* !defined (HAVE_OPENCL) */ -namespace cv { namespace ocl +namespace cv { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char * nonfree_surf; -}} + namespace ocl + { + 
///////////////////////////OpenCL kernel strings/////////////////////////// + extern const char *nonfree_surf; + } +} static inline int divUp(int total, int grain) @@ -96,28 +136,28 @@ class SURF_OCL_Invoker { public: // facilities - void bindImgTex(const oclMat& img, cl_mem & texture); + void bindImgTex(const oclMat &img, cl_mem &texture); //void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold); //void loadOctaveConstants(int octave, int layer_rows, int layer_cols); // kernel callers declearations - void icvCalcLayerDetAndTrace_gpu(oclMat& det, oclMat& trace, int octave, int nOctaveLayers, int layer_rows); + void icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, int octave, int nOctaveLayers, int layer_rows); - void icvFindMaximaInLayer_gpu(const oclMat& det, const oclMat& trace, oclMat& maxPosBuffer, oclMat& maxCounter, int counterOffset, - int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols); + void icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat &trace, oclMat &maxPosBuffer, oclMat &maxCounter, int counterOffset, + int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols); - void icvInterpolateKeypoint_gpu(const oclMat& det, const oclMat& maxPosBuffer, unsigned int maxCounter, - oclMat& keypoints, oclMat& counters, int octave, int layer_rows, int maxFeatures); + void icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, unsigned int maxCounter, + oclMat &keypoints, oclMat &counters, int octave, int layer_rows, int maxFeatures); - void icvCalcOrientation_gpu(const oclMat& keypoints, int nFeatures); + void icvCalcOrientation_gpu(const oclMat &keypoints, int nFeatures); - void compute_descriptors_gpu(const oclMat& descriptors, const oclMat& keypoints, int nFeatures); + void compute_descriptors_gpu(const oclMat &descriptors, const oclMat &keypoints, int nFeatures); // end of kernel callers declearations - 
SURF_OCL_Invoker(SURF_OCL& surf, const oclMat& img, const oclMat& mask) : - surf_(surf), + SURF_OCL_Invoker(SURF_OCL &surf, const oclMat &img, const oclMat &mask) : + surf_(surf), img_cols(img.cols), img_rows(img.rows), use_mask(!mask.empty()), imgTex(NULL), sumTex(NULL), maskSumTex(NULL) @@ -159,13 +199,13 @@ public: // temp fix for missing min overload oclMat temp(mask.size(), mask.type()); temp.setTo(Scalar::all(1.0)); - //cv::ocl::min(mask, temp, surf_.mask1); ///////// disable this + //cv::ocl::min(mask, temp, surf_.mask1); ///////// disable this integral(surf_.mask1, surf_.maskSum); bindImgTex(surf_.maskSum, maskSumTex); } } - void detectKeypoints(oclMat& keypoints) + void detectKeypoints(oclMat &keypoints) { // create image pyramid buffers // different layers have same sized buffers, but they are sampled from gaussin kernel. @@ -186,7 +226,7 @@ public: icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, octave, surf_.nOctaveLayers, layer_rows); icvFindMaximaInLayer_gpu(surf_.det, surf_.trace, surf_.maxPosBuffer, counters, 1 + octave, - octave, use_mask, surf_.nOctaveLayers, layer_rows, layer_cols); + octave, use_mask, surf_.nOctaveLayers, layer_rows, layer_cols); unsigned int maxCounter = Mat(counters).at(1 + octave); maxCounter = std::min(maxCounter, static_cast(maxCandidates)); @@ -194,7 +234,7 @@ public: if (maxCounter > 0) { icvInterpolateKeypoint_gpu(surf_.det, surf_.maxPosBuffer, maxCounter, - keypoints, counters, octave, layer_rows, maxFeatures); + keypoints, counters, octave, layer_rows, maxFeatures); } } unsigned int featureCounter = Mat(counters).at(0); @@ -208,7 +248,7 @@ public: findOrientation(keypoints); } - void findOrientation(oclMat& keypoints) + void findOrientation(oclMat &keypoints) { const int nFeatures = keypoints.cols; if (nFeatures > 0) @@ -217,7 +257,7 @@ public: } } - void computeDescriptors(const oclMat& keypoints, oclMat& descriptors, int descriptorSize) + void computeDescriptors(const oclMat &keypoints, oclMat &descriptors, int 
descriptorSize) { const int nFeatures = keypoints.cols; if (nFeatures > 0) @@ -239,7 +279,7 @@ public: } private: - SURF_OCL& surf_; + SURF_OCL &surf_; int img_cols, img_rows; @@ -257,8 +297,8 @@ private: oclMat additioalParamBuffer; - SURF_OCL_Invoker& operator= (const SURF_OCL_Invoker& right) - { + SURF_OCL_Invoker &operator= (const SURF_OCL_Invoker &right) + { (*this) = right; return *this; } // remove warning C4512 @@ -289,7 +329,7 @@ int cv::ocl::SURF_OCL::descriptorSize() const return extended ? 128 : 64; } -void cv::ocl::SURF_OCL::uploadKeypoints(const vector& keypoints, oclMat& keypointsGPU) +void cv::ocl::SURF_OCL::uploadKeypoints(const vector &keypoints, oclMat &keypointsGPU) { if (keypoints.empty()) keypointsGPU.release(); @@ -297,17 +337,17 @@ void cv::ocl::SURF_OCL::uploadKeypoints(const vector& keypoints, oclMa { Mat keypointsCPU(SURF_OCL::ROWS_COUNT, static_cast(keypoints.size()), CV_32FC1); - float* kp_x = keypointsCPU.ptr(SURF_OCL::X_ROW); - float* kp_y = keypointsCPU.ptr(SURF_OCL::Y_ROW); - int* kp_laplacian = keypointsCPU.ptr(SURF_OCL::LAPLACIAN_ROW); - int* kp_octave = keypointsCPU.ptr(SURF_OCL::OCTAVE_ROW); - float* kp_size = keypointsCPU.ptr(SURF_OCL::SIZE_ROW); - float* kp_dir = keypointsCPU.ptr(SURF_OCL::ANGLE_ROW); - float* kp_hessian = keypointsCPU.ptr(SURF_OCL::HESSIAN_ROW); + float *kp_x = keypointsCPU.ptr(SURF_OCL::X_ROW); + float *kp_y = keypointsCPU.ptr(SURF_OCL::Y_ROW); + int *kp_laplacian = keypointsCPU.ptr(SURF_OCL::LAPLACIAN_ROW); + int *kp_octave = keypointsCPU.ptr(SURF_OCL::OCTAVE_ROW); + float *kp_size = keypointsCPU.ptr(SURF_OCL::SIZE_ROW); + float *kp_dir = keypointsCPU.ptr(SURF_OCL::ANGLE_ROW); + float *kp_hessian = keypointsCPU.ptr(SURF_OCL::HESSIAN_ROW); for (size_t i = 0, size = keypoints.size(); i < size; ++i) { - const KeyPoint& kp = keypoints[i]; + const KeyPoint &kp = keypoints[i]; kp_x[i] = kp.pt.x; kp_y[i] = kp.pt.y; kp_octave[i] = kp.octave; @@ -321,7 +361,7 @@ void cv::ocl::SURF_OCL::uploadKeypoints(const vector& 
keypoints, oclMa } } -void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat& keypointsGPU, vector& keypoints) +void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat &keypointsGPU, vector &keypoints) { const int nFeatures = keypointsGPU.cols; @@ -335,17 +375,17 @@ void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat& keypointsGPU, vector(SURF_OCL::X_ROW); - float* kp_y = keypointsCPU.ptr(SURF_OCL::Y_ROW); - int* kp_laplacian = keypointsCPU.ptr(SURF_OCL::LAPLACIAN_ROW); - int* kp_octave = keypointsCPU.ptr(SURF_OCL::OCTAVE_ROW); - float* kp_size = keypointsCPU.ptr(SURF_OCL::SIZE_ROW); - float* kp_dir = keypointsCPU.ptr(SURF_OCL::ANGLE_ROW); - float* kp_hessian = keypointsCPU.ptr(SURF_OCL::HESSIAN_ROW); + float *kp_x = keypointsCPU.ptr(SURF_OCL::X_ROW); + float *kp_y = keypointsCPU.ptr(SURF_OCL::Y_ROW); + int *kp_laplacian = keypointsCPU.ptr(SURF_OCL::LAPLACIAN_ROW); + int *kp_octave = keypointsCPU.ptr(SURF_OCL::OCTAVE_ROW); + float *kp_size = keypointsCPU.ptr(SURF_OCL::SIZE_ROW); + float *kp_dir = keypointsCPU.ptr(SURF_OCL::ANGLE_ROW); + float *kp_hessian = keypointsCPU.ptr(SURF_OCL::HESSIAN_ROW); for (int i = 0; i < nFeatures; ++i) { - KeyPoint& kp = keypoints[i]; + KeyPoint &kp = keypoints[i]; kp.pt.x = kp_x[i]; kp.pt.y = kp_y[i]; kp.class_id = kp_laplacian[i]; @@ -357,7 +397,7 @@ void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat& keypointsGPU, vector& descriptors) +void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat &descriptorsGPU, vector &descriptors) { if (descriptorsGPU.empty()) descriptors.clear(); @@ -371,7 +411,7 @@ void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat& descriptorsGPU, vector } } -void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints) +void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, oclMat &keypoints) { if (!img.empty()) { @@ -381,8 +421,8 @@ void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, oclMat } } -void 
cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints, oclMat& descriptors, - bool useProvidedKeypoints) +void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, oclMat &keypoints, oclMat &descriptors, + bool useProvidedKeypoints) { if (!img.empty()) { @@ -399,7 +439,7 @@ void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, oclMat } } -void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector& keypoints) +void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, vector &keypoints) { oclMat keypointsGPU; @@ -408,8 +448,8 @@ void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector downloadKeypoints(keypointsGPU, keypoints); } -void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector& keypoints, - oclMat& descriptors, bool useProvidedKeypoints) +void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, vector &keypoints, + oclMat &descriptors, bool useProvidedKeypoints) { oclMat keypointsGPU; @@ -421,8 +461,8 @@ void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector downloadKeypoints(keypointsGPU, keypoints); } -void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector& keypoints, - vector& descriptors, bool useProvidedKeypoints) +void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, vector &keypoints, + vector &descriptors, bool useProvidedKeypoints) { oclMat descriptorsGPU; @@ -444,7 +484,7 @@ void cv::ocl::SURF_OCL::releaseMemory() // bind source buffer to image oject. 
-void SURF_OCL_Invoker::bindImgTex(const oclMat& img, cl_mem& texture) +void SURF_OCL_Invoker::bindImgTex(const oclMat &img, cl_mem &texture) { cl_image_format format; int err; @@ -494,31 +534,31 @@ void SURF_OCL_Invoker::bindImgTex(const oclMat& img, cl_mem& texture) desc.image_depth = 0; desc.image_array_size = 1; desc.image_row_pitch = 0; - desc.image_slice_pitch= 0; + desc.image_slice_pitch = 0; desc.buffer = NULL; desc.num_mip_levels = 0; desc.num_samples = 0; - texture = clCreateImage(Context::getContext()->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err); + texture = clCreateImage(Context::getContext()->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err); #else texture = clCreateImage2D( - Context::getContext()->impl->clContext, - CL_MEM_READ_WRITE, - &format, - img.step / img.elemSize(), - img.rows, - 0, - NULL, - &err); + Context::getContext()->impl->clContext, + CL_MEM_READ_WRITE, + &format, + img.step / img.elemSize(), + img.rows, + 0, + NULL, + &err); #endif - size_t origin[] = { 0, 0, 0 }; - size_t region[] = { img.step/img.elemSize(), img.rows, 1 }; + size_t origin[] = { 0, 0, 0 }; + size_t region[] = { img.step / img.elemSize(), img.rows, 1 }; clEnqueueCopyBufferToImage(img.clCxt->impl->clCmdQueue, (cl_mem)img.data, texture, 0, origin, region, 0, NULL, 0); openCLSafeCall(err); } //////////////////////////// // kernel caller definitions -void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat& det, oclMat& trace, int octave, int nOctaveLayers, int c_layer_rows) +void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, int octave, int nOctaveLayers, int c_layer_rows) { const int min_size = calcSize(octave, 0); const int max_samples_i = 1 + ((img_rows - min_size) >> octave); @@ -540,15 +580,17 @@ void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat& det, oclMat& trace, i args.push_back( make_pair( sizeof(cl_int), (void *)&c_layer_rows)); size_t localThreads[3] = {16, 16, 1}; - size_t 
globalThreads[3] = { - divUp(max_samples_j, localThreads[0]) * localThreads[0], - divUp(max_samples_i, localThreads[1]) * localThreads[1] * (nOctaveLayers + 2), - 1}; - openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1); + size_t globalThreads[3] = + { + divUp(max_samples_j, localThreads[0]) *localThreads[0], + divUp(max_samples_i, localThreads[1]) *localThreads[1] *(nOctaveLayers + 2), + 1 + }; + openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1); } -void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat& det, const oclMat& trace, oclMat& maxPosBuffer, oclMat& maxCounter, int counterOffset, - int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols) +void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat &trace, oclMat &maxPosBuffer, oclMat &maxCounter, int counterOffset, + int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols) { const int min_margin = ((calcSize(octave, 2) >> 1) >> octave) + 1; @@ -578,15 +620,16 @@ void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat& det, const oclMat& } size_t localThreads[3] = {16, 16, 1}; - size_t globalThreads[3] = {divUp(layer_cols - 2 * min_margin, localThreads[0] - 2) * localThreads[0], - divUp(layer_rows - 2 * min_margin, localThreads[1] - 2) * nLayers * localThreads[1], - 1}; + size_t globalThreads[3] = {divUp(layer_cols - 2 * min_margin, localThreads[0] - 2) *localThreads[0], + divUp(layer_rows - 2 * min_margin, localThreads[1] - 2) *nLayers *localThreads[1], + 1 + }; openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1); } -void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat& det, const oclMat& maxPosBuffer, unsigned int maxCounter, - oclMat& keypoints, oclMat& counters, int octave, int layer_rows, int maxFeatures) +void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat 
&maxPosBuffer, unsigned int maxCounter, + oclMat &keypoints, oclMat &counters, int octave, int layer_rows, int maxFeatures) { Context *clCxt = det.clCxt; string kernelName = "icvInterpolateKeypoint"; @@ -605,14 +648,14 @@ void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat& det, const oclMa args.push_back( make_pair( sizeof(cl_int), (void *)&maxFeatures)); size_t localThreads[3] = {3, 3, 3}; - size_t globalThreads[3] = {maxCounter * localThreads[0], localThreads[1], 1}; + size_t globalThreads[3] = {maxCounter *localThreads[0], localThreads[1], 1}; openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1); } -void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat& keypoints, int nFeatures) +void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeatures) { - Context * clCxt = counters.clCxt; + Context *clCxt = counters.clCxt; string kernelName = "icvCalcOrientation"; vector< pair > args; @@ -624,12 +667,12 @@ void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat& keypoints, int nFeat args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols)); size_t localThreads[3] = {32, 4, 1}; - size_t globalThreads[3] = {nFeatures * localThreads[0], localThreads[1], 1}; + size_t globalThreads[3] = {nFeatures *localThreads[0], localThreads[1], 1}; openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1); } -void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat& descriptors, const oclMat& keypoints, int nFeatures) +void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const oclMat &keypoints, int nFeatures) { // compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D Context *clCxt = descriptors.clCxt; diff --git a/modules/ocl/test/main.cpp b/modules/ocl/test/main.cpp index f8c0f0b..92740e7 100644 --- a/modules/ocl/test/main.cpp +++ b/modules/ocl/test/main.cpp @@ -81,14 +81,14 @@ int main(int argc, 
char **argv) print_info(); - std::vector oclinfo; - int devnums = getDevice(oclinfo); - if(devnums<1) - { - std::cout << "no device found\n"; - return -1; - } - //setDevice(oclinfo[2]); + std::vector oclinfo; + int devnums = getDevice(oclinfo); + if(devnums < 1) + { + std::cout << "no device found\n"; + return -1; + } + //setDevice(oclinfo[1]); return RUN_ALL_TESTS(); } diff --git a/modules/ocl/test/test_arithm.cpp b/modules/ocl/test/test_arithm.cpp index cbad59e..0abf0ce 100644 --- a/modules/ocl/test/test_arithm.cpp +++ b/modules/ocl/test/test_arithm.cpp @@ -143,6 +143,10 @@ PARAM_TEST_CASE(ArithmTestBase, MatType, bool) src1y = rng.uniform(0, mat1.rows - roirows); dstx = rng.uniform(0, dst.cols - roicols); dsty = rng.uniform(0, dst.rows - roirows); + maskx = rng.uniform(0, mask.cols - roicols); + masky = rng.uniform(0, mask.rows - roirows); + src2x = rng.uniform(0, mat2.cols - roicols); + src2y = rng.uniform(0, mat2.rows - roirows); #else roicols = mat1.cols; roirows = mat1.rows; @@ -150,11 +154,11 @@ PARAM_TEST_CASE(ArithmTestBase, MatType, bool) src1y = 0; dstx = 0; dsty = 0; + maskx = 0; + masky = 0; + src2x = 0; + src2y = 0; #endif - maskx = rng.uniform(0, mask.cols - roicols); - masky = rng.uniform(0, mask.rows - roirows); - src2x = rng.uniform(0, mat2.cols - roicols); - src2y = rng.uniform(0, mat2.rows - roirows); mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); mask_roi = mask(Rect(maskx, masky, roicols, roirows)); @@ -1454,7 +1458,7 @@ TEST_P(MagnitudeSqr, Mat) float val1 = mat1.at(i, j); float val2 = mat2.at(i, j); - ((float *)(dst.data))[i *dst.step/4 +j] = val1 * val1 + val2 * val2; + ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2; // float val1 =((float *)( mat1.data))[(i*mat1.step/8 +j)*2]; // @@ -1525,40 +1529,40 @@ INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine( Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine( 
- Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(false))); INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine( - Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Div, Combine( - Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine( - Values(CV_8UC1,CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine( - Values(CV_32FC1, CV_32FC3,CV_32FC4), + Values(CV_32FC1, CV_32FC3, CV_32FC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine( - Values(CV_32FC1, CV_32FC3,CV_32FC4), + Values(CV_32FC1, CV_32FC3, CV_32FC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine( - Values(CV_32FC1, CV_32FC3,CV_32FC4), + Values(CV_32FC1, CV_32FC3, CV_32FC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine( - Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32FC1), + Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32FC1), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine( - Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC3, CV_8UC4, 
CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, MinMax, Combine( @@ -1578,24 +1582,24 @@ INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, Combine( Values(false))); -INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32FC1, CV_32FC3,CV_32FC4), Values(false))); +INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32FC1, CV_32FC3, CV_32FC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine( - Values(CV_8UC1, CV_32SC1, CV_32SC4, CV_32FC1,CV_32FC3, CV_32FC4), Values(false))); + Values(CV_8UC1, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(false))); //Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine( - Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC3,CV_32FC4), Values(false))); + Values(CV_8UC1, CV_8UC3, CV_32SC1, CV_32FC1, CV_32FC3, CV_32FC4), Values(false))); //Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine( - Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC3,CV_32FC4), Values(false))); + Values(CV_8UC1, CV_8UC3, CV_32SC1, CV_32FC1, CV_32FC3, CV_32FC4), Values(false))); //Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine( - Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC3,CV_32FC4), Values(false))); + Values(CV_8UC1, CV_8UC3, CV_32SC1, CV_32FC1, CV_32FC3, CV_32FC4), Values(false))); //Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(Values(CV_8UC1, CV_32SC1, CV_32FC1), Values(false))); diff --git a/modules/ocl/test/test_blend.cpp b/modules/ocl/test/test_blend.cpp index 7d76d41..94014c0 100644 --- a/modules/ocl/test/test_blend.cpp +++ b/modules/ocl/test/test_blend.cpp @@ -6,9 +6,9 @@ using namespace cv::ocl; using namespace cvtest; using namespace testing; using namespace std; - +#ifdef HAVE_OPENCL template -void 
blendLinearGold(const cv::Mat& img1, const cv::Mat& img2, const cv::Mat& weights1, const cv::Mat& weights2, cv::Mat& result_gold) +void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &weights1, const cv::Mat &weights2, cv::Mat &result_gold) { result_gold.create(img1.size(), img1.type()); @@ -16,11 +16,11 @@ void blendLinearGold(const cv::Mat& img1, const cv::Mat& img2, const cv::Mat& we for (int y = 0; y < img1.rows; ++y) { - const float* weights1_row = weights1.ptr(y); - const float* weights2_row = weights2.ptr(y); - const T* img1_row = img1.ptr(y); - const T* img2_row = img2.ptr(y); - T* result_gold_row = result_gold.ptr(y); + const float *weights1_row = weights1.ptr(y); + const float *weights2_row = weights2.ptr(y); + const T *img1_row = img1.ptr(y); + const T *img2_row = img2.ptr(y); + T *result_gold_row = result_gold.ptr(y); for (int x = 0; x < img1.cols * cn; ++x) { @@ -59,16 +59,16 @@ TEST_P(Blend, Accuracy) cv::Mat weights1 = randomMat(size, CV_32F, 0, 1); cv::Mat weights2 = randomMat(size, CV_32F, 0, 1); - cv::ocl::oclMat gimg1(size, type), gimg2(size, type), gweights1(size, CV_32F), gweights2(size, CV_32F); - cv::ocl::oclMat dst(size, type); - gimg1.upload(img1); - gimg2.upload(img2); - gweights1.upload(weights1); - gweights2.upload(weights2); - cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, dst); - cv::Mat result; + cv::ocl::oclMat gimg1(size, type), gimg2(size, type), gweights1(size, CV_32F), gweights2(size, CV_32F); + cv::ocl::oclMat dst(size, type); + gimg1.upload(img1); + gimg2.upload(img2); + gweights1.upload(weights1); + gweights2.upload(weights2); + cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, dst); + cv::Mat result; cv::Mat result_gold; - dst.download(result); + dst.download(result); if (depth == CV_8U) blendLinearGold(img1, img2, weights1, weights2, result_gold); else @@ -78,6 +78,7 @@ TEST_P(Blend, Accuracy) } INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine( - DIFFERENT_SIZES, - 
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3),MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)) -)); \ No newline at end of file + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4)) + )); +#endif \ No newline at end of file diff --git a/modules/ocl/test/test_brute_force_matcher.cpp b/modules/ocl/test/test_brute_force_matcher.cpp index 6ad557e..424781f 100644 --- a/modules/ocl/test/test_brute_force_matcher.cpp +++ b/modules/ocl/test/test_brute_force_matcher.cpp @@ -40,180 +40,181 @@ //M*/ #include "precomp.hpp" +#ifdef HAVE_OPENCL +namespace +{ -namespace { - -///////////////////////////////////////////////////////////////////////////////////////////////// -// BruteForceMatcher + ///////////////////////////////////////////////////////////////////////////////////////////////// + // BruteForceMatcher -CV_ENUM(DistType, cv::ocl::BruteForceMatcher_OCL_base::L1Dist, cv::ocl::BruteForceMatcher_OCL_base::L2Dist, cv::ocl::BruteForceMatcher_OCL_base::HammingDist) -IMPLEMENT_PARAM_CLASS(DescriptorSize, int) + CV_ENUM(DistType, cv::ocl::BruteForceMatcher_OCL_base::L1Dist, cv::ocl::BruteForceMatcher_OCL_base::L2Dist, cv::ocl::BruteForceMatcher_OCL_base::HammingDist) + IMPLEMENT_PARAM_CLASS(DescriptorSize, int) -PARAM_TEST_CASE(BruteForceMatcher/*, NormCode*/, DistType, DescriptorSize) -{ - //std::vector oclinfo; - cv::ocl::BruteForceMatcher_OCL_base::DistType distType; - int normCode; - int dim; + PARAM_TEST_CASE(BruteForceMatcher/*, NormCode*/, DistType, DescriptorSize) + { + //std::vector oclinfo; + cv::ocl::BruteForceMatcher_OCL_base::DistType distType; + int normCode; + int dim; - int queryDescCount; - int countFactor; + int queryDescCount; + int countFactor; - cv::Mat query, train; + cv::Mat query, train; - virtual void SetUp() - { - //normCode = GET_PARAM(0); - distType = (cv::ocl::BruteForceMatcher_OCL_base::DistType)(int)GET_PARAM(0); - dim = GET_PARAM(1); - - //int devnums = 
getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE); - //CV_Assert(devnums > 0); - - queryDescCount = 300; // must be even number because we split train data in some cases in two - countFactor = 4; // do not change it - - cv::RNG& rng = cvtest::TS::ptr()->get_rng(); - - cv::Mat queryBuf, trainBuf; - - // Generate query descriptors randomly. - // Descriptor vector elements are integer values. - queryBuf.create(queryDescCount, dim, CV_32SC1); - rng.fill(queryBuf, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3)); - queryBuf.convertTo(queryBuf, CV_32FC1); - - // Generate train decriptors as follows: - // copy each query descriptor to train set countFactor times - // and perturb some one element of the copied descriptors in - // in ascending order. General boundaries of the perturbation - // are (0.f, 1.f). - trainBuf.create(queryDescCount * countFactor, dim, CV_32FC1); - float step = 1.f / countFactor; - for (int qIdx = 0; qIdx < queryDescCount; qIdx++) + virtual void SetUp() { - cv::Mat queryDescriptor = queryBuf.row(qIdx); - for (int c = 0; c < countFactor; c++) + //normCode = GET_PARAM(0); + distType = (cv::ocl::BruteForceMatcher_OCL_base::DistType)(int)GET_PARAM(0); + dim = GET_PARAM(1); + + //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE); + //CV_Assert(devnums > 0); + + queryDescCount = 300; // must be even number because we split train data in some cases in two + countFactor = 4; // do not change it + + cv::RNG &rng = cvtest::TS::ptr()->get_rng(); + + cv::Mat queryBuf, trainBuf; + + // Generate query descriptors randomly. + // Descriptor vector elements are integer values. + queryBuf.create(queryDescCount, dim, CV_32SC1); + rng.fill(queryBuf, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3)); + queryBuf.convertTo(queryBuf, CV_32FC1); + + // Generate train decriptors as follows: + // copy each query descriptor to train set countFactor times + // and perturb some one element of the copied descriptors in + // in ascending order. 
General boundaries of the perturbation + // are (0.f, 1.f). + trainBuf.create(queryDescCount * countFactor, dim, CV_32FC1); + float step = 1.f / countFactor; + for (int qIdx = 0; qIdx < queryDescCount; qIdx++) { - int tIdx = qIdx * countFactor + c; - cv::Mat trainDescriptor = trainBuf.row(tIdx); - queryDescriptor.copyTo(trainDescriptor); - int elem = rng(dim); - float diff = rng.uniform(step * c, step * (c + 1)); - trainDescriptor.at(0, elem) += diff; + cv::Mat queryDescriptor = queryBuf.row(qIdx); + for (int c = 0; c < countFactor; c++) + { + int tIdx = qIdx * countFactor + c; + cv::Mat trainDescriptor = trainBuf.row(tIdx); + queryDescriptor.copyTo(trainDescriptor); + int elem = rng(dim); + float diff = rng.uniform(step * c, step * (c + 1)); + trainDescriptor.at(0, elem) += diff; + } } + + queryBuf.convertTo(query, CV_32F); + trainBuf.convertTo(train, CV_32F); } + }; - queryBuf.convertTo(query, CV_32F); - trainBuf.convertTo(train, CV_32F); - } -}; + TEST_P(BruteForceMatcher, Match_Single) + { + cv::ocl::BruteForceMatcher_OCL_base matcher(distType); -TEST_P(BruteForceMatcher, Match_Single) -{ - cv::ocl::BruteForceMatcher_OCL_base matcher(distType); + std::vector matches; + matcher.match(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches); - std::vector matches; - matcher.match(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches); + ASSERT_EQ(static_cast(queryDescCount), matches.size()); - ASSERT_EQ(static_cast(queryDescCount), matches.size()); + int badCount = 0; + for (size_t i = 0; i < matches.size(); i++) + { + cv::DMatch match = matches[i]; + if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0)) + badCount++; + } - int badCount = 0; - for (size_t i = 0; i < matches.size(); i++) - { - cv::DMatch match = matches[i]; - if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0)) - badCount++; + ASSERT_EQ(0, badCount); } - ASSERT_EQ(0, badCount); -} - 
-TEST_P(BruteForceMatcher, KnnMatch_2_Single) -{ - const int knn = 2; + TEST_P(BruteForceMatcher, KnnMatch_2_Single) + { + const int knn = 2; - cv::ocl::BruteForceMatcher_OCL_base matcher(distType); + cv::ocl::BruteForceMatcher_OCL_base matcher(distType); - std::vector< std::vector > matches; - matcher.knnMatch(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches, knn); + std::vector< std::vector > matches; + matcher.knnMatch(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches, knn); - ASSERT_EQ(static_cast(queryDescCount), matches.size()); + ASSERT_EQ(static_cast(queryDescCount), matches.size()); - int badCount = 0; - for (size_t i = 0; i < matches.size(); i++) - { - if ((int)matches[i].size() != knn) - badCount++; - else + int badCount = 0; + for (size_t i = 0; i < matches.size(); i++) { - int localBadCount = 0; - for (int k = 0; k < knn; k++) + if ((int)matches[i].size() != knn) + badCount++; + else { - cv::DMatch match = matches[i][k]; - if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k) || (match.imgIdx != 0)) - localBadCount++; + int localBadCount = 0; + for (int k = 0; k < knn; k++) + { + cv::DMatch match = matches[i][k]; + if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k) || (match.imgIdx != 0)) + localBadCount++; + } + badCount += localBadCount > 0 ? 1 : 0; } - badCount += localBadCount > 0 ? 1 : 0; } - } - ASSERT_EQ(0, badCount); -} + ASSERT_EQ(0, badCount); + } -TEST_P(BruteForceMatcher, RadiusMatch_Single) -{ - float radius; - if(distType == cv::ocl::BruteForceMatcher_OCL_base::L2Dist) - radius = 1.f / countFactor /countFactor; - else - radius = 1.f / countFactor; - - cv::ocl::BruteForceMatcher_OCL_base matcher(distType); - - // assume support atomic. 
- //if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS)) - //{ - // try - // { - // std::vector< std::vector > matches; - // matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius); - // } - // catch (const cv::Exception& e) - // { - // ASSERT_EQ(CV_StsNotImplemented, e.code); - // } - //} - //else + TEST_P(BruteForceMatcher, RadiusMatch_Single) { - std::vector< std::vector > matches; - matcher.radiusMatch(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches, radius); + float radius; + if(distType == cv::ocl::BruteForceMatcher_OCL_base::L2Dist) + radius = 1.f / countFactor / countFactor; + else + radius = 1.f / countFactor; + + cv::ocl::BruteForceMatcher_OCL_base matcher(distType); + + // assume support atomic. + //if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS)) + //{ + // try + // { + // std::vector< std::vector > matches; + // matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius); + // } + // catch (const cv::Exception& e) + // { + // ASSERT_EQ(CV_StsNotImplemented, e.code); + // } + //} + //else + { + std::vector< std::vector > matches; + matcher.radiusMatch(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches, radius); - ASSERT_EQ(static_cast(queryDescCount), matches.size()); + ASSERT_EQ(static_cast(queryDescCount), matches.size()); - int badCount = 0; - for (size_t i = 0; i < matches.size(); i++) - { - if ((int)matches[i].size() != 1) - { - badCount++; - } - else + int badCount = 0; + for (size_t i = 0; i < matches.size(); i++) { - cv::DMatch match = matches[i][0]; - if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i*countFactor) || (match.imgIdx != 0)) + if ((int)matches[i].size() != 1) + { badCount++; + } + else + { + cv::DMatch match = matches[i][0]; + if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0)) + badCount++; + } } - } - ASSERT_EQ(0, badCount); + ASSERT_EQ(0, badCount); + } } -} -INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, 
testing::Combine( - //ALL_DEVICES, - testing::Values(DistType(cv::ocl::BruteForceMatcher_OCL_base::L1Dist), DistType(cv::ocl::BruteForceMatcher_OCL_base::L2Dist)), - testing::Values(DescriptorSize(57), DescriptorSize(64), DescriptorSize(83), DescriptorSize(128), DescriptorSize(179), DescriptorSize(256), DescriptorSize(304)))); + INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, testing::Combine( + //ALL_DEVICES, + testing::Values(DistType(cv::ocl::BruteForceMatcher_OCL_base::L1Dist), DistType(cv::ocl::BruteForceMatcher_OCL_base::L2Dist)), + testing::Values(DescriptorSize(57), DescriptorSize(64), DescriptorSize(83), DescriptorSize(128), DescriptorSize(179), DescriptorSize(256), DescriptorSize(304)))); } // namespace - +#endif diff --git a/modules/ocl/test/test_canny.cpp b/modules/ocl/test/test_canny.cpp index e728c99..f206cc3 100644 --- a/modules/ocl/test/test_canny.cpp +++ b/modules/ocl/test/test_canny.cpp @@ -44,8 +44,12 @@ //M*/ #include "precomp.hpp" - -#define FILTER_IMAGE "../../../samples/gpu/road.png" +#ifdef HAVE_OPENCL +#ifdef WIN32 +#define FILTER_IMAGE "C:/Users/Public/Pictures/Sample Pictures/Penguins.jpg" +#else +#define FILTER_IMAGE "/Users/Test/Valve_original.PNG" // user need to specify a valid image path +#endif #define SHOW_RESULT 0 //////////////////////////////////////////////////////// @@ -60,13 +64,13 @@ PARAM_TEST_CASE(Canny, AppertureSize, L2gradient) bool useL2gradient; cv::Mat edges_gold; - //std::vector oclinfo; + //std::vector oclinfo; virtual void SetUp() { apperture_size = GET_PARAM(0); useL2gradient = GET_PARAM(1); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums > 0); } }; @@ -78,31 +82,32 @@ TEST_P(Canny, Accuracy) double low_thresh = 50.0; double high_thresh = 100.0; - cv::resize(img, img, cv::Size(512, 384)); - cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img); + cv::resize(img, img, cv::Size(512, 384)); + cv::ocl::oclMat ocl_img = 
cv::ocl::oclMat(img); - cv::ocl::oclMat edges; - cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient); + cv::ocl::oclMat edges; + cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient); - char filename [100]; - sprintf(filename, "G:/Valve_edges_a%d_L2Grad%d.jpg", apperture_size, (int)useL2gradient); + char filename [100]; + sprintf(filename, "G:/Valve_edges_a%d_L2Grad%d.jpg", apperture_size, (int)useL2gradient); - cv::Mat edges_gold; - cv::Canny(img, edges_gold, low_thresh, high_thresh, apperture_size, useL2gradient); + cv::Mat edges_gold; + cv::Canny(img, edges_gold, low_thresh, high_thresh, apperture_size, useL2gradient); #if SHOW_RESULT - cv::Mat edges_x2, ocl_edges(edges); - edges_x2.create(edges.rows, edges.cols * 2, edges.type()); - edges_x2.setTo(0); - cv::add(edges_gold,cv::Mat(edges_x2,cv::Rect(0,0,edges_gold.cols,edges_gold.rows)), cv::Mat(edges_x2,cv::Rect(0,0,edges_gold.cols,edges_gold.rows))); - cv::add(ocl_edges,cv::Mat(edges_x2,cv::Rect(edges_gold.cols,0,edges_gold.cols,edges_gold.rows)), cv::Mat(edges_x2,cv::Rect(edges_gold.cols,0,edges_gold.cols,edges_gold.rows))); - cv::namedWindow("Canny result (left: cpu, right: ocl)"); + cv::Mat edges_x2, ocl_edges(edges); + edges_x2.create(edges.rows, edges.cols * 2, edges.type()); + edges_x2.setTo(0); + cv::add(edges_gold, cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows))); + cv::add(ocl_edges, cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows))); + cv::namedWindow("Canny result (left: cpu, right: ocl)"); cv::imshow("Canny result (left: cpu, right: ocl)", edges_x2); - cv::waitKey(); + cv::waitKey(); #endif //OUTPUT_RESULT - EXPECT_MAT_SIMILAR(edges_gold, edges, 1e-2); + EXPECT_MAT_SIMILAR(edges_gold, edges, 1e-2); } 
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine( - testing::Values(AppertureSize(3), AppertureSize(5)), - testing::Values(L2gradient(false), L2gradient(true)))); + testing::Values(AppertureSize(3), AppertureSize(5)), + testing::Values(L2gradient(false), L2gradient(true)))); +#endif diff --git a/modules/ocl/test/test_columnsum.cpp b/modules/ocl/test/test_columnsum.cpp index abe113e..9bd2e6f 100644 --- a/modules/ocl/test/test_columnsum.cpp +++ b/modules/ocl/test/test_columnsum.cpp @@ -16,7 +16,7 @@ // // @Authors // Chunpeng Zhang chunpeng@multicorewareinc.com -// +// // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -59,13 +59,13 @@ PARAM_TEST_CASE(ColumnSum, cv::Size, bool ) { cv::Size size; cv::Mat src; - bool useRoi; - //std::vector oclinfo; + bool useRoi; + //std::vector oclinfo; virtual void SetUp() { size = GET_PARAM(0); - useRoi = GET_PARAM(1); + useRoi = GET_PARAM(1); //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE); //CV_Assert(devnums > 0); } @@ -74,10 +74,10 @@ PARAM_TEST_CASE(ColumnSum, cv::Size, bool ) TEST_P(ColumnSum, Accuracy) { cv::Mat src = randomMat(size, CV_32FC1); - cv::ocl::oclMat d_dst; - cv::ocl::oclMat d_src(src); + cv::ocl::oclMat d_dst; + cv::ocl::oclMat d_src(src); - cv::ocl::columnSum(d_src,d_dst); + cv::ocl::columnSum(d_src, d_dst); cv::Mat dst(d_dst); @@ -100,7 +100,7 @@ TEST_P(ColumnSum, Accuracy) } INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ColumnSum, testing::Combine( - DIFFERENT_SIZES,testing::Values(Inverse(false),Inverse(true)))); + DIFFERENT_SIZES, testing::Values(Inverse(false), Inverse(true)))); -#endif +#endif diff --git a/modules/ocl/test/test_fft.cpp b/modules/ocl/test/test_fft.cpp index d0e3acd..13c71a8 100644 --- a/modules/ocl/test/test_fft.cpp +++ b/modules/ocl/test/test_fft.cpp @@ -48,50 +48,50 @@ using namespace std; #ifdef HAVE_CLAMDFFT 
//////////////////////////////////////////////////////////////////////////// // Dft -PARAM_TEST_CASE(Dft, cv::Size, bool) +PARAM_TEST_CASE(Dft, cv::Size, bool) { - cv::Size dft_size; - bool dft_rows; - //std::vector oclinfo; + cv::Size dft_size; + bool dft_rows; + //std::vector oclinfo; virtual void SetUp() { - //int devnums = getDevice(oclinfo); - // CV_Assert(devnums > 0); - dft_size = GET_PARAM(0); - dft_rows = GET_PARAM(1); + //int devnums = getDevice(oclinfo); + // CV_Assert(devnums > 0); + dft_size = GET_PARAM(0); + dft_rows = GET_PARAM(1); } }; TEST_P(Dft, C2C) { - cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0); - cv::Mat b_gold; - int flags = 0; - flags |= dft_rows ? cv::DFT_ROWS : 0; + cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0); + cv::Mat b_gold; + int flags = 0; + flags |= dft_rows ? cv::DFT_ROWS : 0; - cv::ocl::oclMat d_b; - - cv::dft(a, b_gold, flags); - cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags); - EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4, ""); + cv::ocl::oclMat d_b; + + cv::dft(a, b_gold, flags); + cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags); + EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4, ""); } TEST_P(Dft, R2CthenC2R) { - cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0); - - int flags = 0; - //flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet + cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0); + + int flags = 0; + //flags |= dft_rows ? 
cv::DFT_ROWS : 0; // not supported yet - cv::ocl::oclMat d_b, d_c; - cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags); - cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT); - EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, ""); + cv::ocl::oclMat d_b, d_c; + cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags); + cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT); + EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, ""); } INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine( - testing::Values(cv::Size(5, 4), cv::Size(20, 20)), - testing::Values(false, true))); + testing::Values(cv::Size(5, 4), cv::Size(20, 20)), + testing::Values(false, true))); #endif // HAVE_CLAMDFFT diff --git a/modules/ocl/test/test_filters.cpp b/modules/ocl/test/test_filters.cpp index b502bd9..7377eaa 100644 --- a/modules/ocl/test/test_filters.cpp +++ b/modules/ocl/test/test_filters.cpp @@ -119,7 +119,7 @@ PARAM_TEST_CASE(FilterTestBase, MatType, bool) { #ifdef RANDOMROI //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); + cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(1, mat1.cols); roirows = rng.uniform(1, mat1.rows); src1x = rng.uniform(0, mat1.cols - roicols); @@ -211,10 +211,10 @@ PARAM_TEST_CASE(Blur, MatType, cv::Size, int) } void random_roi() - { + { #ifdef RANDOMROI //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); + cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(2, mat1.cols); roirows = rng.uniform(2, mat1.rows); src1x = rng.uniform(0, mat1.cols - roicols); @@ -311,10 +311,10 @@ PARAM_TEST_CASE(LaplacianTestBase, MatType, int) } void random_roi() - { + { #ifdef RANDOMROI //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); + cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(2, mat.cols); roirows = rng.uniform(2, mat.rows); srcx = rng.uniform(0, mat.cols - roicols); @@ -416,10 +416,10 @@ PARAM_TEST_CASE(ErodeDilateBase, MatType, bool) } void random_roi() - { + { #ifdef 
RANDOMROI //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); + cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(2, mat1.cols); roirows = rng.uniform(2, mat1.rows); src1x = rng.uniform(0, mat1.cols - roicols); @@ -559,10 +559,10 @@ PARAM_TEST_CASE(Sobel, MatType, int, int, int, int) } void random_roi() - { + { #ifdef RANDOMROI //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); + cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(2, mat1.cols); roirows = rng.uniform(2, mat1.rows); src1x = rng.uniform(0, mat1.cols - roicols); @@ -663,10 +663,10 @@ PARAM_TEST_CASE(Scharr, MatType, int, int, int) } void random_roi() - { + { #ifdef RANDOMROI //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); + cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(2, mat1.cols); roirows = rng.uniform(2, mat1.rows); src1x = rng.uniform(0, mat1.cols - roicols); @@ -770,10 +770,10 @@ PARAM_TEST_CASE(GaussianBlur, MatType, cv::Size, int) } void random_roi() - { + { #ifdef RANDOMROI //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); + cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(2, mat1.cols); roirows = rng.uniform(2, mat1.rows); src1x = rng.uniform(0, mat1.cols - roicols); @@ -822,13 +822,13 @@ TEST_P(GaussianBlur, Mat) -INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4), +INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4), Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)), Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101))); INSTANTIATE_TEST_CASE_P(Filters, Laplacian, Combine( - Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(1, 3))); //INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3))); @@ -840,20 +840,20 @@ 
INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(Values(CV_8UC1, CV_8UC1), Values( INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(Values(CV_8UC1, CV_8UC1), Values(false))); -INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4), +INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(1, 2), Values(0, 1), Values(3, 5), Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE))); INSTANTIATE_TEST_CASE_P(Filter, Scharr, Combine( - Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4), Values(0, 1), Values(0, 1), + Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4), Values(0, 1), Values(0, 1), Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE))); INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine( - Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4), Values(cv::Size(3, 3), cv::Size(5, 5)), Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE))); - + #endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_gemm.cpp b/modules/ocl/test/test_gemm.cpp index 167c004..4ec3337 100644 --- a/modules/ocl/test/test_gemm.cpp +++ b/modules/ocl/test/test_gemm.cpp @@ -48,38 +48,38 @@ using namespace std; #ifdef HAVE_CLAMDBLAS //////////////////////////////////////////////////////////////////////////// // GEMM -PARAM_TEST_CASE(Gemm, int, cv::Size, int) +PARAM_TEST_CASE(Gemm, int, cv::Size, int) { - int type; - cv::Size mat_size; - int flags; - //vector info; + int type; + cv::Size mat_size; + int flags; + //vector info; virtual void SetUp() { - type = GET_PARAM(0); - mat_size = GET_PARAM(1); - flags = GET_PARAM(2); - //cv::ocl::getDevice(info); + type = GET_PARAM(0); + mat_size = GET_PARAM(1); + flags = GET_PARAM(2); + //cv::ocl::getDevice(info); } }; TEST_P(Gemm, Accuracy) { - cv::Mat a = randomMat(mat_size, type, 0.0, 10.0); - cv::Mat b = randomMat(mat_size, type, 0.0, 
10.0); - cv::Mat c = randomMat(mat_size, type, 0.0, 10.0); + cv::Mat a = randomMat(mat_size, type, 0.0, 10.0); + cv::Mat b = randomMat(mat_size, type, 0.0, 10.0); + cv::Mat c = randomMat(mat_size, type, 0.0, 10.0); - cv::Mat dst; - cv::ocl::oclMat ocl_dst; + cv::Mat dst; + cv::ocl::oclMat ocl_dst; - cv::gemm(a, b, 1.0, c, 1.0, dst, flags); - cv::ocl::gemm(cv::ocl::oclMat(a), cv::ocl::oclMat(b), 1.0, cv::ocl::oclMat(c), 1.0, ocl_dst, flags); + cv::gemm(a, b, 1.0, c, 1.0, dst, flags); + cv::ocl::gemm(cv::ocl::oclMat(a), cv::ocl::oclMat(b), 1.0, cv::ocl::oclMat(c), 1.0, ocl_dst, flags); - EXPECT_MAT_NEAR(dst, ocl_dst, mat_size.area() * 1e-4, ""); + EXPECT_MAT_NEAR(dst, ocl_dst, mat_size.area() * 1e-4, ""); } INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine( - testing::Values(CV_32FC1, CV_32FC2/*, CV_64FC1, CV_64FC2*/), - testing::Values(cv::Size(20, 20), cv::Size(300, 300)), - testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T))); + testing::Values(CV_32FC1, CV_32FC2/*, CV_64FC1, CV_64FC2*/), + testing::Values(cv::Size(20, 20), cv::Size(300, 300)), + testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T))); #endif diff --git a/modules/ocl/test/test_haar.cpp b/modules/ocl/test/test_haar.cpp index 59faffe..1a21ff6 100644 --- a/modules/ocl/test/test_haar.cpp +++ b/modules/ocl/test/test_haar.cpp @@ -53,107 +53,114 @@ using namespace testing; using namespace std; using namespace cv; -struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } }; +struct getRect +{ + Rect operator ()(const CvAvgComp &e) const + { + return e.rect; + } +}; PARAM_TEST_CASE(HaarTestBase, int, int) { - //std::vector oclinfo; - cv::ocl::OclCascadeClassifier cascade, nestedCascade; - cv::CascadeClassifier cpucascade, cpunestedCascade; - // Mat img; - - double scale; - int index; - - virtual void SetUp() - { - scale = 1.0; - index=0; - string cascadeName="../../../data/haarcascades/haarcascade_frontalface_alt.xml"; - - if( 
(!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName))) - { - cout << "ERROR: Could not load classifier cascade" << endl; - cout << "Usage: facedetect [--cascade=]\n" - " [--scale[=\n" - " [filename|camera_index]\n" << endl ; - return; - } - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums>0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath("E:\\"); - } + //std::vector oclinfo; + cv::ocl::OclCascadeClassifier cascade, nestedCascade; + cv::CascadeClassifier cpucascade, cpunestedCascade; + // Mat img; + + double scale; + int index; + + virtual void SetUp() + { + scale = 1.0; + index = 0; + string cascadeName = "../../../data/haarcascades/haarcascade_frontalface_alt.xml"; + + if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName))) + { + cout << "ERROR: Could not load classifier cascade" << endl; + cout << "Usage: facedetect [--cascade=]\n" + " [--scale[=\n" + " [filename|camera_index]\n" << endl ; + return; + } + //int devnums = getDevice(oclinfo); + //CV_Assert(devnums>0); + ////if you want to use undefault device, set it here + ////setDevice(oclinfo[0]); + //cv::ocl::setBinpath("E:\\"); + } }; ////////////////////////////////faceDetect///////////////////////////////////////////////// struct Haar : HaarTestBase {}; -TEST_F(Haar, FaceDetect) -{ - string imgName = "../../../samples/c/lena.jpg"; - Mat img = imread( imgName, 1 ); - - if(img.empty()) - { - std::cout << "Couldn't read test" << index <<".jpg" << std::endl; - return ; - } - - int i = 0; - double t = 0; - vector faces, oclfaces; - - const static Scalar colors[] = { CV_RGB(0,0,255), - CV_RGB(0,128,255), - CV_RGB(0,255,255), - CV_RGB(0,255,0), - CV_RGB(255,128,0), - CV_RGB(255,255,0), - CV_RGB(255,0,0), - CV_RGB(255,0,255)} ; - - Mat gray, smallImg(cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 ); - MemStorage storage(cvCreateMemStorage(0)); - cvtColor( img, gray, CV_BGR2GRAY ); - resize( gray, smallImg, 
smallImg.size(), 0, 0, INTER_LINEAR ); - equalizeHist( smallImg, smallImg ); - - - cv::ocl::oclMat image; - CvSeq* _objects; - image.upload(smallImg); - _objects = cascade.oclHaarDetectObjects( image, storage, 1.1, - 3, 0 - |CV_HAAR_SCALE_IMAGE - , Size(30,30), Size(0, 0) ); - vector vecAvgComp; - Seq(_objects).copyTo(vecAvgComp); - oclfaces.resize(vecAvgComp.size()); - std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect()); - - cpucascade.detectMultiScale( smallImg, faces, 1.1, - 3, 0 - |CV_HAAR_SCALE_IMAGE - , Size(30,30), Size(0, 0) ); - EXPECT_EQ(faces.size(),oclfaces.size()); - /* for( vector::const_iterator r = faces.begin(); r != faces.end(); r++, i++ ) - { - Mat smallImgROI; - Point center; - Scalar color = colors[i%8]; - int radius; - center.x = cvRound((r->x + r->width*0.5)*scale); - center.y = cvRound((r->y + r->height*0.5)*scale); - radius = cvRound((r->width + r->height)*0.25*scale); - circle( img, center, radius, color, 3, 8, 0 ); - } */ - //namedWindow("result"); - //imshow("result",img); - //waitKey(0); - //destroyAllWindows(); +TEST_F(Haar, FaceDetect) +{ + string imgName = "../../../samples/c/lena.jpg"; + Mat img = imread( imgName, 1 ); + + if(img.empty()) + { + std::cout << "Couldn't read test" << index << ".jpg" << std::endl; + return ; + } + + int i = 0; + double t = 0; + vector faces, oclfaces; + + const static Scalar colors[] = { CV_RGB(0, 0, 255), + CV_RGB(0, 128, 255), + CV_RGB(0, 255, 255), + CV_RGB(0, 255, 0), + CV_RGB(255, 128, 0), + CV_RGB(255, 255, 0), + CV_RGB(255, 0, 0), + CV_RGB(255, 0, 255) + } ; + + Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 ); + MemStorage storage(cvCreateMemStorage(0)); + cvtColor( img, gray, CV_BGR2GRAY ); + resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); + equalizeHist( smallImg, smallImg ); + + + cv::ocl::oclMat image; + CvSeq *_objects; + image.upload(smallImg); + _objects = cascade.oclHaarDetectObjects( image, storage, 1.1, + 
3, 0 + | CV_HAAR_SCALE_IMAGE + , Size(30, 30), Size(0, 0) ); + vector vecAvgComp; + Seq(_objects).copyTo(vecAvgComp); + oclfaces.resize(vecAvgComp.size()); + std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect()); + + cpucascade.detectMultiScale( smallImg, faces, 1.1, + 3, 0 + | CV_HAAR_SCALE_IMAGE + , Size(30, 30), Size(0, 0) ); + EXPECT_EQ(faces.size(), oclfaces.size()); + /* for( vector::const_iterator r = faces.begin(); r != faces.end(); r++, i++ ) + { + Mat smallImgROI; + Point center; + Scalar color = colors[i%8]; + int radius; + center.x = cvRound((r->x + r->width*0.5)*scale); + center.y = cvRound((r->y + r->height*0.5)*scale); + radius = cvRound((r->width + r->height)*0.25*scale); + circle( img, center, radius, color, 3, 8, 0 ); + } */ + //namedWindow("result"); + //imshow("result",img); + //waitKey(0); + //destroyAllWindows(); } #endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_hog.cpp b/modules/ocl/test/test_hog.cpp index 8593c3a..16176a2 100644 --- a/modules/ocl/test/test_hog.cpp +++ b/modules/ocl/test/test_hog.cpp @@ -49,15 +49,15 @@ using namespace std; #ifdef HAVE_OPENCL -PARAM_TEST_CASE(HOG,cv::Size,int) +PARAM_TEST_CASE(HOG, cv::Size, int) { - cv::Size winSize; - int type; - virtual void SetUp() - { - winSize = GET_PARAM(0); - type = GET_PARAM(1); - } + cv::Size winSize; + int type; + virtual void SetUp() + { + winSize = GET_PARAM(0); + type = GET_PARAM(1); + } }; TEST_P(HOG, GetDescriptors) @@ -114,7 +114,7 @@ TEST_P(HOG, GetDescriptors) bool match_rect(cv::Rect r1, cv::Rect r2, int threshold) { return ((abs(r1.x - r2.x) < threshold) && (abs(r1.y - r2.y) < threshold) && - (abs(r1.width - r2.width) < threshold) && (abs(r1.height - r2.height) < threshold)); + (abs(r1.width - r2.width) < threshold) && (abs(r1.height - r2.height) < threshold)); } TEST_P(HOG, Detect) @@ -166,21 +166,21 @@ TEST_P(HOG, Detect) // OpenCL detection std::vector d_found; - ocl_hog.detectMultiScale(d_img, d_found, 0, cv::Size(8,8), 
cv::Size(0,0), 1.05, 2); - + ocl_hog.detectMultiScale(d_img, d_found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2); + // CPU detection std::vector found; switch (type) { case CV_8UC1: - hog.detectMultiScale(img, found, 0, cv::Size(8,8), cv::Size(0,0), 1.05, 2); + hog.detectMultiScale(img, found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2); break; case CV_8UC4: default: - hog.detectMultiScale(img_rgb, found, 0, cv::Size(8,8), cv::Size(0,0), 1.05, 2); + hog.detectMultiScale(img_rgb, found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2); break; } - + // Ground-truth rectangular people window cv::Rect win1_64x128(231, 190, 72, 144); cv::Rect win2_64x128(621, 156, 97, 194); @@ -240,14 +240,14 @@ TEST_P(HOG, Detect) } } - char s[100]={0}; + char s[100] = {0}; EXPECT_MAT_NEAR(cv::Mat(d_comp), cv::Mat(comp), 3, s); } INSTANTIATE_TEST_CASE_P(GPU_ImgProc, HOG, testing::Combine( - testing::Values(cv::Size(64, 128), cv::Size(48, 96)), - testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)))); + testing::Values(cv::Size(64, 128), cv::Size(48, 96)), + testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)))); #endif //HAVE_OPENCL diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp index bf2aa49..8e4c0eb 100644 --- a/modules/ocl/test/test_imgproc.cpp +++ b/modules/ocl/test/test_imgproc.cpp @@ -125,7 +125,7 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size { int t0, t1, t2; t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; - if(tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2) + if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) { s0 += t0; s1 += t1; @@ -134,7 +134,7 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size rowCount++; } t0 = ptr[4], t1 = ptr[5], t2 = ptr[6]; - if(tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2) + if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) { s0 += t0; s1 += t1; @@ -143,7 +143,7 @@ COOR do_meanShift(int x0, int 
y0, uchar *sptr, uchar *dptr, int sstep, cv::Size rowCount++; } t0 = ptr[8], t1 = ptr[9], t2 = ptr[10]; - if(tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2) + if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) { s0 += t0; s1 += t1; @@ -152,7 +152,7 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size rowCount++; } t0 = ptr[12], t1 = ptr[13], t2 = ptr[14]; - if(tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2) + if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) { s0 += t0; s1 += t1; @@ -165,7 +165,7 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size for(; x <= maxx; x++, ptr += 4) { int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; - if(tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2) + if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) { s0 += t0; s1 += t1; @@ -191,7 +191,7 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size s2 = cvFloor(s2 * icount); bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) + - tab[s0-c0+255] + tab[s1-c1+255] + tab[s2-c2+255] <= eps); + tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps); //revise the pointer corresponding to the new (y0,x0) revx = x1 - x0; @@ -388,10 +388,10 @@ PARAM_TEST_CASE(ImgprocTestBase, MatType, MatType, MatType, MatType, MatType, bo } void random_roi() - { + { #ifdef RANDOMROI //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); + cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(1, mat1.cols); roirows = rng.uniform(1, mat1.rows); src1x = rng.uniform(0, mat1.cols - roicols); @@ -488,10 +488,10 @@ TEST_P(bilateralFilter, Mat) int radius = 9; int d = 2 * radius + 1; double sigmaspace = 20.0; - int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE,cv::BORDER_REFLECT,cv::BORDER_WRAP,cv::BORDER_REFLECT_101}; - const char* borderstr[]={"BORDER_CONSTANT", "BORDER_REPLICATE", 
"BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"}; + int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, cv::BORDER_REFLECT, cv::BORDER_WRAP, cv::BORDER_REFLECT_101}; + const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"}; - if (mat1.type() != CV_8UC1 || mat1.type() != dst.type()) + if (mat1.depth() != CV_8U || mat1.type() != dst.type()) { cout << "Unsupported type" << endl; EXPECT_DOUBLE_EQ(0.0, 0.0); @@ -502,47 +502,41 @@ TEST_P(bilateralFilter, Mat) for(int j = 0; j < LOOP_TIMES; j++) { random_roi(); - #ifdef RANDOMROI - if(((bordertype[i]!=cv::BORDER_CONSTANT) && (bordertype[i]!=cv::BORDER_REPLICATE))&&(mat1_roi.cols<=radius) || (mat1_roi.cols<=radius) || (mat1_roi.rows <= radius) || (mat1_roi.rows <= radius)) - { - continue; - } - if((dstx>=radius) && (dsty >= radius) && (dstx+cldst_roi.cols+radius <=cldst_roi.wholecols) && (dsty+cldst_roi.rows+radius <= cldst_roi.wholerows)) - { - dst_roi.adjustROI(radius, radius, radius, radius); - cldst_roi.adjustROI(radius, radius, radius, radius); - } - else - { - continue; - } - #endif - cv::bilateralFilter(mat1_roi, dst_roi, d, sigmacolor, sigmaspace, bordertype[i]|cv::BORDER_ISOLATED); - cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d, sigmacolor, sigmaspace, bordertype[i]|cv::BORDER_ISOLATED); + if(((bordertype[i] != cv::BORDER_CONSTANT) && (bordertype[i] != cv::BORDER_REPLICATE)) && (mat1_roi.cols <= radius) || (mat1_roi.cols <= radius) || (mat1_roi.rows <= radius) || (mat1_roi.rows <= radius)) + { + continue; + } + //if((dstx>=radius) && (dsty >= radius) && (dstx+cldst_roi.cols+radius <=cldst_roi.wholecols) && (dsty+cldst_roi.rows+radius <= cldst_roi.wholerows)) + //{ + // dst_roi.adjustROI(radius, radius, radius, radius); + // cldst_roi.adjustROI(radius, radius, radius, radius); + //} + //else + //{ + // continue; + //} + + cv::bilateralFilter(mat1_roi, dst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED); + 
cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED); cv::Mat cpu_cldst; - #ifndef RANDOMROI - cldst_roi.download(cpu_cldst); - #else - cldst.download(cpu_cldst); - #endif + cldst.download(cpu_cldst); + char sss[1024]; sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,radius=%d,boredertype=%s", roicols, roirows, src1x, src1y, dstx, dsty, radius, borderstr[i]); + //for(int i=0;i(i,j)!=cpu_cldst.at(i,j)) + // cout<< i <<" "<< j <<" "<< (int)dst.at(i,j)<<" "<< (int)cpu_cldst.at(i,j)<<" "; + // } + // cout<(i,j)<<" "<< (int)cpu_cldst.at(i,j)<<" "; - // } - // cout<get_rng(); - int top = rng.uniform(0, 10); - int bottom = rng.uniform(0, 10); - int left = rng.uniform(0, 10); - int right = rng.uniform(0, 10); + int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, cv::BORDER_REFLECT, cv::BORDER_WRAP, cv::BORDER_REFLECT_101}; + const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"}; + cv::RNG &rng = TS::ptr()->get_rng(); + int top = rng.uniform(0, 10); + int bottom = rng.uniform(0, 10); + int left = rng.uniform(0, 10); + int right = rng.uniform(0, 10); if (mat1.type() != dst.type()) { cout << "Unsupported type" << endl; @@ -573,45 +567,45 @@ TEST_P(CopyMakeBorder, Mat) for(int j = 0; j < LOOP_TIMES; j++) { random_roi(); - #ifdef RANDOMROI - if(((bordertype[i]!=cv::BORDER_CONSTANT) && (bordertype[i]!=cv::BORDER_REPLICATE))&&(mat1_roi.cols<=left) || (mat1_roi.cols<=right) || (mat1_roi.rows <= top) || (mat1_roi.rows <= bottom)) - { - continue; - } - if((dstx>=left) && (dsty >= top) && (dstx+cldst_roi.cols+right <=cldst_roi.wholecols) && (dsty+cldst_roi.rows+bottom <= cldst_roi.wholerows)) - { - dst_roi.adjustROI(top, bottom, left, right); - cldst_roi.adjustROI(top, bottom, left, right); - } - else - { - continue; - } - #endif - cv::copyMakeBorder(mat1_roi, dst_roi, top, bottom, left, right, bordertype[i]| 
cv::BORDER_ISOLATED, cv::Scalar(1.0)); - cv::ocl::copyMakeBorder(clmat1_roi, cldst_roi, top, bottom, left, right, bordertype[i]| cv::BORDER_ISOLATED, cv::Scalar(1.0)); +#ifdef RANDOMROI + if(((bordertype[i] != cv::BORDER_CONSTANT) && (bordertype[i] != cv::BORDER_REPLICATE)) && (mat1_roi.cols <= left) || (mat1_roi.cols <= right) || (mat1_roi.rows <= top) || (mat1_roi.rows <= bottom)) + { + continue; + } + if((dstx >= left) && (dsty >= top) && (dstx + cldst_roi.cols + right <= cldst_roi.wholecols) && (dsty + cldst_roi.rows + bottom <= cldst_roi.wholerows)) + { + dst_roi.adjustROI(top, bottom, left, right); + cldst_roi.adjustROI(top, bottom, left, right); + } + else + { + continue; + } +#endif + cv::copyMakeBorder(mat1_roi, dst_roi, top, bottom, left, right, bordertype[i] | cv::BORDER_ISOLATED, cv::Scalar(1.0)); + cv::ocl::copyMakeBorder(clmat1_roi, cldst_roi, top, bottom, left, right, bordertype[i] | cv::BORDER_ISOLATED, cv::Scalar(1.0)); cv::Mat cpu_cldst; - #ifndef RANDOMROI +#ifndef RANDOMROI cldst_roi.download(cpu_cldst); - #else - cldst.download(cpu_cldst); - #endif +#else + cldst.download(cpu_cldst); +#endif char sss[1024]; - sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,dst1x=%d,dst1y=%d,top=%d,bottom=%d,left=%d,right=%d, bordertype=%s", roicols, roirows, src1x, src1y, dstx, dsty, dst1x, dst1y, top, bottom, left, right,borderstr[i]); - #ifndef RANDOMROI + sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,dst1x=%d,dst1y=%d,top=%d,bottom=%d,left=%d,right=%d, bordertype=%s", roicols, roirows, src1x, src1y, dstx, dsty, dst1x, dst1y, top, bottom, left, right, borderstr[i]); +#ifndef RANDOMROI EXPECT_MAT_NEAR(dst_roi, cpu_cldst, 0.0, sss); - #else - //for(int i=0;i(i,j)<<" "; - //} - //cout<(i,j)<<" "; + //} + //cout<get_rng(); + cv::RNG &rng = TS::ptr()->get_rng(); src_roicols = rng.uniform(1, mat1.cols); src_roirows = rng.uniform(1, mat1.rows); dst_roicols = rng.uniform(1, dst.cols); @@ -872,7 +866,7 @@ 
PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int) cv::Mat map2; //std::vector oclinfo; - + int src_roicols; int src_roirows; int dst_roicols; @@ -915,7 +909,7 @@ PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int) //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE); //CV_Assert(devnums > 0); - cv::RNG& rng = TS::ptr()->get_rng(); + cv::RNG &rng = TS::ptr()->get_rng(); cv::Size srcSize = cv::Size(MWIDTH, MHEIGHT); cv::Size dstSize = cv::Size(MWIDTH, MHEIGHT); cv::Size map1Size = cv::Size(MWIDTH, MHEIGHT); @@ -937,31 +931,31 @@ PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int) else { - cout<<"The wrong input type"<get_rng(); + cv::RNG &rng = TS::ptr()->get_rng(); dst_roicols = rng.uniform(1, dst.cols); dst_roirows = rng.uniform(1, dst.rows); @@ -969,7 +963,7 @@ PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int) src_roicols = rng.uniform(1, src.cols); src_roirows = rng.uniform(1, src.rows); - + srcx = rng.uniform(0, src.cols - src_roicols); srcy = rng.uniform(0, src.rows - src_roirows); dstx = rng.uniform(0, dst.cols - dst_roicols); @@ -985,19 +979,19 @@ PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int) if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2 && map2Type == nulltype)) { - map1_roi = map1(Rect(map1x,map1y,map1_roicols,map1_roirows)); + map1_roi = map1(Rect(map1x, map1y, map1_roicols, map1_roirows)); gmap1_roi = map1_roi; } else if (map1Type == CV_32FC1 && map2Type == CV_32FC1) { - map1_roi = map1(Rect(map1x,map1y,map1_roicols,map1_roirows)); + map1_roi = map1(Rect(map1x, map1y, map1_roicols, map1_roirows)); gmap1_roi = map1_roi; - map2_roi = map2(Rect(map2x,map2y,map2_roicols,map2_roirows)); + map2_roi = map2(Rect(map2x, map2y, map2_roicols, map2_roirows)); gmap2_roi = map2_roi; } - src_roi = src(Rect(srcx,srcy,src_roicols,src_roirows)); - dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows)); + src_roi = src(Rect(srcx, srcy, src_roicols, src_roirows)); + dst_roi = 
dst(Rect(dstx, dsty, dst_roicols, dst_roirows)); gsrc_roi = src_roi; gdst = dst; gdst_roi = gdst(Rect(dstx, dsty, dst_roicols, dst_roirows)); @@ -1006,15 +1000,15 @@ PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int) TEST_P(Remap, Mat) { - if((interpolation == 1 && map1Type == CV_16SC2) ||(map1Type == CV_32FC1 && map2Type == nulltype) || (map1Type == CV_16SC2 && map2Type == CV_32FC1) || (map1Type == CV_32FC2 && map2Type == CV_32FC1)) + if((interpolation == 1 && map1Type == CV_16SC2) || (map1Type == CV_32FC1 && map2Type == nulltype) || (map1Type == CV_16SC2 && map2Type == CV_32FC1) || (map1Type == CV_32FC2 && map2Type == CV_32FC1)) { cout << "Don't support the dataType" << endl; - return; + return; } - int bordertype[] = {cv::BORDER_CONSTANT,cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/}; - const char* borderstr[]={"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/}; + int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/}; + const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/}; // for(int i = 0; i < sizeof(bordertype)/sizeof(int); i++) - for(int j=0; j<100; j++) + for(int j = 0; j < 100; j++) { random_roi(); cv::remap(src_roi, dst_roi, map1_roi, map2_roi, interpolation, bordertype[0], val); @@ -1025,11 +1019,11 @@ TEST_P(Remap, Mat) char sss[1024]; sprintf(sss, "src_roicols=%d,src_roirows=%d,dst_roicols=%d,dst_roirows=%d,src1x =%d,src1y=%d,dstx=%d,dsty=%d", src_roicols, src_roirows, dst_roicols, dst_roirows, srcx, srcy, dstx, dsty); - + if(interpolation == 0) EXPECT_MAT_NEAR(dst, cpu_dst, 1.0, sss); EXPECT_MAT_NEAR(dst, cpu_dst, 2.0, sss); - + } } @@ -1105,14 +1099,14 @@ PARAM_TEST_CASE(Resize, MatType, cv::Size, double, double, int) } void random_roi() - { + { #ifdef RANDOMROI //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); + cv::RNG &rng = 
TS::ptr()->get_rng(); src_roicols = rng.uniform(1, mat1.cols); src_roirows = rng.uniform(1, mat1.rows); - dst_roicols = (int)(src_roicols*fx); - dst_roirows = (int)(src_roirows*fy); + dst_roicols = (int)(src_roicols * fx); + dst_roirows = (int)(src_roirows * fy); src1x = rng.uniform(0, mat1.cols - src_roicols); src1y = rng.uniform(0, mat1.rows - src_roirows); dstx = rng.uniform(0, dst.cols - dst_roicols); @@ -1151,7 +1145,7 @@ TEST_P(Resize, Mat) // cv::resize(mat1_roi, dst_roi, dsize, fx, fy, interpolation); // cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation); - if(dst_roicols<1||dst_roirows<1) continue; + if(dst_roicols < 1 || dst_roirows < 1) continue; cv::resize(mat1_roi, dst_roi, dsize, fx, fy, interpolation); cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation); @@ -1215,10 +1209,10 @@ PARAM_TEST_CASE(Threshold, MatType, ThreshOp) } void random_roi() - { + { #ifdef RANDOMROI //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); + cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(1, mat1.cols); roirows = rng.uniform(1, mat1.rows); src1x = rng.uniform(0, mat1.cols - roicols); @@ -1411,15 +1405,15 @@ TEST_P(meanShiftProc, Mat) /////////////////////////////////////////////////////////////////////////////////////// //hist -void calcHistGold(const cv::Mat& src, cv::Mat& hist) +void calcHistGold(const cv::Mat &src, cv::Mat &hist) { hist.create(1, 256, CV_32SC1); hist.setTo(cv::Scalar::all(0)); - int* hist_row = hist.ptr(); + int *hist_row = hist.ptr(); for (int y = 0; y < src.rows; ++y) { - const uchar* src_row = src.ptr(y); + const uchar *src_row = src.ptr(y); for (int x = 0; x < src.cols; ++x) ++hist_row[src_row[x]]; @@ -1444,19 +1438,19 @@ PARAM_TEST_CASE(histTestBase, MatType, MatType) cv::ocl::oclMat gdst_hist; //ocl mat with roi cv::ocl::oclMat gsrc_roi; -// std::vector oclinfo; + // std::vector oclinfo; virtual void SetUp() { type_src = GET_PARAM(0); - + cv::RNG &rng = TS::ptr()->get_rng(); cv::Size size = cv::Size(MWIDTH, MHEIGHT); 
src = randomMat(rng, size, type_src, 0, 256, false); -// int devnums = getDevice(oclinfo); -// CV_Assert(devnums > 0); + // int devnums = getDevice(oclinfo); + // CV_Assert(devnums > 0); //if you want to use undefault device, set it here //setDevice(oclinfo[0]); } @@ -1596,45 +1590,45 @@ void conv2( cv::Mat x, cv::Mat y, cv::Mat z) int N2 = y.rows; int M2 = y.cols; - int i,j; - int m,n; - + int i, j; + int m, n; + float *kerneldata = (float *)(x.data); float *srcdata = (float *)(y.data); float *dstdata = (float *)(z.data); - for(i=0;i>2)+n]*srcdata[r*(y.step>>2)+c]; + r = min(max((i - N1 / 2 + m), 0), N2 - 1); + c = min(max((j - M1 / 2 + n), 0), M2 - 1); + temp += kerneldata[m * (x.step >> 2) + n] * srcdata[r * (y.step >> 2) + c]; } - dstdata[i*(z.step >> 2)+j]=temp; + dstdata[i * (z.step >> 2) + j] = temp; } } TEST_P(Convolve, Mat) { - if(mat1.type()!=CV_32FC1) + if(mat1.type() != CV_32FC1) { - cout<<"\tUnsupported type\t\n"; + cout << "\tUnsupported type\t\n"; } - for(int j=0;j oclinfo; + //std::vector oclinfo; virtual void SetUp() { @@ -77,33 +78,33 @@ PARAM_TEST_CASE(MatchTemplate8U, cv::Size, TemplateSize, Channels, TemplateMetho TEST_P(MatchTemplate8U, Accuracy) { - std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl; - std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl; - std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl; - std::cout << "Channels: " << cn << std::endl; + std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl; + std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl; + std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl; + std::cout << "Channels: " << cn << std::endl; - cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn)); + cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn)); cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn)); 
cv::ocl::oclMat dst, ocl_image(image), ocl_templ(templ); - cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method); + cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method); cv::Mat dst_gold; cv::matchTemplate(image, templ, dst_gold, method); - char sss [100] = ""; + char sss [100] = ""; - cv::Mat mat_dst; - dst.download(mat_dst); + cv::Mat mat_dst; + dst.download(mat_dst); EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss); #if PERF_TEST - { - P_TEST_FULL({}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {}); - P_TEST_FULL({}, {cv::matchTemplate(image, templ, dst_gold, method);}, {}); - } + { + P_TEST_FULL( {}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {}); + P_TEST_FULL( {}, {cv::matchTemplate(image, templ, dst_gold, method);}, {}); + } #endif // PERF_TEST } @@ -113,7 +114,7 @@ PARAM_TEST_CASE(MatchTemplate32F, cv::Size, TemplateSize, Channels, TemplateMeth cv::Size templ_size; int cn; int method; - //std::vector oclinfo; + //std::vector oclinfo; virtual void SetUp() { @@ -132,42 +133,42 @@ TEST_P(MatchTemplate32F, Accuracy) cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn)); cv::ocl::oclMat dst, ocl_image(image), ocl_templ(templ); - cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method); + cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method); cv::Mat dst_gold; cv::matchTemplate(image, templ, dst_gold, method); - char sss [100] = ""; + char sss [100] = ""; - cv::Mat mat_dst; - dst.download(mat_dst); + cv::Mat mat_dst; + dst.download(mat_dst); EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss); #if PERF_TEST - { - std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl; - std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl; - std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl; - std::cout << "Channels: " << cn << std::endl; - P_TEST_FULL({}, {cv::ocl::matchTemplate(ocl_image, 
ocl_templ, dst, method);}, {}); - P_TEST_FULL({}, {cv::matchTemplate(image, templ, dst_gold, method);}, {}); - } + { + std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl; + std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl; + std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl; + std::cout << "Channels: " << cn << std::endl; + P_TEST_FULL( {}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {}); + P_TEST_FULL( {}, {cv::matchTemplate(image, templ, dst_gold, method);}, {}); + } #endif // PERF_TEST } -INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U, - testing::Combine( - MTEMP_SIZES, - testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), - testing::Values(Channels(1), Channels(3),Channels(4)), - ALL_TEMPLATE_METHODS - ) -); - -INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine( - MTEMP_SIZES, - testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), - testing::Values(Channels(1), Channels(3),Channels(4)), - testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR)))); - +//INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U, +// testing::Combine( +// MTEMP_SIZES, +// testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), +// testing::Values(Channels(1), Channels(3), Channels(4)), +// ALL_TEMPLATE_METHODS +// ) +// ); +// +//INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine( +// MTEMP_SIZES, +// testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), +// testing::Values(Channels(1), Channels(3), Channels(4)), +// testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR)))); +#endif diff --git 
a/modules/ocl/test/test_matrix_operation.cpp b/modules/ocl/test/test_matrix_operation.cpp index 7d8a2fb..ef11aaa 100644 --- a/modules/ocl/test/test_matrix_operation.cpp +++ b/modules/ocl/test/test_matrix_operation.cpp @@ -98,10 +98,10 @@ PARAM_TEST_CASE(ConvertToTestBase, MatType, MatType) } void random_roi() - { + { #ifdef RANDOMROI //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); + cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(1, mat.cols); roirows = rng.uniform(1, mat.rows); srcx = rng.uniform(0, mat.cols - roicols); @@ -204,10 +204,10 @@ PARAM_TEST_CASE(CopyToTestBase, MatType, bool) } void random_roi() - { + { #ifdef RANDOMROI //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); + cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(1, mat.cols); roirows = rng.uniform(1, mat.rows); srcx = rng.uniform(0, mat.cols - roicols); @@ -329,10 +329,10 @@ PARAM_TEST_CASE(SetToTestBase, MatType, bool) } void random_roi() - { + { #ifdef RANDOMROI //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); + cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(1, mat.cols); roirows = rng.uniform(1, mat.rows); srcx = rng.uniform(0, mat.cols - roicols); @@ -440,10 +440,10 @@ PARAM_TEST_CASE(convertC3C4, MatType, cv::Size) } void random_roi() - { + { #ifdef RANDOMROI //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); + cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(2, mat1.cols); roirows = rng.uniform(2, mat1.rows); src1x = rng.uniform(0, mat1.cols - roicols); @@ -477,12 +477,12 @@ TEST_P(convertC3C4, Accuracy) for(int j = 0; j < LOOP_TIMES; j++) { //random_roi(); - int width = rng.uniform(2, MWIDTH); - int height = rng.uniform(2, MHEIGHT); + int width = rng.uniform(2, MWIDTH); + int height = rng.uniform(2, MHEIGHT); cv::Size size(width, height); mat1 = randomMat(rng, size, type, 0, 40, false); - gmat1 = mat1; + gmat1 = mat1; cv::Mat cpu_dst; gmat1.download(cpu_dst); char sss[1024]; @@ -493,18 +493,18 @@ TEST_P(convertC3C4, 
Accuracy) } INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine( - Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), - Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4))); + Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4))); INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine( - Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine( - Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), + Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(false))); // Values(false) is the reserved parameter INSTANTIATE_TEST_CASE_P(MatrixOperation, convertC3C4, Combine( Values(CV_8UC3, CV_32SC3, CV_32FC3), - Values(cv::Size()))); + Values(cv::Size()))); #endif diff --git a/modules/ocl/test/test_pyrdown.cpp b/modules/ocl/test/test_pyrdown.cpp index ede1a30..c7233cc 100644 --- a/modules/ocl/test/test_pyrdown.cpp +++ b/modules/ocl/test/test_pyrdown.cpp @@ -58,13 +58,13 @@ using namespace std; PARAM_TEST_CASE(PyrDown, MatType, int) { - int type; - int channels; + int type; + int channels; virtual void SetUp() { type = GET_PARAM(0); - channels = GET_PARAM(1); + channels = GET_PARAM(1); //int devnums = getDevice(oclinfo); //CV_Assert(devnums > 0); @@ -72,9 +72,9 @@ PARAM_TEST_CASE(PyrDown, MatType, int) ////setDevice(oclinfo[0]); } - void Cleanup() - { - } + void Cleanup() + { + } }; @@ -84,21 +84,21 @@ TEST_P(PyrDown, Mat) for(int j = 0; j < LOOP_TIMES; j++) { cv::Size size(MWIDTH, MHEIGHT); - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Mat src=randomMat(rng, size, CV_MAKETYPE(type, channels), 0, 100, false); + cv::RNG &rng = 
TS::ptr()->get_rng(); + cv::Mat src = randomMat(rng, size, CV_MAKETYPE(type, channels), 0, 100, false); - cv::ocl::oclMat gsrc(src), gdst; - cv::Mat dst_cpu; - cv::pyrDown(src, dst_cpu); - cv::ocl::pyrDown(gsrc, gdst); + cv::ocl::oclMat gsrc(src), gdst; + cv::Mat dst_cpu; + cv::pyrDown(src, dst_cpu); + cv::ocl::pyrDown(gsrc, gdst); cv::Mat dst; gdst.download(dst); - char s[1024]={0}; + char s[1024] = {0}; - EXPECT_MAT_NEAR(dst, dst_cpu, dst.depth() == CV_32F ? 1e-4f : 1.0f, s); + EXPECT_MAT_NEAR(dst, dst_cpu, dst.depth() == CV_32F ? 1e-4f : 1.0f, s); - Cleanup(); + Cleanup(); } } diff --git a/modules/ocl/test/test_pyrlk.cpp b/modules/ocl/test/test_pyrlk.cpp index c35c72a..f9bcceb 100644 --- a/modules/ocl/test/test_pyrlk.cpp +++ b/modules/ocl/test/test_pyrlk.cpp @@ -72,7 +72,7 @@ PARAM_TEST_CASE(Sparse, bool, bool) virtual void SetUp() { UseSmart = GET_PARAM(0); - useGray = GET_PARAM(0); + useGray = GET_PARAM(0); } }; @@ -94,28 +94,28 @@ TEST_P(Sparse, Mat) cv::goodFeaturesToTrack(gray_frame, pts, 1000, 0.01, 0.0); cv::ocl::oclMat d_pts; - cv::Mat pts_mat(1, (int)pts.size(), CV_32FC2, (void*)&pts[0]); + cv::Mat pts_mat(1, (int)pts.size(), CV_32FC2, (void *)&pts[0]); d_pts.upload(pts_mat); cv::ocl::PyrLKOpticalFlow pyrLK; - cv::ocl::oclMat oclFrame0; - cv::ocl::oclMat oclFrame1; + cv::ocl::oclMat oclFrame0; + cv::ocl::oclMat oclFrame1; cv::ocl::oclMat d_nextPts; cv::ocl::oclMat d_status; cv::ocl::oclMat d_err; - oclFrame0 = frame0; - oclFrame1 = frame1; + oclFrame0 = frame0; + oclFrame1 = frame1; pyrLK.sparse(oclFrame0, oclFrame1, d_pts, d_nextPts, d_status, &d_err); std::vector nextPts(d_nextPts.cols); - cv::Mat nextPts_mat(1, d_nextPts.cols, CV_32FC2, (void*)&nextPts[0]); + cv::Mat nextPts_mat(1, d_nextPts.cols, CV_32FC2, (void *)&nextPts[0]); d_nextPts.download(nextPts_mat); std::vector status(d_status.cols); - cv::Mat status_mat(1, d_status.cols, CV_8UC1, (void*)&status[0]); + cv::Mat status_mat(1, d_status.cols, CV_8UC1, (void *)&status[0]); 
d_status.download(status_mat); //std::vector err(d_err.cols); @@ -156,12 +156,12 @@ TEST_P(Sparse, Mat) double bad_ratio = static_cast(mistmatch) / (nextPts.size() * 2); ASSERT_LE(bad_ratio, 0.05f); - + } INSTANTIATE_TEST_CASE_P(Video, Sparse, Combine( - Values(false, true), - Values(false))); + Values(false, true), + Values(false))); #endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_pyrup.cpp b/modules/ocl/test/test_pyrup.cpp index c50aeb5..9889b92 100644 --- a/modules/ocl/test/test_pyrup.cpp +++ b/modules/ocl/test/test_pyrup.cpp @@ -56,37 +56,37 @@ using namespace std; PARAM_TEST_CASE(PyrUp, MatType, int) { - int type; - int channels; - //std::vector oclinfo; + int type; + int channels; + //std::vector oclinfo; - virtual void SetUp() - { - //int devnums = cv::ocl::getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE); - //CV_Assert(devnums > 0); - type = GET_PARAM(0); - channels = GET_PARAM(1); - } + virtual void SetUp() + { + //int devnums = cv::ocl::getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE); + //CV_Assert(devnums > 0); + type = GET_PARAM(0); + channels = GET_PARAM(1); + } }; -TEST_P(PyrUp,Accuracy) +TEST_P(PyrUp, Accuracy) { - for(int j = 0; j < LOOP_TIMES; j++) + for(int j = 0; j < LOOP_TIMES; j++) { - Size size(MWIDTH, MHEIGHT); - Mat src = randomMat(size,CV_MAKETYPE(type, channels)); - Mat dst_gold; - pyrUp(src,dst_gold); - ocl::oclMat dst; - ocl::oclMat srcMat(src); - ocl::pyrUp(srcMat,dst); - Mat cpu_dst; - dst.download(cpu_dst); - char s[100]={0}; + Size size(MWIDTH, MHEIGHT); + Mat src = randomMat(size, CV_MAKETYPE(type, channels)); + Mat dst_gold; + pyrUp(src, dst_gold); + ocl::oclMat dst; + ocl::oclMat srcMat(src); + ocl::pyrUp(srcMat, dst); + Mat cpu_dst; + dst.download(cpu_dst); + char s[100] = {0}; + + EXPECT_MAT_NEAR(dst_gold, cpu_dst, (src.depth() == CV_32F ? 1e-4f : 1.0), s); + } - EXPECT_MAT_NEAR(dst_gold, cpu_dst, (src.depth() == CV_32F ? 
1e-4f : 1.0),s); - } - } diff --git a/modules/ocl/test/test_split_merge.cpp b/modules/ocl/test/test_split_merge.cpp index e4a4f25..f41d16e 100644 --- a/modules/ocl/test/test_split_merge.cpp +++ b/modules/ocl/test/test_split_merge.cpp @@ -119,10 +119,10 @@ PARAM_TEST_CASE(MergeTestBase, MatType, int) } void random_roi() - { + { #ifdef RANDOMROI //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); + cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(1, mat1.cols); roirows = rng.uniform(1, mat1.rows); src1x = rng.uniform(0, mat1.cols - roicols); @@ -130,8 +130,8 @@ PARAM_TEST_CASE(MergeTestBase, MatType, int) src2x = rng.uniform(0, mat2.cols - roicols); src2y = rng.uniform(0, mat2.rows - roirows); src3x = rng.uniform(0, mat3.cols - roicols); - src3y = rng.uniform(0, mat3.cols - roirows); - src4x = rng.uniform(0, mat4.rows - roicols); + src3y = rng.uniform(0, mat3.rows - roirows); + src4x = rng.uniform(0, mat4.cols - roicols); src4y = rng.uniform(0, mat4.rows - roirows); dstx = rng.uniform(0, dst.cols - roicols); dsty = rng.uniform(0, dst.rows - roirows); @@ -194,13 +194,13 @@ TEST_P(Merge, Accuracy) dev_gsrc.push_back(gmat1); if(channels >= 2) - dev_gsrc.push_back(gmat2); + dev_gsrc.push_back(gmat2); if(channels >= 3) - dev_gsrc.push_back(gmat3); + dev_gsrc.push_back(gmat3); if(channels >= 4) - dev_gsrc.push_back(gmat4); + dev_gsrc.push_back(gmat4); cv::merge(dev_src, dst_roi); cv::ocl::merge(dev_gsrc, gdst); @@ -287,10 +287,10 @@ PARAM_TEST_CASE(SplitTestBase, MatType, int) } void random_roi() - { + { #ifdef RANDOMROI //randomize ROI - cv::RNG &rng = TS::ptr()->get_rng(); + cv::RNG &rng = TS::ptr()->get_rng(); roicols = rng.uniform(1, mat.cols); roirows = rng.uniform(1, mat.rows); srcx = rng.uniform(0, mat.cols - roicols); @@ -368,26 +368,26 @@ TEST_P(Split, Accuracy) sprintf(sss, "roicols=%d,roirows=%d,dst1x =%d,dsty=%d,dst2x =%d,dst2y=%d,dst3x =%d,dst3y=%d,dst4x =%d,dst4y=%d,srcx=%d,srcy=%d", roicols, roirows, dst1x , dst1y, dst2x , dst2y, dst3x , dst3y, 
dst4x , dst4y, srcx, srcy); if(channels >= 1) - EXPECT_MAT_NEAR(dst1, cpu_dst1, 0.0, sss); + EXPECT_MAT_NEAR(dst1, cpu_dst1, 0.0, sss); if(channels >= 2) - EXPECT_MAT_NEAR(dst2, cpu_dst2, 0.0, sss); + EXPECT_MAT_NEAR(dst2, cpu_dst2, 0.0, sss); if(channels >= 3) - EXPECT_MAT_NEAR(dst3, cpu_dst3, 0.0, sss); + EXPECT_MAT_NEAR(dst3, cpu_dst3, 0.0, sss); if(channels >= 4) - EXPECT_MAT_NEAR(dst4, cpu_dst4, 0.0, sss); + EXPECT_MAT_NEAR(dst4, cpu_dst4, 0.0, sss); } } INSTANTIATE_TEST_CASE_P(SplitMerge, Merge, Combine( - Values(CV_8U, CV_32S, CV_32F), Values(1, 3,4))); + Values(CV_8U, CV_32S, CV_32F), Values(1, 3, 4))); INSTANTIATE_TEST_CASE_P(SplitMerge, Split , Combine( - Values(CV_8U, CV_32S, CV_32F), Values(1, 3,4))); - + Values(CV_8U, CV_32S, CV_32F), Values(1, 3, 4))); + #endif // HAVE_OPENCL diff --git a/modules/ocl/test/utility.cpp b/modules/ocl/test/utility.cpp index 2ea4e5d..4b21081 100644 --- a/modules/ocl/test/utility.cpp +++ b/modules/ocl/test/utility.cpp @@ -207,7 +207,7 @@ vector types(int depth_start, int depth_end, int cn_start, int cn_end) return v; } -const vector& all_types() +const vector &all_types() { static vector v = types(CV_8U, CV_64F, 1, 4); diff --git a/modules/ocl/test/utility.hpp b/modules/ocl/test/utility.hpp index e4742c4..4ebf129 100644 --- a/modules/ocl/test/utility.hpp +++ b/modules/ocl/test/utility.hpp @@ -112,7 +112,7 @@ using perf::MatType; std::vector types(int depth_start, int depth_end, int cn_start, int cn_end); //! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4). -const std::vector& all_types(); +const std::vector &all_types(); class Inverse { -- 2.7.4