//////////////////////////////// oclMat ////////////////////////////////
////////////////////////////////////////////////////////////////////////
- inline oclMat::oclMat() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0) {}
+ inline oclMat::oclMat() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0) {}
- inline oclMat::oclMat(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0)
+ inline oclMat::oclMat(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
{
if( _rows > 0 && _cols > 0 )
create( _rows, _cols, _type );
}
- inline oclMat::oclMat(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0)
+ inline oclMat::oclMat(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
{
if( _size.height > 0 && _size.width > 0 )
create( _size.height, _size.width, _type );
}
inline oclMat::oclMat(int _rows, int _cols, int _type, const Scalar &_s)
- : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0)
+ : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
{
if(_rows > 0 && _cols > 0)
{
}
inline oclMat::oclMat(Size _size, int _type, const Scalar &_s)
- : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0)
+ : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
{
if( _size.height > 0 && _size.width > 0 )
{
inline oclMat::oclMat(const oclMat &m)
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data),
- refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), clCxt(m.clCxt), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols), download_channels(m.download_channels)
+ refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), clCxt(m.clCxt), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols)
{
if( refcount )
CV_XADD(refcount, 1);
}
-
+
inline oclMat::oclMat(int _rows, int _cols, int _type, void *_data, size_t _step)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0),
- datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0)
+ datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
{
- cv::Mat m(_rows,_cols,_type,_data,_step);
- upload(m);
+ cv::Mat m(_rows, _cols, _type, _data, _step);
+ upload(m);
//size_t minstep = cols * elemSize();
//if( step == Mat::AUTO_STEP )
//{
//}
//dataend += step * (rows - 1) + minstep;
}
-
+
inline oclMat::oclMat(Size _size, int _type, void *_data, size_t _step)
: flags(0), rows(0), cols(0),
step(0), data(0), refcount(0),
- datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0), download_channels(0)
+ datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
{
- cv::Mat m(_size,_type,_data,_step);
- upload(m);
+ cv::Mat m(_size, _type, _data, _step);
+ upload(m);
//size_t minstep = cols * elemSize();
//if( step == Mat::AUTO_STEP )
//{
wholerows = m.wholerows;
wholecols = m.wholecols;
offset = m.offset;
- download_channels = m.download_channels;
if( rowRange == Range::all() )
rows = m.rows;
else
inline oclMat::oclMat(const oclMat &m, const Rect &roi)
: flags(m.flags), rows(roi.height), cols(roi.width),
step(m.step), data(m.data), refcount(m.refcount),
- datastart(m.datastart), dataend(m.dataend), clCxt(m.clCxt), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols), download_channels(m.download_channels)
+ datastart(m.datastart), dataend(m.dataend), clCxt(m.clCxt), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols)
{
flags &= roi.width < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
offset += roi.y * step + roi.x * elemSize();
}
inline oclMat::oclMat(const Mat &m)
- : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) , offset(0), wholerows(0), wholecols(0), download_channels(0)
+ : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) , offset(0), wholerows(0), wholecols(0)
{
//clCxt = Context::getContext();
upload(m);
wholerows = m.wholerows;
wholecols = m.wholecols;
refcount = m.refcount;
- download_channels = m.download_channels;
}
return *this;
}
std::swap( dataend, b.dataend );
std::swap( refcount, b.refcount );
std::swap( offset, b.offset );
- std::swap( clCxt, b.clCxt );
+ std::swap( clCxt, b.clCxt );
std::swap( wholerows, b.wholerows );
std::swap( wholecols, b.wholecols );
- std::swap( download_channels, b.download_channels);
}
inline void oclMat::locateROI( Size &wholeSize, Point &ofs ) const
offset += (row1 - ofs.y) * step + (col1 - ofs.x) * esz;
rows = row2 - row1;
cols = col2 - col1;
- if( esz *cols == step || rows == 1 )
+ if( esz * cols == step || rows == 1 )
flags |= Mat::CONTINUOUS_FLAG;
else
flags &= ~Mat::CONTINUOUS_FLAG;
}
inline size_t oclMat::elemSize() const
{
- return CV_ELEM_SIZE(flags);
+ return CV_ELEM_SIZE((CV_MAKE_TYPE(type(), oclchannels())));
}
inline size_t oclMat::elemSize1() const
{
{
return CV_MAT_TYPE(flags);
}
+ inline int oclMat::ocltype() const
+ {
+ return CV_MAKE_TYPE(depth(), oclchannels());
+ }
inline int oclMat::depth() const
{
return CV_MAT_DEPTH(flags);
{
return CV_MAT_CN(flags);
}
+ inline int oclMat::oclchannels() const
+ {
+ return (CV_MAT_CN(flags)) == 3 ? 4 : (CV_MAT_CN(flags));
+ }
inline size_t oclMat::step1() const
{
return step / elemSize1();
}
-
+
inline uchar *oclMat::ptr(int y)
{
CV_DbgAssert( (unsigned)y < (unsigned)rows );
- CV_Error(CV_GpuNotSupported,"This function hasn't been supported yet.\n");
+ CV_Error(CV_GpuNotSupported, "This function hasn't been supported yet.\n");
return data + step * y;
}
inline const uchar *oclMat::ptr(int y) const
{
CV_DbgAssert( (unsigned)y < (unsigned)rows );
- CV_Error(CV_GpuNotSupported,"This function hasn't been supported yet.\n");
+ CV_Error(CV_GpuNotSupported, "This function hasn't been supported yet.\n");
return data + step * y;
}
template<typename _Tp> inline _Tp *oclMat::ptr(int y)
{
CV_DbgAssert( (unsigned)y < (unsigned)rows );
- CV_Error(CV_GpuNotSupported,"This function hasn't been supported yet.\n");
+ CV_Error(CV_GpuNotSupported, "This function hasn't been supported yet.\n");
return (_Tp *)(data + step * y);
}
template<typename _Tp> inline const _Tp *oclMat::ptr(int y) const
{
CV_DbgAssert( (unsigned)y < (unsigned)rows );
- CV_Error(CV_GpuNotSupported,"This function hasn't been supported yet.\n");
+ CV_Error(CV_GpuNotSupported, "This function hasn't been supported yet.\n");
return (const _Tp *)(data + step * y);
}
a.swap(b);
}
- inline void ensureSizeIsEnough(int rows, int cols, int type, oclMat& m)
- {
- if (m.type() == type && m.rows >= rows && m.cols >= cols)
- m = m(Rect(0, 0, cols, rows));
- else
- m.create(rows, cols, type);
- }
+ inline void ensureSizeIsEnough(int rows, int cols, int type, oclMat &m)
+ {
+ if (m.type() == type && m.rows >= rows && m.cols >= cols)
+ m = m(Rect(0, 0, cols, rows));
+ else
+ m.create(rows, cols, type);
+ }
+
+ inline void ensureSizeIsEnough(Size size, int type, oclMat &m)
+ {
+ ensureSizeIsEnough(size.height, size.width, type, m);
+ }
+
- inline void ensureSizeIsEnough(Size size, int type, oclMat& m)
- {
- ensureSizeIsEnough(size.height, size.width, type, m);
- }
} /* end of namespace ocl */
} /* end of namespace cv */
namespace ocl
{
using std::auto_ptr;
-
+
#define CVCL_DEVICE_TYPE_DEFAULT (1 << 0)
#define CVCL_DEVICE_TYPE_CPU (1 << 1)
#define CVCL_DEVICE_TYPE_GPU (1 << 2)
~Info();
void release();
Info &operator = (const Info &m);
+ std::vector<string> DeviceName;
};
//////////////////////////////// Initialization & Info ////////////////////////
//this function may be obsoleted
//CV_EXPORTS cl_device_id getDevice();
//the function must be called before any other cv::ocl::functions, it initialize ocl runtime
- CV_EXPORTS int getDevice(std::vector<Info>& oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU);
+ CV_EXPORTS int getDevice(std::vector<Info> &oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU);
//set device you want to use, optional function after getDevice be called
CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0);
//this function is not ready yet
//CV_EXPORTS void getComputeCapability(cl_device_id device, int &major, int &minor);
//optional function, if you want save opencl binary kernel to the file, set its path
CV_EXPORTS void setBinpath(const char *path);
- //The two functions below are used to get opencl runtime so that opencv can interactive with \r
- //other opencl program\r
- CV_EXPORTS void* getoclContext();\r
- CV_EXPORTS void* getoclCommandQueue();
+ //The two functions below are used to get opencl runtime so that opencv can interactive with
+
+ //other opencl program
+
+ CV_EXPORTS void *getoclContext();
+
+ CV_EXPORTS void *getoclCommandQueue();
//////////////////////////////// Error handling ////////////////////////
CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
//////////////////////////////// oclMat ////////////////////////////////
class CV_EXPORTS oclMat
{
- public:
+ public:
//! default constructor
oclMat();
//! constructs oclMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
size_t elemSize1() const;
//! returns element type, similar to CV_MAT_TYPE(cvMat->type)
int type() const;
+ //! returns element type, i.e. 8UC3 returns 8UC4 because in ocl
+ //! 3 channels element actually use 4 channel space
+ int ocltype() const;
//! returns element type, similar to CV_MAT_DEPTH(cvMat->type)
int depth() const;
//! returns element type, similar to CV_MAT_CN(cvMat->type)
int channels() const;
+ //! returns element type, return 4 for 3 channels element,
+ //!becuase 3 channels element actually use 4 channel space
+ int oclchannels() const;
//! returns step/elemSize1()
size_t step1() const;
//! returns oclMatrix size:
//add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used
int wholerows;
int wholecols;
- //add download_channels for 3 channels to 4 channels
- int download_channels;
};
///////////////////// mat split and merge /////////////////////////////////
//#else
//typedef float F;
//#endif
- // CV_EXPORTS void addWeighted(const oclMat& a,F alpha, const oclMat& b,F beta,F gama, oclMat& c);
+ // CV_EXPORTS void addWeighted(const oclMat& a,F alpha, const oclMat& b,F beta,F gama, oclMat& c);
CV_EXPORTS void addWeighted(const oclMat &a, double alpha, const oclMat &b, double beta, double gama, oclMat &c);
//! adds one matrix to another (c = a + b)
// supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4
// support all types
CV_EXPORTS Scalar sum(const oclMat &m);
+ CV_EXPORTS Scalar sqrSum(const oclMat &m);
+
//! finds global minimum and maximum array elements and returns their values
- // support all C1 types\r
+ // support all C1 types
+
CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat());
//! finds global minimum and maximum array elements and returns their values with locations
- // support all C1 types\r
+ // support all C1 types
+
CV_EXPORTS void minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0,
- const oclMat &mask = oclMat());
+ const oclMat &mask = oclMat());
//! counts non-zero array elements
// support all types
// supports all types
CV_EXPORTS void bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat());
CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat());
- //! computes convolution of two images \r
- //! support only CV_32FC1 type\r
- CV_EXPORTS void convolve(const oclMat& image,const oclMat& temp1, oclMat& result);\r
+ //! computes convolution of two images
+
+ //! support only CV_32FC1 type
+
+ CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result);
+
//! Logical operators
CV_EXPORTS oclMat operator ~ (const oclMat &src);
*/
class CV_EXPORTS BaseRowFilter_GPU
{
- public:
- BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
- virtual ~BaseRowFilter_GPU() {}
- virtual void operator()(const oclMat &src, oclMat &dst) = 0;
- int ksize, anchor, bordertype;
+ public:
+ BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
+ virtual ~BaseRowFilter_GPU() {}
+ virtual void operator()(const oclMat &src, oclMat &dst) = 0;
+ int ksize, anchor, bordertype;
};
/*!
*/
class CV_EXPORTS BaseColumnFilter_GPU
{
- public:
- BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
- virtual ~BaseColumnFilter_GPU() {}
- virtual void operator()(const oclMat &src, oclMat &dst) = 0;
- int ksize, anchor, bordertype;
+ public:
+ BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
+ virtual ~BaseColumnFilter_GPU() {}
+ virtual void operator()(const oclMat &src, oclMat &dst) = 0;
+ int ksize, anchor, bordertype;
};
/*!
*/
class CV_EXPORTS BaseFilter_GPU
{
- public:
- BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_)
- : ksize(ksize_), anchor(anchor_), borderType(borderType_) {}
- virtual ~BaseFilter_GPU() {}
- virtual void operator()(const oclMat &src, oclMat &dst) = 0;
- Size ksize;
- Point anchor;
- int borderType;
+ public:
+ BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_)
+ : ksize(ksize_), anchor(anchor_), borderType(borderType_) {}
+ virtual ~BaseFilter_GPU() {}
+ virtual void operator()(const oclMat &src, oclMat &dst) = 0;
+ Size ksize;
+ Point anchor;
+ int borderType;
};
/*!
*/
class CV_EXPORTS FilterEngine_GPU
{
- public:
- virtual ~FilterEngine_GPU() {}
+ public:
+ virtual ~FilterEngine_GPU() {}
- virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0;
+ virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0;
};
//! returns the non-separable filter engine with the specified filter
const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
//! returns the separable filter engine with the specified filters
- CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
- const Ptr<BaseColumnFilter_GPU>& columnFilter);
+ CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
+ const Ptr<BaseColumnFilter_GPU> &columnFilter);
//! returns the Gaussian filter engine
CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
//! returns filter engine for the generalized Sobel operator
- CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType=BORDER_DEFAULT );
+ CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT );
//! applies Laplacian operator to the image
// supports only ksize = 1 and ksize = 3 8UC1 8UC4 32FC1 32FC4 data type
// supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
// supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101,BORDER_WRAP
CV_EXPORTS void boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize,
- Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+ Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
//! returns 2D morphological filter
//! only MORPH_ERODE and MORPH_DILATE are supported
// supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
// supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101
static inline void blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1),
- int borderType = BORDER_CONSTANT)
+ int borderType = BORDER_CONSTANT)
{
boxFilter(src, dst, -1, ksize, anchor, borderType);
}
//! applies non-separable 2D linear filter to the image
CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel,
- Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
+ Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
//! applies separable 2D linear filter to the image
CV_EXPORTS void sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY,
- Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
+ Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
//! applies generalized Sobel operator to the image
// dst.type must equalize src.type
//! erodes the image (applies the local minimum operator)
// supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
- CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,\r
- int borderType=BORDER_CONSTANT,const Scalar& borderValue=morphologyDefaultBorderValue());\r
+ CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
+
+ int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
+
//! dilates the image (applies the local maximum operator)
// supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4
- CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,\r
- int borderType=BORDER_CONSTANT,const Scalar& borderValue=morphologyDefaultBorderValue());\r
+ CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
+
+ int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
+
//! applies an advanced morphological operation to the image
- CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,\r
- int borderType=BORDER_CONSTANT,const Scalar& borderValue=morphologyDefaultBorderValue());\r
+ CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1,
+
+ int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue());
+
////////////////////////////// Image processing //////////////////////////////
//! Does mean shift filtering on GPU.
CV_EXPORTS void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr,
- TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+ TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
//! Does mean shift procedure on GPU.
CV_EXPORTS void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr,
- TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+ TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
//! Does mean shift segmentation with elimiation of small regions.
CV_EXPORTS void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize,
- TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
+ TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
//! applies fixed threshold to the image.
// supports CV_8UC1 and CV_32FC1 data type
// supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types
CV_EXPORTS void resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR);
- //! Applies a generic geometrical transformation to an image.\r
- // Supports INTER_NEAREST, INTER_LINEAR.\r
- // Map1 supports CV_16SC2, CV_32FC2 types.\r
- // Src supports CV_8UC1, CV_8UC2, CV_8UC4.\r
- CV_EXPORTS void remap(const oclMat& src, oclMat& dst, oclMat& map1, oclMat& map2, int interpolation, int bordertype, const Scalar& value = Scalar());\r
+ //! Applies a generic geometrical transformation to an image.
+
+ // Supports INTER_NEAREST, INTER_LINEAR.
+
+ // Map1 supports CV_16SC2, CV_32FC2 types.
+
+ // Src supports CV_8UC1, CV_8UC2, CV_8UC4.
+
+ CV_EXPORTS void remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar());
+
//! copies 2D array to a larger destination array and pads borders with user-specifiable constant
// supports CV_8UC1, CV_8UC4, CV_32SC1 types
CV_EXPORTS void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar());
~OclCascadeClassifier() {};
CvSeq *oclHaarDetectObjects(oclMat &gimg, CvMemStorage *storage, double scaleFactor,
- int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0));
+ int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0));
+ };
+
+
+
+ /////////////////////////////// Pyramid /////////////////////////////////////
+ CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst);
+
+ //! upsamples the source image and then smoothes it
+ CV_EXPORTS void pyrUp(const cv::ocl::oclMat &src, cv::ocl::oclMat &dst);
+
+ //! performs linear blending of two images
+ //! to avoid accuracy errors sum of weigths shouldn't be very close to zero
+ // supports only CV_8UC1 source type
+ CV_EXPORTS void blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, oclMat &result);
+
+ //! computes vertical sum, supports only CV_32FC1 images
+ CV_EXPORTS void columnSum(const oclMat &src, oclMat &sum);
+
+ ///////////////////////////////////////// match_template /////////////////////////////////////////////////////////////
+ struct CV_EXPORTS MatchTemplateBuf
+ {
+ Size user_block_size;
+ oclMat imagef, templf;
+ std::vector<oclMat> images;
+ std::vector<oclMat> image_sums;
+ std::vector<oclMat> image_sqsums;
};
- //! computes vertical sum, supports only CV_32FC1 images
- CV_EXPORTS void columnSum(const oclMat& src, oclMat& sum);
-
- //! performs linear blending of two images
- //! to avoid accuracy errors sum of weigths shouldn't be very close to zero
- // supports only CV_8UC1 source type
- CV_EXPORTS void blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2, oclMat& result);
-
- /////////////////////////////// Pyramid /////////////////////////////////////
- CV_EXPORTS void pyrDown(const oclMat& src, oclMat& dst);
-
- //! upsamples the source image and then smoothes it
- CV_EXPORTS void pyrUp(const cv::ocl::oclMat& src,cv::ocl::oclMat& dst);
-
- ///////////////////////////////////////// match_template /////////////////////////////////////////////////////////////
- struct CV_EXPORTS MatchTemplateBuf
- {
- Size user_block_size;
- oclMat imagef, templf;
- std::vector<oclMat> images;
- std::vector<oclMat> image_sums;
- std::vector<oclMat> image_sqsums;
- };
-
-
- //! computes the proximity map for the raster template and the image where the template is searched for
- // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
- // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
- CV_EXPORTS void matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method);
-
- //! computes the proximity map for the raster template and the image where the template is searched for
- // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
- // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
- CV_EXPORTS void matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf);
-
- \r
- ///////////////////////////////////////////// Canny /////////////////////////////////////////////\r
- struct CV_EXPORTS CannyBuf;\r
-\r
- //! compute edges of the input image using Canny operator\r
- // Support CV_8UC1 only\r
- CV_EXPORTS void Canny(const oclMat& image, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);\r
- CV_EXPORTS void Canny(const oclMat& image, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);\r
- CV_EXPORTS void Canny(const oclMat& dx, const oclMat& dy, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);\r
- CV_EXPORTS void Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false);\r
-\r
- struct CV_EXPORTS CannyBuf\r
- {\r
- CannyBuf() : counter(NULL) {}\r
- ~CannyBuf() { release(); }\r
- explicit CannyBuf(const Size& image_size, int apperture_size = 3) : counter(NULL)\r
- {\r
- create(image_size, apperture_size);\r
- }\r
- CannyBuf(const oclMat& dx_, const oclMat& dy_);\r
-\r
- void create(const Size& image_size, int apperture_size = 3);\r
-\r
- void release();\r
-\r
- oclMat dx, dy;\r
- oclMat dx_buf, dy_buf;\r
- oclMat edgeBuf;\r
- oclMat trackBuf1, trackBuf2;\r
- void * counter;\r
- Ptr<FilterEngine_GPU> filterDX, filterDY;\r
- };
+ //! computes the proximity map for the raster template and the image where the template is searched for
+ // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
+ // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
+ CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method);
+
+ //! computes the proximity map for the raster template and the image where the template is searched for
+ // Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
+ // Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
+ CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf);
+
+
+
+ ///////////////////////////////////////////// Canny /////////////////////////////////////////////
+
+ struct CV_EXPORTS CannyBuf;
+
+
+
+ //! compute edges of the input image using Canny operator
+
+ // Support CV_8UC1 only
+
+ CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+
+ CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
+
+ CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
+
+ CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
+
+
+
+ struct CV_EXPORTS CannyBuf
+
+ {
+
+ CannyBuf() : counter(NULL) {}
+
+ ~CannyBuf()
+ {
+ release();
+ }
+
+ explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(NULL)
+
+ {
+
+ create(image_size, apperture_size);
+
+ }
+
+ CannyBuf(const oclMat &dx_, const oclMat &dy_);
+
+
+
+ void create(const Size &image_size, int apperture_size = 3);
+
+
+
+ void release();
+
+
+
+ oclMat dx, dy;
+
+ oclMat dx_buf, dy_buf;
+
+ oclMat edgeBuf;
+
+ oclMat trackBuf1, trackBuf2;
+
+ void *counter;
+
+ Ptr<FilterEngine_GPU> filterDX, filterDY;
+
+ };
#ifdef HAVE_CLAMDFFT
- ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
- // the two functions must be called before/after run any fft library functions.
- CV_EXPORTS void fft_setup(); // this will be implicitly invoked
- CV_EXPORTS void fft_teardown(); // you need to teardown fft library manually
-
- /////////////////////////////////////// DFT /////////////////////////////////////////////////////
- //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
- //! Param dft_size is the size of DFT transform.
- //!
- //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
- // support src type of CV32FC1, CV32FC2
- // support flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
- // dft_size is the size of original input, which is used for transformation from complex to real.
- // dft_size must be powers of 2, 3 and 5
- // real to complex dft requires at least v1.8 clAmdFft
- // real to complex dft output is not the same with cpu version
- // real to complex and complex to real does not support DFT_ROWS
- CV_EXPORTS void dft(const oclMat& src, oclMat& dst, Size dft_size = Size(0, 0), int flags = 0);
+ ///////////////////////////////////////// clAmdFft related /////////////////////////////////////////
+ //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
+ //! Param dft_size is the size of DFT transform.
+ //!
+ //! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
+ // support src type of CV32FC1, CV32FC2
+ // support flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
+ // dft_size is the size of original input, which is used for transformation from complex to real.
+ // dft_size must be powers of 2, 3 and 5
+ // real to complex dft requires at least v1.8 clAmdFft
+ // real to complex dft output is not the same with cpu version
+ // real to complex and complex to real does not support DFT_ROWS
+ CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(0, 0), int flags = 0);
#endif // HAVE_CLAMDFFT
#ifdef HAVE_CLAMDBLAS
- //! implements generalized matrix product algorithm GEMM from BLAS
- // The functionality requires clAmdBlas library
- // only support type CV_32FC1
- // flag GEMM_3_T is not supported
- CV_EXPORTS void gemm(const oclMat& src1, const oclMat& src2, double alpha,
- const oclMat& src3, double beta, oclMat& dst, int flags = 0);
+ //! implements generalized matrix product algorithm GEMM from BLAS
+ // The functionality requires clAmdBlas library
+ // only support type CV_32FC1
+ // flag GEMM_3_T is not supported
+ CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha,
+ const oclMat &src3, double beta, oclMat &dst, int flags = 0);
#endif
- //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////\r
- struct CV_EXPORTS HOGDescriptor\r
- {\r
- enum { DEFAULT_WIN_SIGMA = -1 };\r
- enum { DEFAULT_NLEVELS = 64 };\r
- enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };\r
-\r
- HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),\r
- Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),\r
- int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA,\r
- double threshold_L2hys=0.2, bool gamma_correction=true,\r
- int nlevels=DEFAULT_NLEVELS);\r
-\r
- size_t getDescriptorSize() const;\r
- size_t getBlockHistogramSize() const;\r
-\r
- void setSVMDetector(const vector<float>& detector);\r
-\r
- static vector<float> getDefaultPeopleDetector();\r
- static vector<float> getPeopleDetector48x96();\r
- static vector<float> getPeopleDetector64x128();\r
-\r
- void detect(const oclMat& img, vector<Point>& found_locations,\r
- double hit_threshold=0, Size win_stride=Size(),\r
- Size padding=Size());\r
-\r
- void detectMultiScale(const oclMat& img, vector<Rect>& found_locations,\r
- double hit_threshold=0, Size win_stride=Size(),\r
- Size padding=Size(), double scale0=1.05,\r
- int group_threshold=2);\r
-\r
- void getDescriptors(const oclMat& img, Size win_stride,\r
- oclMat& descriptors,\r
- int descr_format=DESCR_FORMAT_COL_BY_COL);\r
-\r
- Size win_size;\r
- Size block_size;\r
- Size block_stride;\r
- Size cell_size;\r
- int nbins;\r
- double win_sigma;\r
- double threshold_L2hys;\r
- bool gamma_correction;\r
- int nlevels;\r
-\r
- protected:\r
- // initialize buffers; only need to do once in case of multiscale detection\r
- void init_buffer(const oclMat& img, Size win_stride);\r
-\r
- void computeBlockHistograms(const oclMat& img);\r
- void computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle);\r
-\r
- double getWinSigma() const;\r
- bool checkDetectorSize() const;\r
-\r
- static int numPartsWithin(int size, int part_size, int stride);\r
- static Size numPartsWithin(Size size, Size part_size, Size stride);\r
-\r
- // Coefficients of the separating plane\r
- float free_coef;\r
- oclMat detector;\r
-\r
- // Results of the last classification step\r
- oclMat labels;\r
- Mat labels_host;\r
-\r
- // Results of the last histogram evaluation step\r
- oclMat block_hists;\r
-\r
- // Gradients conputation results\r
- oclMat grad, qangle;\r
-\r
- // scaled image\r
- oclMat image_scale;\r
-\r
- // effect size of input image (might be different from original size after scaling)\r
- Size effect_size;\r
- };\r
-
- //! Speeded up robust features, port from GPU module.\r
- ////////////////////////////////// SURF //////////////////////////////////////////\r
- class CV_EXPORTS SURF_OCL\r
- {\r
- public:\r
- enum KeypointLayout\r
- {\r
- X_ROW = 0,\r
- Y_ROW,\r
- LAPLACIAN_ROW,\r
- OCTAVE_ROW,\r
- SIZE_ROW,\r
- ANGLE_ROW,\r
- HESSIAN_ROW,\r
- ROWS_COUNT\r
- };\r
-\r
- //! the default constructor\r
- SURF_OCL();\r
- //! the full constructor taking all the necessary parameters\r
- explicit SURF_OCL(double _hessianThreshold, int _nOctaves=4,\r
- int _nOctaveLayers=2, bool _extended=false, float _keypointsRatio=0.01f, bool _upright = false);\r
-\r
- //! returns the descriptor size in float's (64 or 128)\r
- int descriptorSize() const;\r
- \r
- //! upload host keypoints to device memory\r
- void uploadKeypoints(const vector<cv::KeyPoint>& keypoints, oclMat& keypointsocl);\r
- //! download keypoints from device to host memory\r
- void downloadKeypoints(const oclMat& keypointsocl, vector<KeyPoint>& keypoints);\r
-\r
- //! download descriptors from device to host memory\r
- void downloadDescriptors(const oclMat& descriptorsocl, vector<float>& descriptors);\r
-\r
- //! finds the keypoints using fast hessian detector used in SURF\r
- //! supports CV_8UC1 images\r
- //! keypoints will have nFeature cols and 6 rows\r
- //! keypoints.ptr<float>(X_ROW)[i] will contain x coordinate of i'th feature\r
- //! keypoints.ptr<float>(Y_ROW)[i] will contain y coordinate of i'th feature\r
- //! keypoints.ptr<float>(LAPLACIAN_ROW)[i] will contain laplacian sign of i'th feature\r
- //! keypoints.ptr<float>(OCTAVE_ROW)[i] will contain octave of i'th feature\r
- //! keypoints.ptr<float>(SIZE_ROW)[i] will contain size of i'th feature\r
- //! keypoints.ptr<float>(ANGLE_ROW)[i] will contain orientation of i'th feature\r
- //! keypoints.ptr<float>(HESSIAN_ROW)[i] will contain response of i'th feature\r
- void operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints);\r
- //! finds the keypoints and computes their descriptors.\r
- //! Optionally it can compute descriptors for the user-provided keypoints and recompute keypoints direction\r
- void operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints, oclMat& descriptors,\r
- bool useProvidedKeypoints = false);\r
-\r
- void operator()(const oclMat& img, const oclMat& mask, std::vector<KeyPoint>& keypoints);\r
- void operator()(const oclMat& img, const oclMat& mask, std::vector<KeyPoint>& keypoints, oclMat& descriptors,\r
- bool useProvidedKeypoints = false);\r
-\r
- void operator()(const oclMat& img, const oclMat& mask, std::vector<KeyPoint>& keypoints, std::vector<float>& descriptors,\r
- bool useProvidedKeypoints = false);\r
-\r
- void releaseMemory();\r
-\r
- // SURF parameters\r
- float hessianThreshold;\r
- int nOctaves;\r
- int nOctaveLayers;\r
- bool extended;\r
- bool upright;\r
-\r
- //! max keypoints = min(keypointsRatio * img.size().area(), 65535)\r
- float keypointsRatio;\r
-\r
- oclMat sum, mask1, maskSum, intBuffer;\r
-\r
- oclMat det, trace;\r
-\r
- oclMat maxPosBuffer;\r
-\r
- };\r
- ////////////////////////////////// BruteForceMatcher //////////////////////////////////\r
-\r
- class CV_EXPORTS BruteForceMatcher_OCL_base\r
- {\r
- public:\r
- enum DistType {L1Dist = 0, L2Dist, HammingDist};\r
-\r
- explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);\r
-\r
- // Add descriptors to train descriptor collection\r
- void add(const std::vector<oclMat>& descCollection);\r
-\r
- // Get train descriptors collection\r
- const std::vector<oclMat>& getTrainDescriptors() const;\r
-\r
- // Clear train descriptors collection\r
- void clear();\r
-\r
- // Return true if there are not train descriptors in collection\r
- bool empty() const;\r
-\r
- // Return true if the matcher supports mask in match methods\r
- bool isMaskSupported() const;\r
-\r
- // Find one best match for each query descriptor\r
- void matchSingle(const oclMat& query, const oclMat& train,\r
- oclMat& trainIdx, oclMat& distance,\r
- const oclMat& mask = oclMat());\r
-\r
- // Download trainIdx and distance and convert it to CPU vector with DMatch\r
- static void matchDownload(const oclMat& trainIdx, const oclMat& distance, std::vector<DMatch>& matches);\r
- // Convert trainIdx and distance to vector with DMatch\r
- static void matchConvert(const Mat& trainIdx, const Mat& distance, std::vector<DMatch>& matches);\r
-\r
- // Find one best match for each query descriptor\r
- void match(const oclMat& query, const oclMat& train, std::vector<DMatch>& matches, const oclMat& mask = oclMat());\r
-\r
- // Make gpu collection of trains and masks in suitable format for matchCollection function\r
- void makeGpuCollection(oclMat& trainCollection, oclMat& maskCollection, const std::vector<oclMat>& masks = std::vector<oclMat>());\r
-\r
- // Find one best match from train collection for each query descriptor\r
- void matchCollection(const oclMat& query, const oclMat& trainCollection,\r
- oclMat& trainIdx, oclMat& imgIdx, oclMat& distance,\r
- const oclMat& masks = oclMat());\r
-\r
- // Download trainIdx, imgIdx and distance and convert it to vector with DMatch\r
- static void matchDownload(const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, std::vector<DMatch>& matches);\r
- // Convert trainIdx, imgIdx and distance to vector with DMatch\r
- static void matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector<DMatch>& matches);\r
-\r
- // Find one best match from train collection for each query descriptor.\r
- void match(const oclMat& query, std::vector<DMatch>& matches, const std::vector<oclMat>& masks = std::vector<oclMat>());\r
-\r
- // Find k best matches for each query descriptor (in increasing order of distances)\r
- void knnMatchSingle(const oclMat& query, const oclMat& train,\r
- oclMat& trainIdx, oclMat& distance, oclMat& allDist, int k,\r
- const oclMat& mask = oclMat());\r
-\r
- // Download trainIdx and distance and convert it to vector with DMatch\r
- // compactResult is used when mask is not empty. If compactResult is false matches\r
- // vector will have the same size as queryDescriptors rows. If compactResult is true\r
- // matches vector will not contain matches for fully masked out query descriptors.\r
- static void knnMatchDownload(const oclMat& trainIdx, const oclMat& distance,\r
- std::vector< std::vector<DMatch> >& matches, bool compactResult = false);\r
- // Convert trainIdx and distance to vector with DMatch\r
- static void knnMatchConvert(const Mat& trainIdx, const Mat& distance,\r
- std::vector< std::vector<DMatch> >& matches, bool compactResult = false);\r
-\r
- // Find k best matches for each query descriptor (in increasing order of distances).\r
- // compactResult is used when mask is not empty. If compactResult is false matches\r
- // vector will have the same size as queryDescriptors rows. If compactResult is true\r
- // matches vector will not contain matches for fully masked out query descriptors.\r
- void knnMatch(const oclMat& query, const oclMat& train,\r
- std::vector< std::vector<DMatch> >& matches, int k, const oclMat& mask = oclMat(),\r
- bool compactResult = false);\r
-\r
- // Find k best matches from train collection for each query descriptor (in increasing order of distances)\r
- void knnMatch2Collection(const oclMat& query, const oclMat& trainCollection,\r
- oclMat& trainIdx, oclMat& imgIdx, oclMat& distance,\r
- const oclMat& maskCollection = oclMat());\r
-\r
- // Download trainIdx and distance and convert it to vector with DMatch\r
- // compactResult is used when mask is not empty. If compactResult is false matches\r
- // vector will have the same size as queryDescriptors rows. If compactResult is true\r
- // matches vector will not contain matches for fully masked out query descriptors.\r
- static void knnMatch2Download(const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance,\r
- std::vector< std::vector<DMatch> >& matches, bool compactResult = false);\r
- // Convert trainIdx and distance to vector with DMatch\r
- static void knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance,\r
- std::vector< std::vector<DMatch> >& matches, bool compactResult = false);\r
-\r
- // Find k best matches for each query descriptor (in increasing order of distances).\r
- // compactResult is used when mask is not empty. If compactResult is false matches\r
- // vector will have the same size as queryDescriptors rows. If compactResult is true\r
- // matches vector will not contain matches for fully masked out query descriptors.\r
- void knnMatch(const oclMat& query, std::vector< std::vector<DMatch> >& matches, int k,\r
- const std::vector<oclMat>& masks = std::vector<oclMat>(), bool compactResult = false);\r
-\r
- // Find best matches for each query descriptor which have distance less than maxDistance.\r
- // nMatches.at<int>(0, queryIdx) will contain matches count for queryIdx.\r
- // carefully nMatches can be greater than trainIdx.cols - it means that matcher didn't find all matches,\r
- // because it didn't have enough memory.\r
- // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),\r
- // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches\r
- // Matches doesn't sorted.\r
- void radiusMatchSingle(const oclMat& query, const oclMat& train,\r
- oclMat& trainIdx, oclMat& distance, oclMat& nMatches, float maxDistance,\r
- const oclMat& mask = oclMat());\r
-\r
- // Download trainIdx, nMatches and distance and convert it to vector with DMatch.\r
- // matches will be sorted in increasing order of distances.\r
- // compactResult is used when mask is not empty. If compactResult is false matches\r
- // vector will have the same size as queryDescriptors rows. If compactResult is true\r
- // matches vector will not contain matches for fully masked out query descriptors.\r
- static void radiusMatchDownload(const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches,\r
- std::vector< std::vector<DMatch> >& matches, bool compactResult = false);\r
- // Convert trainIdx, nMatches and distance to vector with DMatch.\r
- static void radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches,\r
- std::vector< std::vector<DMatch> >& matches, bool compactResult = false);\r
-\r
- // Find best matches for each query descriptor which have distance less than maxDistance\r
- // in increasing order of distances).\r
- void radiusMatch(const oclMat& query, const oclMat& train,\r
- std::vector< std::vector<DMatch> >& matches, float maxDistance,\r
- const oclMat& mask = oclMat(), bool compactResult = false);\r
-\r
- // Find best matches for each query descriptor which have distance less than maxDistance.\r
- // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),\r
- // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches\r
- // Matches doesn't sorted.\r
- void radiusMatchCollection(const oclMat& query, oclMat& trainIdx, oclMat& imgIdx, oclMat& distance, oclMat& nMatches, float maxDistance,\r
- const std::vector<oclMat>& masks = std::vector<oclMat>());\r
-\r
- // Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.\r
- // matches will be sorted in increasing order of distances.\r
- // compactResult is used when mask is not empty. If compactResult is false matches\r
- // vector will have the same size as queryDescriptors rows. If compactResult is true\r
- // matches vector will not contain matches for fully masked out query descriptors.\r
- static void radiusMatchDownload(const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, const oclMat& nMatches,\r
- std::vector< std::vector<DMatch> >& matches, bool compactResult = false);\r
- // Convert trainIdx, nMatches and distance to vector with DMatch.\r
- static void radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches,\r
- std::vector< std::vector<DMatch> >& matches, bool compactResult = false);\r
-\r
- // Find best matches from train collection for each query descriptor which have distance less than\r
- // maxDistance (in increasing order of distances).\r
- void radiusMatch(const oclMat& query, std::vector< std::vector<DMatch> >& matches, float maxDistance,\r
- const std::vector<oclMat>& masks = std::vector<oclMat>(), bool compactResult = false);\r
-\r
- DistType distType;\r
-\r
- private:\r
- std::vector<oclMat> trainDescCollection;\r
- };\r
-\r
- template <class Distance>\r
- class CV_EXPORTS BruteForceMatcher_OCL;\r
-\r
- template <typename T>\r
- class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base\r
- {\r
- public:\r
- explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}\r
- explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {}\r
- };\r
- template <typename T>\r
- class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base\r
- {\r
- public:\r
- explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}\r
- explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {}\r
- };\r
- template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base\r
- {\r
- public:\r
- explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}\r
- explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {}\r
- };\r
-\r
- /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////\r
- class CV_EXPORTS PyrLKOpticalFlow\r
- {\r
- public:\r
- PyrLKOpticalFlow()\r
- {\r
- winSize = Size(21, 21);\r
- maxLevel = 3;\r
- iters = 30;\r
- derivLambda = 0.5;\r
- useInitialFlow = false;\r
- minEigThreshold = 1e-4f;\r
- getMinEigenVals = false;\r
- isDeviceArch11_ = false;\r
- }\r
-\r
- void sparse(const oclMat& prevImg, const oclMat& nextImg, const oclMat& prevPts, oclMat& nextPts,\r
- oclMat& status, oclMat* err = 0);\r
-\r
- void dense(const oclMat& prevImg, const oclMat& nextImg, oclMat& u, oclMat& v, oclMat* err = 0);\r
-\r
- Size winSize;\r
- int maxLevel;\r
- int iters;\r
- double derivLambda;\r
- bool useInitialFlow;\r
- float minEigThreshold;\r
- bool getMinEigenVals;\r
-\r
- void releaseMemory()\r
- {\r
- dx_calcBuf_.release();\r
- dy_calcBuf_.release();\r
-\r
- prevPyr_.clear();\r
- nextPyr_.clear();\r
-\r
- dx_buf_.release();\r
- dy_buf_.release();\r
- }\r
-\r
- private:\r
- void calcSharrDeriv(const oclMat& src, oclMat& dx, oclMat& dy);\r
-\r
- void buildImagePyramid(const oclMat& img0, vector<oclMat>& pyr, bool withBorder);\r
-\r
- oclMat dx_calcBuf_;\r
- oclMat dy_calcBuf_;\r
-\r
- vector<oclMat> prevPyr_;\r
- vector<oclMat> nextPyr_;\r
-\r
- oclMat dx_buf_;\r
- oclMat dy_buf_;\r
-\r
- oclMat uPyr_[2];\r
- oclMat vPyr_[2];\r
-\r
- bool isDeviceArch11_;\r
- };\r
-\r
+ //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
+
+ struct CV_EXPORTS HOGDescriptor
+
+ {
+
+ enum { DEFAULT_WIN_SIGMA = -1 };
+
+ enum { DEFAULT_NLEVELS = 64 };
+
+ enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
+
+
+
+ HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16),
+
+ Size block_stride = Size(8, 8), Size cell_size = Size(8, 8),
+
+ int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA,
+
+ double threshold_L2hys = 0.2, bool gamma_correction = true,
+
+ int nlevels = DEFAULT_NLEVELS);
+
+
+
+ size_t getDescriptorSize() const;
+
+ size_t getBlockHistogramSize() const;
+
+
+
+ void setSVMDetector(const vector<float> &detector);
+
+
+
+ static vector<float> getDefaultPeopleDetector();
+
+ static vector<float> getPeopleDetector48x96();
+
+ static vector<float> getPeopleDetector64x128();
+
+
+
+ void detect(const oclMat &img, vector<Point> &found_locations,
+
+ double hit_threshold = 0, Size win_stride = Size(),
+
+ Size padding = Size());
+
+
+
+ void detectMultiScale(const oclMat &img, vector<Rect> &found_locations,
+
+ double hit_threshold = 0, Size win_stride = Size(),
+
+ Size padding = Size(), double scale0 = 1.05,
+
+ int group_threshold = 2);
+
+
+
+ void getDescriptors(const oclMat &img, Size win_stride,
+
+ oclMat &descriptors,
+
+ int descr_format = DESCR_FORMAT_COL_BY_COL);
+
+
+
+ Size win_size;
+
+ Size block_size;
+
+ Size block_stride;
+
+ Size cell_size;
+
+ int nbins;
+
+ double win_sigma;
+
+ double threshold_L2hys;
+
+ bool gamma_correction;
+
+ int nlevels;
+
+
+
+ protected:
+
+ // initialize buffers; only need to do once in case of multiscale detection
+
+ void init_buffer(const oclMat &img, Size win_stride);
+
+
+
+ void computeBlockHistograms(const oclMat &img);
+
+ void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle);
+
+
+
+ double getWinSigma() const;
+
+ bool checkDetectorSize() const;
+
+
+
+ static int numPartsWithin(int size, int part_size, int stride);
+
+ static Size numPartsWithin(Size size, Size part_size, Size stride);
+
+
+
+ // Coefficients of the separating plane
+
+ float free_coef;
+
+ oclMat detector;
+
+
+
+ // Results of the last classification step
+
+ oclMat labels;
+
+ Mat labels_host;
+
+
+
+ // Results of the last histogram evaluation step
+
+ oclMat block_hists;
+
+
+
+ // Gradients conputation results
+
+ oclMat grad, qangle;
+
+
+
+ // scaled image
+
+ oclMat image_scale;
+
+
+
+ // effect size of input image (might be different from original size after scaling)
+
+ Size effect_size;
+
+ };
+
+
+
+ //! Speeded up robust features, port from GPU module.
+ ////////////////////////////////// SURF //////////////////////////////////////////
+
+ class CV_EXPORTS SURF_OCL
+
+ {
+
+ public:
+
+ enum KeypointLayout
+
+ {
+
+ X_ROW = 0,
+
+ Y_ROW,
+
+ LAPLACIAN_ROW,
+
+ OCTAVE_ROW,
+
+ SIZE_ROW,
+
+ ANGLE_ROW,
+
+ HESSIAN_ROW,
+
+ ROWS_COUNT
+
+ };
+
+
+
+ //! the default constructor
+
+ SURF_OCL();
+
+ //! the full constructor taking all the necessary parameters
+
+ explicit SURF_OCL(double _hessianThreshold, int _nOctaves = 4,
+
+ int _nOctaveLayers = 2, bool _extended = false, float _keypointsRatio = 0.01f, bool _upright = false);
+
+
+
+ //! returns the descriptor size in float's (64 or 128)
+
+ int descriptorSize() const;
+
+
+
+ //! upload host keypoints to device memory
+
+ void uploadKeypoints(const vector<cv::KeyPoint> &keypoints, oclMat &keypointsocl);
+
+ //! download keypoints from device to host memory
+
+ void downloadKeypoints(const oclMat &keypointsocl, vector<KeyPoint> &keypoints);
+
+
+
+ //! download descriptors from device to host memory
+
+ void downloadDescriptors(const oclMat &descriptorsocl, vector<float> &descriptors);
+
+
+
+ //! finds the keypoints using fast hessian detector used in SURF
+
+ //! supports CV_8UC1 images
+
+ //! keypoints will have nFeature cols and 6 rows
+
+ //! keypoints.ptr<float>(X_ROW)[i] will contain x coordinate of i'th feature
+
+ //! keypoints.ptr<float>(Y_ROW)[i] will contain y coordinate of i'th feature
+
+ //! keypoints.ptr<float>(LAPLACIAN_ROW)[i] will contain laplacian sign of i'th feature
+
+ //! keypoints.ptr<float>(OCTAVE_ROW)[i] will contain octave of i'th feature
+
+ //! keypoints.ptr<float>(SIZE_ROW)[i] will contain size of i'th feature
+
+ //! keypoints.ptr<float>(ANGLE_ROW)[i] will contain orientation of i'th feature
+
+ //! keypoints.ptr<float>(HESSIAN_ROW)[i] will contain response of i'th feature
+
+ void operator()(const oclMat &img, const oclMat &mask, oclMat &keypoints);
+
+ //! finds the keypoints and computes their descriptors.
+
+ //! Optionally it can compute descriptors for the user-provided keypoints and recompute keypoints direction
+
+ void operator()(const oclMat &img, const oclMat &mask, oclMat &keypoints, oclMat &descriptors,
+
+ bool useProvidedKeypoints = false);
+
+
+
+ void operator()(const oclMat &img, const oclMat &mask, std::vector<KeyPoint> &keypoints);
+
+ void operator()(const oclMat &img, const oclMat &mask, std::vector<KeyPoint> &keypoints, oclMat &descriptors,
+
+ bool useProvidedKeypoints = false);
+
+
+
+ void operator()(const oclMat &img, const oclMat &mask, std::vector<KeyPoint> &keypoints, std::vector<float> &descriptors,
+
+ bool useProvidedKeypoints = false);
+
+
+
+ void releaseMemory();
+
+
+
+ // SURF parameters
+
+ float hessianThreshold;
+
+ int nOctaves;
+
+ int nOctaveLayers;
+
+ bool extended;
+
+ bool upright;
+
+
+
+ //! max keypoints = min(keypointsRatio * img.size().area(), 65535)
+
+ float keypointsRatio;
+
+
+
+ oclMat sum, mask1, maskSum, intBuffer;
+
+
+
+ oclMat det, trace;
+
+
+
+ oclMat maxPosBuffer;
+
+ };
+
+ ////////////////////////feature2d_ocl/////////////////
+ /****************************************************************************************\
+ * Distance *
+ \****************************************************************************************/
+
+ template<typename T>
+ struct CV_EXPORTS Accumulator
+ {
+ typedef T Type;
+ };
+
+ template<> struct Accumulator<unsigned char>
+ {
+ typedef float Type;
+ };
+ template<> struct Accumulator<unsigned short>
+ {
+ typedef float Type;
+ };
+ template<> struct Accumulator<char>
+ {
+ typedef float Type;
+ };
+ template<> struct Accumulator<short>
+ {
+ typedef float Type;
+ };
+
+ /*
+ * Manhattan distance (city block distance) functor
+ */
+ template<class T>
+ struct CV_EXPORTS L1
+ {
+ enum { normType = NORM_L1 };
+ typedef T ValueType;
+ typedef typename Accumulator<T>::Type ResultType;
+
+ ResultType operator()( const T *a, const T *b, int size ) const
+ {
+ return normL1<ValueType, ResultType>(a, b, size);
+ }
+ };
+
+ /*
+ * Euclidean distance functor
+ */
+ template<class T>
+ struct CV_EXPORTS L2
+ {
+ enum { normType = NORM_L2 };
+ typedef T ValueType;
+ typedef typename Accumulator<T>::Type ResultType;
+
+ ResultType operator()( const T *a, const T *b, int size ) const
+ {
+ return (ResultType)sqrt((double)normL2Sqr<ValueType, ResultType>(a, b, size));
+ }
+ };
+
+ /*
+ * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
+ * bit count of A exclusive XOR'ed with B
+ */
+ struct CV_EXPORTS Hamming
+ {
+ enum { normType = NORM_HAMMING };
+ typedef unsigned char ValueType;
+ typedef int ResultType;
+
+ /** this will count the bits in a ^ b
+ */
+ ResultType operator()( const unsigned char *a, const unsigned char *b, int size ) const
+ {
+ return normHamming(a, b, size);
+ }
+ };
+
+ ////////////////////////////////// BruteForceMatcher //////////////////////////////////
+
+ class CV_EXPORTS BruteForceMatcher_OCL_base
+ {
+ public:
+ enum DistType {L1Dist = 0, L2Dist, HammingDist};
+
+ explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);
+
+
+
+ // Add descriptors to train descriptor collection
+
+ void add(const std::vector<oclMat> &descCollection);
+
+
+
+ // Get train descriptors collection
+
+ const std::vector<oclMat> &getTrainDescriptors() const;
+
+
+
+ // Clear train descriptors collection
+
+ void clear();
+
+
+
+ // Return true if there are not train descriptors in collection
+
+ bool empty() const;
+
+
+
+ // Return true if the matcher supports mask in match methods
+
+ bool isMaskSupported() const;
+
+
+
+ // Find one best match for each query descriptor
+
+ void matchSingle(const oclMat &query, const oclMat &train,
+
+ oclMat &trainIdx, oclMat &distance,
+
+ const oclMat &mask = oclMat());
+
+
+
+ // Download trainIdx and distance and convert it to CPU vector with DMatch
+
+ static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector<DMatch> &matches);
+
+ // Convert trainIdx and distance to vector with DMatch
+
+ static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector<DMatch> &matches);
+
+
+
+ // Find one best match for each query descriptor
+
+ void match(const oclMat &query, const oclMat &train, std::vector<DMatch> &matches, const oclMat &mask = oclMat());
+
+
+
+ // Make gpu collection of trains and masks in suitable format for matchCollection function
+
+ void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector<oclMat> &masks = std::vector<oclMat>());
+
+
+
+ // Find one best match from train collection for each query descriptor
+
+ void matchCollection(const oclMat &query, const oclMat &trainCollection,
+
+ oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
+
+ const oclMat &masks = oclMat());
+
+
+
+ // Download trainIdx, imgIdx and distance and convert it to vector with DMatch
+
+ static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector<DMatch> &matches);
+
+ // Convert trainIdx, imgIdx and distance to vector with DMatch
+
+ static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector<DMatch> &matches);
+
+
+
+ // Find one best match from train collection for each query descriptor.
+
+ void match(const oclMat &query, std::vector<DMatch> &matches, const std::vector<oclMat> &masks = std::vector<oclMat>());
+
+
+
+ // Find k best matches for each query descriptor (in increasing order of distances)
+
+ void knnMatchSingle(const oclMat &query, const oclMat &train,
+
+ oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k,
+
+ const oclMat &mask = oclMat());
+
+
+
+ // Download trainIdx and distance and convert it to vector with DMatch
+
+ // compactResult is used when mask is not empty. If compactResult is false matches
+
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
+
+ // matches vector will not contain matches for fully masked out query descriptors.
+
+ static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance,
+
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+
+ // Convert trainIdx and distance to vector with DMatch
+
+ static void knnMatchConvert(const Mat &trainIdx, const Mat &distance,
+
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+
+
+
+ // Find k best matches for each query descriptor (in increasing order of distances).
+
+ // compactResult is used when mask is not empty. If compactResult is false matches
+
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
+
+ // matches vector will not contain matches for fully masked out query descriptors.
+
+ void knnMatch(const oclMat &query, const oclMat &train,
+
+ std::vector< std::vector<DMatch> > &matches, int k, const oclMat &mask = oclMat(),
+
+ bool compactResult = false);
+
+
+
+ // Find k best matches from train collection for each query descriptor (in increasing order of distances)
+
+ void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
+
+ oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
+
+ const oclMat &maskCollection = oclMat());
+
+
+
+ // Download trainIdx and distance and convert it to vector with DMatch
+
+ // compactResult is used when mask is not empty. If compactResult is false matches
+
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
+
+ // matches vector will not contain matches for fully masked out query descriptors.
+
+ static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
+
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+
+ // Convert trainIdx and distance to vector with DMatch
+
+ static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
+
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+
+
+
+ // Find k best matches for each query descriptor (in increasing order of distances).
+
+ // compactResult is used when mask is not empty. If compactResult is false matches
+
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
+
+ // matches vector will not contain matches for fully masked out query descriptors.
+
+ void knnMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, int k,
+
+ const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
+
+
+
+ // Find best matches for each query descriptor which have distance less than maxDistance.
+
+ // nMatches.at<int>(0, queryIdx) will contain matches count for queryIdx.
+
+ // carefully nMatches can be greater than trainIdx.cols - it means that matcher didn't find all matches,
+
+ // because it didn't have enough memory.
+
+ // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10),
+
+ // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches
+
+ // Matches doesn't sorted.
+
+ void radiusMatchSingle(const oclMat &query, const oclMat &train,
+
+ oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
+
+ const oclMat &mask = oclMat());
+
+
+
+ // Download trainIdx, nMatches and distance and convert it to vector with DMatch.
+
+ // matches will be sorted in increasing order of distances.
+
+ // compactResult is used when mask is not empty. If compactResult is false matches
+
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
+
+ // matches vector will not contain matches for fully masked out query descriptors.
+
+ static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
+
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+
+ // Convert trainIdx, nMatches and distance to vector with DMatch.
+
+ static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
+
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+
+
+
+ // Find best matches for each query descriptor which have distance less than maxDistance
+
+ // in increasing order of distances).
+
+ void radiusMatch(const oclMat &query, const oclMat &train,
+
+ std::vector< std::vector<DMatch> > &matches, float maxDistance,
+
+ const oclMat &mask = oclMat(), bool compactResult = false);
+
+
+
+ // Find best matches for each query descriptor which have distance less than maxDistance.
+
+ // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10),
+
+ // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches
+
+ // Matches doesn't sorted.
+
+ void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
+
+ const std::vector<oclMat> &masks = std::vector<oclMat>());
+
+
+
+ // Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
+
+ // matches will be sorted in increasing order of distances.
+
+ // compactResult is used when mask is not empty. If compactResult is false matches
+
+ // vector will have the same size as queryDescriptors rows. If compactResult is true
+
+ // matches vector will not contain matches for fully masked out query descriptors.
+
+ static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches,
+
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+
+ // Convert trainIdx, nMatches and distance to vector with DMatch.
+
+ static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
+
+ std::vector< std::vector<DMatch> > &matches, bool compactResult = false);
+
+
+
+ // Find best matches from train collection for each query descriptor which have distance less than
+
+ // maxDistance (in increasing order of distances).
+
+ void radiusMatch(const oclMat &query, std::vector< std::vector<DMatch> > &matches, float maxDistance,
+
+ const std::vector<oclMat> &masks = std::vector<oclMat>(), bool compactResult = false);
+
+
+
+ DistType distType;
+
+
+
+ private:
+
+ std::vector<oclMat> trainDescCollection;
+
+ };
+
+
+
+ template <class Distance>
+
+ class CV_EXPORTS BruteForceMatcher_OCL;
+
+
+
+ template <typename T>
+
+ class CV_EXPORTS BruteForceMatcher_OCL< L1<T> > : public BruteForceMatcher_OCL_base
+
+ {
+
+ public:
+
+ explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
+
+ explicit BruteForceMatcher_OCL(L1<T> /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {}
+
+ };
+
+ template <typename T>
+
+ class CV_EXPORTS BruteForceMatcher_OCL< L2<T> > : public BruteForceMatcher_OCL_base
+
+ {
+
+ public:
+
+ explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
+
+ explicit BruteForceMatcher_OCL(L2<T> /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {}
+
+ };
+
+ template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base
+
+ {
+
+ public:
+
+ explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
+
+ explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {}
+
+ };
+
+
+
+ /////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
+
+ class CV_EXPORTS PyrLKOpticalFlow
+
+ {
+
+ public:
+
+ PyrLKOpticalFlow()
+
+ {
+
+ winSize = Size(21, 21);
+
+ maxLevel = 3;
+
+ iters = 30;
+
+ derivLambda = 0.5;
+
+ useInitialFlow = false;
+
+ minEigThreshold = 1e-4f;
+
+ getMinEigenVals = false;
+
+ isDeviceArch11_ = false;
+
+ }
+
+
+
+ void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts,
+
+ oclMat &status, oclMat *err = 0);
+
+
+
+ void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0);
+
+
+
+ Size winSize;
+
+ int maxLevel;
+
+ int iters;
+
+ double derivLambda;
+
+ bool useInitialFlow;
+
+ float minEigThreshold;
+
+ bool getMinEigenVals;
+
+
+
+ void releaseMemory()
+
+ {
+
+ dx_calcBuf_.release();
+
+ dy_calcBuf_.release();
+
+
+
+ prevPyr_.clear();
+
+ nextPyr_.clear();
+
+
+
+ dx_buf_.release();
+
+ dy_buf_.release();
+
+ }
+
+
+
+ private:
+
+ void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy);
+
+
+
+ void buildImagePyramid(const oclMat &img0, vector<oclMat> &pyr, bool withBorder);
+
+
+
+ oclMat dx_calcBuf_;
+
+ oclMat dy_calcBuf_;
+
+
+
+ vector<oclMat> prevPyr_;
+
+ vector<oclMat> nextPyr_;
+
+
+
+ oclMat dx_buf_;
+
+ oclMat dy_buf_;
+
+
+
+ oclMat uPyr_[2];
+
+ oclMat vPyr_[2];
+
+
+
+ bool isDeviceArch11_;
+
+ };
+ //////////////// build warping maps ////////////////////
+ //! builds plane warping maps
+ CV_EXPORTS void buildWarpPlaneMaps(Size, Rect, const Mat &, const Mat &, const Mat &, float, oclMat &, oclMat &);
+ //! builds cylindrical warping maps
+ CV_EXPORTS void buildWarpCylindricalMaps(Size, Rect, const Mat &, const Mat &, float, oclMat &, oclMat &);
+ //! builds spherical warping maps
+ CV_EXPORTS void buildWarpSphericalMaps(Size, Rect, const Mat &, const Mat &, float, oclMat &, oclMat &);
+ //! builds Affine warping maps
+ CV_EXPORTS void buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
+
+ //! builds Perspective warping maps
+ CV_EXPORTS void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);
+
+ ///////////////////////////////////// interpolate frames //////////////////////////////////////////////
+ //! Interpolate frames (images) using provided optical flow (displacement field).
+ //! frame0 - frame 0 (32-bit floating point images, single channel)
+ //! frame1 - frame 1 (the same type and size)
+ //! fu - forward horizontal displacement
+ //! fv - forward vertical displacement
+ //! bu - backward horizontal displacement
+ //! bv - backward vertical displacement
+ //! pos - new frame position
+ //! newFrame - new frame
+ //! buf - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 oclMat;
+ //! occlusion masks 0, occlusion masks 1,
+ //! interpolated forward flow 0, interpolated forward flow 1,
+ //! interpolated backward flow 0, interpolated backward flow 1
+ //!
+ CV_EXPORTS void interpolateFrames(const oclMat &frame0, const oclMat &frame1,
+ const oclMat &fu, const oclMat &fv,
+ const oclMat &bu, const oclMat &bv,
+ float pos, oclMat &newFrame, oclMat &buf);
+
}
}
#include "opencv2/ocl/matrix_operations.hpp"
#ifndef __OPENCV_TEST_INTERPOLATION_HPP__
#define __OPENCV_TEST_INTERPOLATION_HPP__
-template <typename T> T readVal(const cv::Mat& src, int y, int x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
+template <typename T> T readVal(const cv::Mat &src, int y, int x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
{
if (border_type == cv::BORDER_CONSTANT)
return (y >= 0 && y < src.rows && x >= 0 && x < src.cols) ? src.at<T>(y, x * src.channels() + c) : cv::saturate_cast<T>(borderVal.val[c]);
template <typename T> struct NearestInterpolator
{
- static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
+ static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
{
return readVal<T>(src, cvFloor(y), cvFloor(x), c, border_type, borderVal);
}
template <typename T> struct LinearInterpolator
{
- static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
+ static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
{
x -= 0.5f;
y -= 0.5f;
{
static float getValue(float p[4], float x)
{
- return p[1] + 0.5 * x * (p[2] - p[0] + x*(2.0*p[0] - 5.0*p[1] + 4.0*p[2] - p[3] + x*(3.0*(p[1] - p[2]) + p[3] - p[0])));
+ return p[1] + 0.5 * x * (p[2] - p[0] + x * (2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] + x * (3.0 * (p[1] - p[2]) + p[3] - p[0])));
}
static float getValue(float p[4][4], float x, float y)
return getValue(arr, y);
}
- static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
+ static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
{
int ix = cvRound(x);
int iy = cvRound(y);
using namespace testing;
void print_info()
-{
+{
printf("\n");
#if defined _WIN32
# if defined _WIN64
- puts("OS: Windows 64");
+ puts("OS: Windows 64");
# else
- puts("OS: Windows 32");
+ puts("OS: Windows 32");
# endif
#elif defined linux
# if defined _LP64
- puts("OS: Linux 64");
+ puts("OS: Linux 64");
# else
- puts("OS: Linux 32");
+ puts("OS: Linux 32");
# endif
#elif defined __APPLE__
# if defined _LP64
- puts("OS: Apple 64");
+ puts("OS: Apple 64");
# else
- puts("OS: Apple 32");
+ puts("OS: Apple 32");
# endif
#endif
}
-int main(int argc, char** argv)
+int main(int argc, char **argv)
{
- std::vector<cv::ocl::Info> oclinfo;
+ std::vector<cv::ocl::Info> oclinfo;
TS::ptr()->init("ocl");
InitGoogleTest(&argc, argv);
print_info();
- int devnums = getDevice(oclinfo);
- if(devnums<1)
- {
- std::cout << "no device found\n";
- return -1;
- }
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
- setBinpath(CLBINPATH);
+ int devnums = getDevice(oclinfo);
+ if(devnums < 1)
+ {
+ std::cout << "no device found\n";
+ return -1;
+ }
+ //if you want to use undefault device, set it here
+ //setDevice(oclinfo[0]);
+ setBinpath(CLBINPATH);
return RUN_ALL_TESTS();
}
using namespace std;
PARAM_TEST_CASE(ArithmTestBase, MatType, bool)
{
- int type;
- cv::Scalar val;
-
- //src mat
- cv::Mat mat1;
- cv::Mat mat2;
- cv::Mat mask;
- cv::Mat dst;
- cv::Mat dst1; //bak, for two outputs
-
- // set up roi
- int roicols;
- int roirows;
- int src1x;
- int src1y;
- int src2x;
- int src2y;
- int dstx;
- int dsty;
- int maskx;
- int masky;
-
-
- //src mat with roi
- cv::Mat mat1_roi;
- cv::Mat mat2_roi;
- cv::Mat mask_roi;
- cv::Mat dst_roi;
- cv::Mat dst1_roi; //bak
- //std::vector<cv::ocl::Info> oclinfo;
- //ocl dst mat for testing
- cv::ocl::oclMat gdst_whole;
- cv::ocl::oclMat gdst1_whole; //bak
-
- //ocl mat with roi
- cv::ocl::oclMat gmat1;
- cv::ocl::oclMat gmat2;
- cv::ocl::oclMat gdst;
- cv::ocl::oclMat gdst1; //bak
- cv::ocl::oclMat gmask;
-
- virtual void SetUp()
- {
- type = GET_PARAM(0);
+ int type;
+ cv::Scalar val;
+
+ //src mat
+ cv::Mat mat1;
+ cv::Mat mat2;
+ cv::Mat mask;
+ cv::Mat dst;
+ cv::Mat dst1; //bak, for two outputs
+
+ // set up roi
+ int roicols;
+ int roirows;
+ int src1x;
+ int src1y;
+ int src2x;
+ int src2y;
+ int dstx;
+ int dsty;
+ int maskx;
+ int masky;
+
+
+ //src mat with roi
+ cv::Mat mat1_roi;
+ cv::Mat mat2_roi;
+ cv::Mat mask_roi;
+ cv::Mat dst_roi;
+ cv::Mat dst1_roi; //bak
+ //std::vector<cv::ocl::Info> oclinfo;
+ //ocl dst mat for testing
+ cv::ocl::oclMat gdst_whole;
+ cv::ocl::oclMat gdst1_whole; //bak
+
+ //ocl mat with roi
+ cv::ocl::oclMat gmat1;
+ cv::ocl::oclMat gmat2;
+ cv::ocl::oclMat gdst;
+ cv::ocl::oclMat gdst1; //bak
+ cv::ocl::oclMat gmask;
+
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
- cv::RNG& rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
- cv::Size size(MWIDTH, MHEIGHT);
+ cv::Size size(MWIDTH, MHEIGHT);
- mat1 = randomMat(rng, size, type, 5, 16, false);
- //mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false);
- mat2 = randomMat(rng, size, type, 5, 16, false);
- dst = randomMat(rng, size, type, 5, 16, false);
- dst1 = randomMat(rng, size, type, 5, 16, false);
- mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
+ mat1 = randomMat(rng, size, type, 5, 16, false);
+ //mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false);
+ mat2 = randomMat(rng, size, type, 5, 16, false);
+ dst = randomMat(rng, size, type, 5, 16, false);
+ dst1 = randomMat(rng, size, type, 5, 16, false);
+ mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
- cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
+ cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
- val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums>0);
- ////if you want to use undefault device, set it here
- ////setDevice(oclinfo[0]);
- //setBinpath(CLBINPATH);
- }
+ val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums>0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //setBinpath(CLBINPATH);
+ }
- void Has_roi(int b)
- {
- //cv::RNG& rng = TS::ptr()->get_rng();
- if(b)
- {
- //randomize ROI
- roicols = mat1.cols-1;
- roirows = mat1.rows-1;
- src1x = 1;
- src2x = 1;
- src1y = 1;
- src2y = 1;
- dstx = 1;
- dsty =1;
- maskx =1;
- masky =1;
- }else
- {
- roicols = mat1.cols;
- roirows = mat1.rows;
- src1x = 0;
- src2x = 0;
- src1y = 0;
- src2y = 0;
- dstx = 0;
- dsty = 0;
- maskx =0;
- masky =0;
- };
-
- mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
- //mat2_roi = mat2(Rect(src2x,src2y,256,1));
- mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows));
- mask_roi = mask(Rect(maskx,masky,roicols,roirows));
- dst_roi = dst(Rect(dstx,dsty,roicols,roirows));
- dst1_roi = dst1(Rect(dstx,dsty,roicols,roirows));
-
- //gdst_whole = dst;
- //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- //gdst1_whole = dst1;
- //gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
-
- //gmat1 = mat1_roi;
- //gmat2 = mat2_roi;
- //gmask = mask_roi;
- }
+ void Has_roi(int b)
+ {
+ //cv::RNG& rng = TS::ptr()->get_rng();
+ if(b)
+ {
+ //randomize ROI
+ roicols = mat1.cols - 1;
+ roirows = mat1.rows - 1;
+ src1x = 1;
+ src2x = 1;
+ src1y = 1;
+ src2y = 1;
+ dstx = 1;
+ dsty = 1;
+ maskx = 1;
+ masky = 1;
+ }
+ else
+ {
+ roicols = mat1.cols;
+ roirows = mat1.rows;
+ src1x = 0;
+ src2x = 0;
+ src1y = 0;
+ src2y = 0;
+ dstx = 0;
+ dsty = 0;
+ maskx = 0;
+ masky = 0;
+ };
+
+ mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
+ //mat2_roi = mat2(Rect(src2x,src2y,256,1));
+ mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
+ mask_roi = mask(Rect(maskx, masky, roicols, roirows));
+ dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+ dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows));
+
+ //gdst_whole = dst;
+ //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+ //gdst1_whole = dst1;
+ //gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
+
+ //gmat1 = mat1_roi;
+ //gmat2 = mat2_roi;
+ //gmask = mask_roi;
+ }
};
////////////////////////////////lut/////////////////////////////////////////////////
struct Lut : ArithmTestBase {};
TEST_P(Lut, Mat)
-{
+{
- cv::Mat mat2(3, 512, CV_8UC1);
- cv::RNG& rng = TS::ptr()->get_rng();
- rng.fill(mat2, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(256));
+ cv::Mat mat2(3, 512, CV_8UC1);
+ cv::RNG &rng = TS::ptr()->get_rng();
+ rng.fill(mat2, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(256));
+
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+ mat2 = randomMat(rng, cv::Size(512, 3), type, 5, 16, false);
+ mat2_roi = mat2(Rect(src2x, src2y, 256, 1));
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
- mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false);
- mat2_roi = mat2(Rect(src2x,src2y,256,1));
-
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::LUT(mat1_roi, mat2_roi, dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
-
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::LUT(gmat1, gmat2, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- // s=GetParam();
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::LUT(mat1_roi, mat2_roi, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::LUT(gmat1, gmat2, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ // s=GetParam();
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- // src2x = rng.uniform( 0,mat2.cols - 256);
- // src2y = rng.uniform (0,mat2.rows - 1);
-
- // cv::Mat mat2_roi = mat2(Rect(src2x,src2y,256,1));
- mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false);
- mat2_roi = mat2(Rect(src2x,src2y,256,1));
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- // gdst1_whole = dst1;
- // gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- // gmask = mask_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::LUT(gmat1, gmat2, gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ // src2x = rng.uniform( 0,mat2.cols - 256);
+ // src2y = rng.uniform (0,mat2.rows - 1);
+
+ // cv::Mat mat2_roi = mat2(Rect(src2x,src2y,256,1));
+ mat2 = randomMat(rng, cv::Size(512, 3), type, 5, 16, false);
+ mat2_roi = mat2(Rect(src2x, src2y, 256, 1));
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ // gdst1_whole = dst1;
+ // gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ // gmask = mask_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::LUT(gmat1, gmat2, gdst);
+ };
#endif
}
struct Exp : ArithmTestBase {};
-TEST_P(Exp, Mat)
-{
+TEST_P(Exp, Mat)
+{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::exp(mat1_roi, dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
-
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
-
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::exp(gmat1, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download(cpu_dst);
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
- //EXPECT_MAT_NEAR(dst, cpu_dst, 0,"");
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
-
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::exp(mat1_roi, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::exp(gmat1, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download(cpu_dst);
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+ //EXPECT_MAT_NEAR(dst, cpu_dst, 0,"");
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::exp(gmat1, gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::exp(gmat1, gdst);
+ };
#endif
}
struct Log : ArithmTestBase {};
-TEST_P(Log, Mat)
-{
+TEST_P(Log, Mat)
+{
+
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::log(mat1_roi, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::log(mat1_roi, dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::log(gmat1, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::log(gmat1, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::log(gmat1, gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::log(gmat1, gdst);
+ };
#endif
}
struct Add : ArithmTestBase {};
-TEST_P(Add, Mat)
-{
+TEST_P(Add, Mat)
+{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::add(mat1_roi, mat2_roi, dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::add(gmat1, gmat2, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::add(mat1_roi, mat2_roi, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::add(gmat1, gmat2, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::add(gmat1, gmat2, gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::add(gmat1, gmat2, gdst);
+ };
#endif
}
-TEST_P(Add, Mat_Mask)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::add(mat1_roi, mat2_roi, dst_roi, mask_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- gmask = mask_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::add(gmat1, gmat2, gdst, gmask);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Add, Mat_Mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::add(mat1_roi, mat2_roi, dst_roi, mask_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ gmask = mask_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::add(gmat1, gmat2, gdst, gmask);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- gmask = mask_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::add(gmat1, gmat2, gdst, gmask);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ gmask = mask_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::add(gmat1, gmat2, gdst, gmask);
+ };
#endif
}
-TEST_P(Add, Scalar)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::add(mat1_roi, val, dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::add(gmat1, val, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Add, Scalar)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::add(mat1_roi, val, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::add(gmat1, val, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::add(gmat1, val, gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::add(gmat1, val, gdst);
+ };
#endif
}
-TEST_P(Add, Scalar_Mask)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::add(mat1_roi, val, dst_roi, mask_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmask = mask_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::add(gmat1, val, gdst, gmask);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Add, Scalar_Mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::add(mat1_roi, val, dst_roi, mask_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmask = mask_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::add(gmat1, val, gdst, gmask);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmask = mask_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::add(gmat1, val, gdst, gmask);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmask = mask_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::add(gmat1, val, gdst, gmask);
+ };
#endif
}
////////////////////////////////sub/////////////////////////////////////////////////
struct Sub : ArithmTestBase {};
-TEST_P(Sub, Mat)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::subtract(mat1_roi, mat2_roi, dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::subtract(gmat1, gmat2, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Sub, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::subtract(mat1_roi, mat2_roi, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::subtract(gmat1, gmat2, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::subtract(gmat1, gmat2, gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::subtract(gmat1, gmat2, gdst);
+ };
#endif
}
-TEST_P(Sub, Mat_Mask)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::subtract(mat1_roi, mat2_roi, dst_roi, mask_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- gmask = mask_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::subtract(gmat1, gmat2, gdst, gmask);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Sub, Mat_Mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::subtract(mat1_roi, mat2_roi, dst_roi, mask_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ gmask = mask_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::subtract(gmat1, gmat2, gdst, gmask);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- gmask = mask_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::subtract(gmat1, gmat2, gdst, gmask);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ gmask = mask_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::subtract(gmat1, gmat2, gdst, gmask);
+ };
#endif
}
-TEST_P(Sub, Scalar)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::subtract(mat1_roi, val, dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::subtract(gmat1, val, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Sub, Scalar)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::subtract(mat1_roi, val, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::subtract(gmat1, val, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::subtract(gmat1, val, gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::subtract(gmat1, val, gdst);
+ };
#endif
}
-TEST_P(Sub, Scalar_Mask)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::subtract(mat1_roi, val, dst_roi, mask_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmask = mask_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::subtract(gmat1, val, gdst, gmask);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Sub, Scalar_Mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::subtract(mat1_roi, val, dst_roi, mask_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmask = mask_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::subtract(gmat1, val, gdst, gmask);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmask = mask_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::subtract(gmat1, val, gdst, gmask);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmask = mask_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::subtract(gmat1, val, gdst, gmask);
+ };
#endif
}
////////////////////////////////Mul/////////////////////////////////////////////////
struct Mul : ArithmTestBase {};
-TEST_P(Mul, Mat)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::multiply(mat1_roi, mat2_roi, dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::multiply(gmat1, gmat2, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Mul, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::multiply(mat1_roi, mat2_roi, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::multiply(gmat1, gmat2, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::multiply(gmat1, gmat2, gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::multiply(gmat1, gmat2, gdst);
+ };
#endif
}
-TEST_P(Mul, Mat_Scalar)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
- cv::RNG& rng = TS::ptr()->get_rng();
- double s = rng.uniform(-10.0, 10.0);
- t0 = (double)cvGetTickCount();//cpu start
- cv::multiply(mat1_roi, mat2_roi, dst_roi, s);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::multiply(gmat1, gmat2, gdst, s);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Mul, Mat_Scalar)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+ cv::RNG &rng = TS::ptr()->get_rng();
+ double s = rng.uniform(-10.0, 10.0);
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::multiply(mat1_roi, mat2_roi, dst_roi, s);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::multiply(gmat1, gmat2, gdst, s);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- cv::RNG& rng = TS::ptr()->get_rng();
- double s = rng.uniform(-10.0, 10.0);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::multiply(gmat1, gmat2, gdst, s);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ cv::RNG &rng = TS::ptr()->get_rng();
+ double s = rng.uniform(-10.0, 10.0);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::multiply(gmat1, gmat2, gdst, s);
+ };
#endif
}
struct Div : ArithmTestBase {};
-TEST_P(Div, Mat)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::divide(mat1_roi, mat2_roi, dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::divide(gmat1, gmat2, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Div, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::divide(mat1_roi, mat2_roi, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::divide(gmat1, gmat2, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::divide(gmat1, gmat2, gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::divide(gmat1, gmat2, gdst);
+ };
#endif
}
-TEST_P(Div, Mat_Scalar)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
- cv::RNG& rng = TS::ptr()->get_rng();
- double s = rng.uniform(-10.0, 10.0);
- t0 = (double)cvGetTickCount();//cpu start
- cv::divide(mat1_roi, mat2_roi, dst_roi, s);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::divide(gmat1, gmat2, gdst, s);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Div, Mat_Scalar)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+ cv::RNG &rng = TS::ptr()->get_rng();
+ double s = rng.uniform(-10.0, 10.0);
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::divide(mat1_roi, mat2_roi, dst_roi, s);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::divide(gmat1, gmat2, gdst, s);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- cv::RNG& rng = TS::ptr()->get_rng();
- double s = rng.uniform(-10.0, 10.0);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::divide(gmat1, gmat2, gdst, s);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ cv::RNG &rng = TS::ptr()->get_rng();
+ double s = rng.uniform(-10.0, 10.0);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::divide(gmat1, gmat2, gdst, s);
+ };
#endif
}
struct Absdiff : ArithmTestBase {};
-TEST_P(Absdiff, Mat)
-{
+TEST_P(Absdiff, Mat)
+{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::absdiff(mat1_roi, mat2_roi, dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::absdiff(gmat1, gmat2, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::absdiff(mat1_roi, mat2_roi, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::absdiff(gmat1, gmat2, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::absdiff(gmat1, gmat2, gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::absdiff(gmat1, gmat2, gdst);
+ };
#endif
}
-TEST_P(Absdiff, Mat_Scalar)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::absdiff(mat1_roi, val, dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::absdiff(gmat1, val, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Absdiff, Mat_Scalar)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::absdiff(mat1_roi, val, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::absdiff(gmat1, val, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::absdiff(gmat1, val, gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::absdiff(gmat1, val, gdst);
+ };
#endif
}
struct CartToPolar : ArithmTestBase {};
-TEST_P(CartToPolar, angleInDegree)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::cartToPolar(mat1_roi, mat2_roi, dst_roi, dst1_roi, 1);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- gdst1_whole = dst1;
- gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 1);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- cv::Mat cpu_dst1;
- gdst1_whole.download(cpu_dst1);
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(CartToPolar, angleInDegree)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::cartToPolar(mat1_roi, mat2_roi, dst_roi, dst1_roi, 1);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ gdst1_whole = dst1;
+ gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 1);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ cv::Mat cpu_dst1;
+ gdst1_whole.download(cpu_dst1);
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gdst1_whole = dst1;
- gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 1);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gdst1_whole = dst1;
+ gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 1);
+ };
#endif
}
-TEST_P(CartToPolar, angleInRadians)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::cartToPolar(mat1_roi, mat2_roi, dst_roi, dst1_roi, 0);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gdst1_whole = dst1;
- gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 0);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- cv::Mat cpu_dst1;
- gdst1_whole.download(cpu_dst1);
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(CartToPolar, angleInRadians)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::cartToPolar(mat1_roi, mat2_roi, dst_roi, dst1_roi, 0);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gdst1_whole = dst1;
+ gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 0);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ cv::Mat cpu_dst1;
+ gdst1_whole.download(cpu_dst1);
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gdst1_whole = dst1;
- gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 0);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gdst1_whole = dst1;
+ gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 0);
+ };
#endif
}
struct PolarToCart : ArithmTestBase {};
-TEST_P(PolarToCart, angleInDegree)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::polarToCart(mat1_roi, mat2_roi, dst_roi, dst1_roi, 1);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- gdst1_whole = dst1;
- gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 1);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- cv::Mat cpu_dst1;
- gdst1_whole.download(cpu_dst1);
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(PolarToCart, angleInDegree)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::polarToCart(mat1_roi, mat2_roi, dst_roi, dst1_roi, 1);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ gdst1_whole = dst1;
+ gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 1);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ cv::Mat cpu_dst1;
+ gdst1_whole.download(cpu_dst1);
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gdst1_whole = dst1;
- gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 1);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gdst1_whole = dst1;
+ gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 1);
+ };
#endif
}
-TEST_P(PolarToCart, angleInRadians)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::polarToCart(mat1_roi, mat2_roi, dst_roi, dst1_roi, 0);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- gdst1_whole = dst1;
- gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 0);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- cv::Mat cpu_dst1;
- gdst1_whole.download(cpu_dst1);
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(PolarToCart, angleInRadians)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::polarToCart(mat1_roi, mat2_roi, dst_roi, dst1_roi, 0);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ gdst1_whole = dst1;
+ gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 0);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ cv::Mat cpu_dst1;
+ gdst1_whole.download(cpu_dst1);
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- gdst1_whole = dst1;
- gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 0);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ gdst1_whole = dst1;
+ gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 0);
+ };
#endif
}
struct Magnitude : ArithmTestBase {};
-TEST_P(Magnitude, Mat)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::magnitude(mat1_roi, mat2_roi, dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::magnitude(gmat1, gmat2, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Magnitude, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::magnitude(mat1_roi, mat2_roi, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::magnitude(gmat1, gmat2, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::magnitude(gmat1, gmat2, gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::magnitude(gmat1, gmat2, gdst);
+ };
#endif
}
struct Transpose : ArithmTestBase {};
-TEST_P(Transpose, Mat)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::transpose(mat1_roi, dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::transpose(gmat1, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Transpose, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::transpose(mat1_roi, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::transpose(gmat1, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::transpose(gmat1, gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::transpose(gmat1, gdst);
+ };
#endif
}
struct Flip : ArithmTestBase {};
-TEST_P(Flip, X)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::flip(mat1_roi, dst_roi, 0);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::flip(gmat1, gdst, 0);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Flip, X)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::flip(mat1_roi, dst_roi, 0);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::flip(gmat1, gdst, 0);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::flip(gmat1, gdst, 0);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::flip(gmat1, gdst, 0);
+ };
#endif
}
-TEST_P(Flip, Y)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::flip(mat1_roi, dst_roi, 1);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::flip(gmat1, gdst, 1);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Flip, Y)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::flip(mat1_roi, dst_roi, 1);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::flip(gmat1, gdst, 1);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::flip(gmat1, gdst, 1);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::flip(gmat1, gdst, 1);
+ };
#endif
}
-TEST_P(Flip, BOTH)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::flip(mat1_roi, dst_roi, -1);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::flip(gmat1, gdst, -1);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Flip, BOTH)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::flip(mat1_roi, dst_roi, -1);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::flip(gmat1, gdst, -1);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::flip(gmat1, gdst, -1);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::flip(gmat1, gdst, -1);
+ };
#endif
}
struct MinMax : ArithmTestBase {};
-TEST_P(MinMax, MAT)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
- double minVal, maxVal;
- cv::Point minLoc, maxLoc;
- t0 = (double)cvGetTickCount();//cpu start
- if (mat1.depth() != CV_8S)
- {
- cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc);
- }
- else
- {
- minVal = std::numeric_limits<double>::max();
- maxVal = -std::numeric_limits<double>::max();
- for (int i = 0; i < mat1_roi.rows; ++i)
- for (int j = 0; j < mat1_roi.cols; ++j)
- {
- signed char val = mat1_roi.at<signed char>(i, j);
- if (val < minVal) minVal = val;
- if (val > maxVal) maxVal = val;
- }
- }
-
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gmat1 = mat1_roi;
- double minVal_, maxVal_;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::minMax(gmat1, &minVal_, &maxVal_);
- t2 = (double)cvGetTickCount() - t2;//kernel
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(MinMax, MAT)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+ double minVal, maxVal;
+ cv::Point minLoc, maxLoc;
+ t0 = (double)cvGetTickCount();//cpu start
+ if (mat1.depth() != CV_8S)
+ {
+ cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc);
+ }
+ else
+ {
+ minVal = std::numeric_limits<double>::max();
+ maxVal = -std::numeric_limits<double>::max();
+ for (int i = 0; i < mat1_roi.rows; ++i)
+ for (int j = 0; j < mat1_roi.cols; ++j)
+ {
+ signed char val = mat1_roi.at<signed char>(i, j);
+ if (val < minVal) minVal = val;
+ if (val > maxVal) maxVal = val;
+ }
+ }
+
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gmat1 = mat1_roi;
+ double minVal_, maxVal_;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::minMax(gmat1, &minVal_, &maxVal_);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gmat1 = mat1_roi;
- double minVal_, maxVal_;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::minMax(gmat1, &minVal_, &maxVal_);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gmat1 = mat1_roi;
+ double minVal_, maxVal_;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::minMax(gmat1, &minVal_, &maxVal_);
+ };
#endif
}
-TEST_P(MinMax, MASK)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
- double minVal, maxVal;
- cv::Point minLoc, maxLoc;
- t0 = (double)cvGetTickCount();//cpu start
- if (mat1.depth() != CV_8S)
- {
- cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc,mask_roi);
- }
- else
- {
- minVal = std::numeric_limits<double>::max();
- maxVal = -std::numeric_limits<double>::max();
- for (int i = 0; i < mat1_roi.rows; ++i)
- for (int j = 0; j < mat1_roi.cols; ++j)
- {
- signed char val = mat1_roi.at<signed char>(i, j);
- unsigned char m = mask_roi.at<unsigned char>(i, j);
- if (val < minVal && m) minVal = val;
- if (val > maxVal && m) maxVal = val;
- }
- }
-
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gmat1 = mat1_roi;
- gmask = mask_roi;
- double minVal_, maxVal_;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::minMax(gmat1, &minVal_, &maxVal_,gmask);
- t2 = (double)cvGetTickCount() - t2;//kernel
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(MinMax, MASK)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+ double minVal, maxVal;
+ cv::Point minLoc, maxLoc;
+ t0 = (double)cvGetTickCount();//cpu start
+ if (mat1.depth() != CV_8S)
+ {
+ cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc, mask_roi);
+ }
+ else
+ {
+ minVal = std::numeric_limits<double>::max();
+ maxVal = -std::numeric_limits<double>::max();
+ for (int i = 0; i < mat1_roi.rows; ++i)
+ for (int j = 0; j < mat1_roi.cols; ++j)
+ {
+ signed char val = mat1_roi.at<signed char>(i, j);
+ unsigned char m = mask_roi.at<unsigned char>(i, j);
+ if (val < minVal && m) minVal = val;
+ if (val > maxVal && m) maxVal = val;
+ }
+ }
+
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gmat1 = mat1_roi;
+ gmask = mask_roi;
+ double minVal_, maxVal_;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::minMax(gmat1, &minVal_, &maxVal_, gmask);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gmat1 = mat1_roi;
- gmask = mask_roi;
- double minVal_, maxVal_;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::minMax(gmat1, &minVal_, &maxVal_,gmask);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gmat1 = mat1_roi;
+ gmask = mask_roi;
+ double minVal_, maxVal_;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::minMax(gmat1, &minVal_, &maxVal_, gmask);
+ };
#endif
}
struct MinMaxLoc : ArithmTestBase {};
-TEST_P(MinMaxLoc, MAT)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
- double minVal, maxVal;
- cv::Point minLoc, maxLoc;
- int depth = mat1.depth();
- t0 = (double)cvGetTickCount();//cpu start
- if (depth != CV_8S)
- {
- cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc);
- }
- else
- {
- minVal = std::numeric_limits<double>::max();
- maxVal = -std::numeric_limits<double>::max();
- for (int i = 0; i < mat1_roi.rows; ++i)
- for (int j = 0; j < mat1_roi.cols; ++j)
- {
- signed char val = mat1_roi.at<signed char>(i, j);
- if (val < minVal) {
- minVal = val;
- minLoc.x = j;
- minLoc.y = i;
- }
- if (val > maxVal) {
- maxVal = val;
- maxLoc.x = j;
- maxLoc.y = i;
- }
- }
- }
-
-
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gmat1 = mat1_roi;
- double minVal_, maxVal_;
- cv::Point minLoc_, maxLoc_;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, cv::ocl::oclMat());
- t2 = (double)cvGetTickCount() - t2;//kernel
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(MinMaxLoc, MAT)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+ double minVal, maxVal;
+ cv::Point minLoc, maxLoc;
+ int depth = mat1.depth();
+ t0 = (double)cvGetTickCount();//cpu start
+ if (depth != CV_8S)
+ {
+ cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc);
+ }
+ else
+ {
+ minVal = std::numeric_limits<double>::max();
+ maxVal = -std::numeric_limits<double>::max();
+ for (int i = 0; i < mat1_roi.rows; ++i)
+ for (int j = 0; j < mat1_roi.cols; ++j)
+ {
+ signed char val = mat1_roi.at<signed char>(i, j);
+ if (val < minVal)
+ {
+ minVal = val;
+ minLoc.x = j;
+ minLoc.y = i;
+ }
+ if (val > maxVal)
+ {
+ maxVal = val;
+ maxLoc.x = j;
+ maxLoc.y = i;
+ }
+ }
+ }
+
+
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gmat1 = mat1_roi;
+ double minVal_, maxVal_;
+ cv::Point minLoc_, maxLoc_;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, cv::ocl::oclMat());
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gmat1 = mat1_roi;
- double minVal_, maxVal_;
- cv::Point minLoc_, maxLoc_;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, cv::ocl::oclMat());
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gmat1 = mat1_roi;
+ double minVal_, maxVal_;
+ cv::Point minLoc_, maxLoc_;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, cv::ocl::oclMat());
+ };
#endif
}
-TEST_P(MinMaxLoc, MASK)
-{
+TEST_P(MinMaxLoc, MASK)
+{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
- double minVal, maxVal;
- cv::Point minLoc, maxLoc;
- int depth = mat1.depth();
- t0 = (double)cvGetTickCount();//cpu start
- if (depth != CV_8S)
- {
- cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc,mask_roi);
- }
- else
- {
- minVal = std::numeric_limits<double>::max();
- maxVal = -std::numeric_limits<double>::max();
- for (int i = 0; i < mat1_roi.rows; ++i)
- for (int j = 0; j < mat1_roi.cols; ++j)
- {
- signed char val = mat1_roi.at<signed char>(i, j);
- unsigned char m = mask_roi.at<unsigned char>(i ,j);
- if (val < minVal && m) {
- minVal = val;
- minLoc.x = j;
- minLoc.y = i;
- }
- if (val > maxVal && m) {
- maxVal = val;
- maxLoc.x = j;
- maxLoc.y = i;
- }
- }
- }
-
-
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gmat1 = mat1_roi;
- gmask = mask_roi;
- double minVal_, maxVal_;
- cv::Point minLoc_, maxLoc_;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, gmask);
- t2 = (double)cvGetTickCount() - t2;//kernel
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+ double minVal, maxVal;
+ cv::Point minLoc, maxLoc;
+ int depth = mat1.depth();
+ t0 = (double)cvGetTickCount();//cpu start
+ if (depth != CV_8S)
+ {
+ cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc, mask_roi);
+ }
+ else
+ {
+ minVal = std::numeric_limits<double>::max();
+ maxVal = -std::numeric_limits<double>::max();
+ for (int i = 0; i < mat1_roi.rows; ++i)
+ for (int j = 0; j < mat1_roi.cols; ++j)
+ {
+ signed char val = mat1_roi.at<signed char>(i, j);
+ unsigned char m = mask_roi.at<unsigned char>(i , j);
+ if (val < minVal && m)
+ {
+ minVal = val;
+ minLoc.x = j;
+ minLoc.y = i;
+ }
+ if (val > maxVal && m)
+ {
+ maxVal = val;
+ maxLoc.x = j;
+ maxLoc.y = i;
+ }
+ }
+ }
+
+
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gmat1 = mat1_roi;
+ gmask = mask_roi;
+ double minVal_, maxVal_;
+ cv::Point minLoc_, maxLoc_;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, gmask);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gmat1 = mat1_roi;
- gmask = mask_roi;
- double minVal_, maxVal_;
- cv::Point minLoc_, maxLoc_;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_,&minLoc_, &maxLoc_, gmask);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gmat1 = mat1_roi;
+ gmask = mask_roi;
+ double minVal_, maxVal_;
+ cv::Point minLoc_, maxLoc_;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, gmask);
+ };
#endif
}
struct Sum : ArithmTestBase {};
-TEST_P(Sum, MAT)
-{
+TEST_P(Sum, MAT)
+{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- Scalar cpures =cv::sum(mat1_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- Scalar gpures=cv::ocl::sum(gmat1);
- t2 = (double)cvGetTickCount() - t2;//kernel
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ Scalar cpures = cv::sum(mat1_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ Scalar gpures = cv::ocl::sum(gmat1);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gmat1 = mat1_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- Scalar gpures=cv::ocl::sum(gmat1);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gmat1 = mat1_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ Scalar gpures = cv::ocl::sum(gmat1);
+ };
#endif
}
-//TEST_P(Sum, MASK)
-//{
+//TEST_P(Sum, MASK)
+//{
// for(int j=0; j<LOOP_TIMES; j++)
// {
-//
+//
// }
//}
struct CountNonZero : ArithmTestBase {};
-TEST_P(CountNonZero, MAT)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- int cpures =cv::countNonZero(mat1_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- int gpures=cv::ocl::countNonZero(gmat1);
- t2 = (double)cvGetTickCount() - t2;//kernel
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(CountNonZero, MAT)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ int cpures = cv::countNonZero(mat1_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ int gpures = cv::ocl::countNonZero(gmat1);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gmat1 = mat1_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- int gpures=cv::ocl::countNonZero(gmat1);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gmat1 = mat1_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ int gpures = cv::ocl::countNonZero(gmat1);
+ };
#endif
}
TEST_P(Phase, Mat)
{
- if(mat1.depth()!=CV_32F && mat1.depth()!=CV_64F)
- {
- cout<<"\tUnsupported type\t\n";
- }
+ if(mat1.depth() != CV_32F && mat1.depth() != CV_64F)
+ {
+ cout << "\tUnsupported type\t\n";
+ }
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::phase(mat1_roi,mat2_roi,dst_roi,0);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::phase(gmat1,gmat2,gdst,0);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::phase(mat1_roi, mat2_roi, dst_roi, 0);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::phase(gmat1, gmat2, gdst, 0);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::phase(gmat1,gmat2,gdst,0);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::phase(gmat1, gmat2, gdst, 0);
+ };
#endif
}
////////////////////////////////bitwise_and/////////////////////////////////////////////////
struct Bitwise_and : ArithmTestBase {};
-TEST_P(Bitwise_and, Mat)
-{
+TEST_P(Bitwise_and, Mat)
+{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::bitwise_and(mat1_roi, mat2_roi, dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::bitwise_and(gmat1, gmat2, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::bitwise_and(mat1_roi, mat2_roi, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::bitwise_and(gmat1, gmat2, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::bitwise_and(gmat1, gmat2, gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::bitwise_and(gmat1, gmat2, gdst);
+ };
#endif
}
-TEST_P(Bitwise_and, Mat_Mask)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::bitwise_and(mat1_roi, mat2_roi, dst_roi, mask_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- gmask = mask_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::bitwise_and(gmat1, gmat2, gdst, gmask);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Bitwise_and, Mat_Mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::bitwise_and(mat1_roi, mat2_roi, dst_roi, mask_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ gmask = mask_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::bitwise_and(gmat1, gmat2, gdst, gmask);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- gmask = mask_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::bitwise_and(gmat1, gmat2, gdst, gmask);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ gmask = mask_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::bitwise_and(gmat1, gmat2, gdst, gmask);
+ };
#endif
}
-TEST_P(Bitwise_and, Scalar)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::bitwise_and(mat1_roi, val, dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::bitwise_and(gmat1, val, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Bitwise_and, Scalar)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::bitwise_and(mat1_roi, val, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::bitwise_and(gmat1, val, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::bitwise_and(gmat1, val, gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::bitwise_and(gmat1, val, gdst);
+ };
#endif
}
-TEST_P(Bitwise_and, Scalar_Mask)
-{
+TEST_P(Bitwise_and, Scalar_Mask)
+{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::bitwise_and(mat1_roi, val, dst_roi, mask_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::bitwise_and(gmat1, val, gdst, gmask);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::bitwise_and(mat1_roi, val, dst_roi, mask_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::bitwise_and(gmat1, val, gdst, gmask);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmask = mask_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::bitwise_and(gmat1, val, gdst, gmask);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmask = mask_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::bitwise_and(gmat1, val, gdst, gmask);
+ };
#endif
}
struct Bitwise_or : ArithmTestBase {};
-TEST_P(Bitwise_or, Mat)
-{
+TEST_P(Bitwise_or, Mat)
+{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::bitwise_or(mat1_roi, mat2_roi, dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::bitwise_or(gmat1, gmat2, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::bitwise_or(mat1_roi, mat2_roi, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::bitwise_or(gmat1, gmat2, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::bitwise_or(gmat1, gmat2, gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::bitwise_or(gmat1, gmat2, gdst);
+ };
#endif
}
-TEST_P(Bitwise_or, Mat_Mask)
-{
+TEST_P(Bitwise_or, Mat_Mask)
+{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::bitwise_or(mat1_roi, mat2_roi, dst_roi, mask_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- gmask = mask_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::bitwise_or(gmat1, gmat2, gdst, gmask);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::bitwise_or(mat1_roi, mat2_roi, dst_roi, mask_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ gmask = mask_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::bitwise_or(gmat1, gmat2, gdst, gmask);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- gmask = mask_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::bitwise_or(gmat1, gmat2, gdst, gmask);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ gmask = mask_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::bitwise_or(gmat1, gmat2, gdst, gmask);
+ };
#endif
}
-TEST_P(Bitwise_or, Scalar)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::bitwise_or(mat1_roi, val, dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::bitwise_or(gmat1, val, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Bitwise_or, Scalar)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::bitwise_or(mat1_roi, val, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::bitwise_or(gmat1, val, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::bitwise_or(gmat1, val, gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::bitwise_or(gmat1, val, gdst);
+ };
#endif
}
-TEST_P(Bitwise_or, Scalar_Mask)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::bitwise_or(mat1_roi, val, dst_roi, mask_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmask = mask_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::bitwise_or(gmat1, val, gdst, gmask);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Bitwise_or, Scalar_Mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::bitwise_or(mat1_roi, val, dst_roi, mask_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmask = mask_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::bitwise_or(gmat1, val, gdst, gmask);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmask = mask_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::bitwise_or(gmat1, val, gdst, gmask);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmask = mask_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::bitwise_or(gmat1, val, gdst, gmask);
+ };
#endif
}
struct Bitwise_xor : ArithmTestBase {};
-TEST_P(Bitwise_xor, Mat)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::bitwise_xor(mat1_roi, mat2_roi, dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::bitwise_xor(gmat1, gmat2, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Bitwise_xor, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::bitwise_xor(mat1_roi, mat2_roi, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::bitwise_xor(gmat1, gmat2, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::bitwise_xor(gmat1, gmat2, gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::bitwise_xor(gmat1, gmat2, gdst);
+ };
#endif
}
-TEST_P(Bitwise_xor, Mat_Mask)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::bitwise_xor(mat1_roi, mat2_roi, dst_roi, mask_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- gmask = mask_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::bitwise_xor(gmat1, gmat2, gdst, gmask);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Bitwise_xor, Mat_Mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::bitwise_xor(mat1_roi, mat2_roi, dst_roi, mask_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ gmask = mask_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::bitwise_xor(gmat1, gmat2, gdst, gmask);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- gmask = mask_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::bitwise_xor(gmat1, gmat2, gdst, gmask);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ gmask = mask_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::bitwise_xor(gmat1, gmat2, gdst, gmask);
+ };
#endif
}
-TEST_P(Bitwise_xor, Scalar)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::bitwise_xor(mat1_roi, val, dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::bitwise_xor(gmat1, val, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Bitwise_xor, Scalar)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::bitwise_xor(mat1_roi, val, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::bitwise_xor(gmat1, val, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::bitwise_xor(gmat1, val, gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::bitwise_xor(gmat1, val, gdst);
+ };
#endif
}
-TEST_P(Bitwise_xor, Scalar_Mask)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::bitwise_xor(mat1_roi, val, dst_roi, mask_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmask = mask_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::bitwise_xor(gmat1, val, gdst, gmask);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Bitwise_xor, Scalar_Mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::bitwise_xor(mat1_roi, val, dst_roi, mask_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmask = mask_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::bitwise_xor(gmat1, val, gdst, gmask);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmask = mask_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::bitwise_xor(gmat1, val, gdst, gmask);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmask = mask_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::bitwise_xor(gmat1, val, gdst, gmask);
+ };
#endif
}
struct Bitwise_not : ArithmTestBase {};
-TEST_P(Bitwise_not, Mat)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::bitwise_not(mat1_roi,dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::bitwise_not(gmat1,gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Bitwise_not, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::bitwise_not(mat1_roi, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::bitwise_not(gmat1, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::bitwise_not(gmat1,gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::bitwise_not(gmat1, gdst);
+ };
#endif
}
////////////////////////////////compare/////////////////////////////////////////////////
PARAM_TEST_CASE ( CompareTestBase, MatType, bool)
{
- int type;
- cv::Scalar val;
-
- //src mat
- cv::Mat mat1;
- cv::Mat mat2;
- cv::Mat mask;
- cv::Mat dst;
- cv::Mat dst1; //bak, for two outputs
-
- // set up roi
- int roicols;
- int roirows;
- int src1x;
- int src1y;
- int src2x;
- int src2y;
- int dstx;
- int dsty;
- int maskx;
- int masky;
-
-
- //src mat with roi
- cv::Mat mat1_roi;
- cv::Mat mat2_roi;
- cv::Mat mask_roi;
- cv::Mat dst_roi;
- cv::Mat dst1_roi; //bak
- //std::vector<cv::ocl::Info> oclinfo;
- //ocl dst mat for testing
- cv::ocl::oclMat gdst_whole;
- cv::ocl::oclMat gdst1_whole; //bak
-
- //ocl mat with roi
- cv::ocl::oclMat gmat1;
- cv::ocl::oclMat gmat2;
- cv::ocl::oclMat gdst;
- cv::ocl::oclMat gdst1; //bak
- cv::ocl::oclMat gmask;
-
- virtual void SetUp()
- {
- //type = GET_PARAM(0);
- type = CV_8UC1;
+ int type;
+ cv::Scalar val;
+
+ //src mat
+ cv::Mat mat1;
+ cv::Mat mat2;
+ cv::Mat mask;
+ cv::Mat dst;
+ cv::Mat dst1; //bak, for two outputs
+
+ // set up roi
+ int roicols;
+ int roirows;
+ int src1x;
+ int src1y;
+ int src2x;
+ int src2y;
+ int dstx;
+ int dsty;
+ int maskx;
+ int masky;
+
+
+ //src mat with roi
+ cv::Mat mat1_roi;
+ cv::Mat mat2_roi;
+ cv::Mat mask_roi;
+ cv::Mat dst_roi;
+ cv::Mat dst1_roi; //bak
+ //std::vector<cv::ocl::Info> oclinfo;
+ //ocl dst mat for testing
+ cv::ocl::oclMat gdst_whole;
+ cv::ocl::oclMat gdst1_whole; //bak
+
+ //ocl mat with roi
+ cv::ocl::oclMat gmat1;
+ cv::ocl::oclMat gmat2;
+ cv::ocl::oclMat gdst;
+ cv::ocl::oclMat gdst1; //bak
+ cv::ocl::oclMat gmask;
+
+ virtual void SetUp()
+ {
+ //type = GET_PARAM(0);
+ type = CV_8UC1;
- cv::RNG& rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
- cv::Size size(MWIDTH, MHEIGHT);
+ cv::Size size(MWIDTH, MHEIGHT);
- mat1 = randomMat(rng, size, type, 5, 16, false);
- //mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false);
- mat2 = randomMat(rng, size, type, 5, 16, false);
- dst = randomMat(rng, size, type, 5, 16, false);
- dst1 = randomMat(rng, size, type, 5, 16, false);
- mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
+ mat1 = randomMat(rng, size, type, 5, 16, false);
+ //mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false);
+ mat2 = randomMat(rng, size, type, 5, 16, false);
+ dst = randomMat(rng, size, type, 5, 16, false);
+ dst1 = randomMat(rng, size, type, 5, 16, false);
+ mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
- cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
+ cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
- val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums>0);
- ////if you want to use undefault device, set it here
- ////setDevice(oclinfo[0]);
- //setBinpath(CLBINPATH);
- }
+ val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums>0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //setBinpath(CLBINPATH);
+ }
- void Has_roi(int b)
- {
- //cv::RNG& rng = TS::ptr()->get_rng();
- if(b)
- {
- //randomize ROI
- roicols = mat1.cols-1;
- roirows = mat1.rows-1;
- src1x = 1;
- src2x = 1;
- src1y = 1;
- src2y = 1;
- dstx = 1;
- dsty =1;
- maskx =1;
- masky =1;
- }else
- {
- roicols = mat1.cols;
- roirows = mat1.rows;
- src1x = 0;
- src2x = 0;
- src1y = 0;
- src2y = 0;
- dstx = 0;
- dsty = 0;
- maskx =0;
- masky =0;
- };
-
- mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
- //mat2_roi = mat2(Rect(src2x,src2y,256,1));
- mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows));
- mask_roi = mask(Rect(maskx,masky,roicols,roirows));
- dst_roi = dst(Rect(dstx,dsty,roicols,roirows));
- dst1_roi = dst1(Rect(dstx,dsty,roicols,roirows));
-
- //gdst_whole = dst;
- //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- //gdst1_whole = dst1;
- //gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
-
- //gmat1 = mat1_roi;
- //gmat2 = mat2_roi;
- //gmask = mask_roi;
- }
+ void Has_roi(int b)
+ {
+ //cv::RNG& rng = TS::ptr()->get_rng();
+ if(b)
+ {
+ //randomize ROI
+ roicols = mat1.cols - 1;
+ roirows = mat1.rows - 1;
+ src1x = 1;
+ src2x = 1;
+ src1y = 1;
+ src2y = 1;
+ dstx = 1;
+ dsty = 1;
+ maskx = 1;
+ masky = 1;
+ }
+ else
+ {
+ roicols = mat1.cols;
+ roirows = mat1.rows;
+ src1x = 0;
+ src2x = 0;
+ src1y = 0;
+ src2y = 0;
+ dstx = 0;
+ dsty = 0;
+ maskx = 0;
+ masky = 0;
+ };
+
+ mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
+ //mat2_roi = mat2(Rect(src2x,src2y,256,1));
+ mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
+ mask_roi = mask(Rect(maskx, masky, roicols, roirows));
+ dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+ dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows));
+
+ //gdst_whole = dst;
+ //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+ //gdst1_whole = dst1;
+ //gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
+
+ //gmat1 = mat1_roi;
+ //gmat2 = mat2_roi;
+ //gmask = mask_roi;
+ }
};
struct Compare : CompareTestBase {};
-TEST_P(Compare, Mat)
-{
- if(mat1.type()==CV_8SC1)
- {
- cout << "\tUnsupported type\t\n";
- }
+TEST_P(Compare, Mat)
+{
+ if(mat1.type() == CV_8SC1)
+ {
+ cout << "\tUnsupported type\t\n";
+ }
- int cmp_codes[] = {CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE};
- const char* cmp_str[] = {"CMP_EQ", "CMP_GT", "CMP_GE", "CMP_LT", "CMP_LE", "CMP_NE"};
- int cmp_num = sizeof(cmp_codes) / sizeof(int);
- for (int i = 0; i < cmp_num; ++i)
- {
+ int cmp_codes[] = {CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE};
+ const char *cmp_str[] = {"CMP_EQ", "CMP_GT", "CMP_GE", "CMP_LT", "CMP_LE", "CMP_NE"};
+ int cmp_num = sizeof(cmp_codes) / sizeof(int);
+ for (int i = 0; i < cmp_num; ++i)
+ {
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::compare(mat1_roi,mat2_roi,dst_roi,cmp_codes[i]);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::compare(gmat1,gmat2,gdst,cmp_codes[i]);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- cout<<cmp_str[i] <<endl;
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::compare(mat1_roi, mat2_roi, dst_roi, cmp_codes[i]);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::compare(gmat1, gmat2, gdst, cmp_codes[i]);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ cout << cmp_str[i] << endl;
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::compare(gmat1,gmat2,gdst,cmp_codes[i]);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::compare(gmat1, gmat2, gdst, cmp_codes[i]);
+ };
#endif
- }
+ }
}
TEST_P(Pow, Mat)
{
- if(mat1.depth()!=CV_32F && mat1.depth()!=CV_64F)
- {
- cout<<"\tUnsupported type\t\n";
- }
+ if(mat1.depth() != CV_32F && mat1.depth() != CV_64F)
+ {
+ cout << "\tUnsupported type\t\n";
+ }
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
- double p=4.5;
- t0 = (double)cvGetTickCount();//cpu start
- cv::pow(mat1_roi,p,dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::pow(gmat1,p,gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+ double p = 4.5;
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::pow(mat1_roi, p, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::pow(gmat1, p, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- double p=4.5;
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::pow(gmat1,p,gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ double p = 4.5;
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::pow(gmat1, p, gdst);
+ };
#endif
}
struct MagnitudeSqr : ArithmTestBase {};
-TEST_P(MagnitudeSqr, Mat)
-{
+TEST_P(MagnitudeSqr, Mat)
+{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- for(int i = 0;i < mat1.rows;++i)
- for(int j = 0;j < mat1.cols;++j)
- {
- float val1 = mat1.at<float>(i,j);
- float val2 = mat2.at<float>(i,j);
-
- ((float *)(dst.data))[i*dst.step/4 +j]= val1 * val1 +val2 * val2;
-
- }
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::magnitudeSqr(clmat1,clmat2, cldst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- cldst.download(cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ for(int i = 0; i < mat1.rows; ++i)
+ for(int j = 0; j < mat1.cols; ++j)
+ {
+ float val1 = mat1.at<float>(i, j);
+ float val2 = mat2.at<float>(i, j);
+
+ ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2;
+
+ }
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ cv::ocl::oclMat clmat1(mat1), clmat2(mat2), cldst;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::magnitudeSqr(clmat1, clmat2, cldst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ cldst.download(cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::magnitudeSqr(clmat1,clmat2, cldst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ cv::ocl::oclMat clmat1(mat1), clmat2(mat2), cldst;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::magnitudeSqr(clmat1, clmat2, cldst);
+ };
#endif
}
struct AddWeighted : ArithmTestBase {};
-TEST_P(AddWeighted, Mat)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
- double alpha=2.0,beta=1.0,gama=3.0;
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::addWeighted(mat1_roi,alpha,mat2_roi,beta,gama,dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
+TEST_P(AddWeighted, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+ double alpha = 2.0, beta = 1.0, gama = 3.0;
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::addWeighted(mat1_roi, alpha, mat2_roi, beta, gama, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
gmat1 = mat1_roi;
gmat2 = mat2_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::addWeighted(gmat1,alpha,gmat2,beta,gama, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download(cpu_dst);
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::addWeighted(gmat1, alpha, gmat2, beta, gama, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download(cpu_dst);
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
-
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
-}
#else
for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- double alpha=2.0,beta=1.0,gama=3.0;
+ {
+ Has_roi(j);
+ double alpha = 2.0, beta = 1.0, gama = 3.0;
gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
gmat1 = mat1_roi;
gmat2 = mat2_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::addWeighted(gmat1,alpha, gmat2,beta,gama, gdst);
- // double alpha=2.0,beta=1.0,gama=3.0;
- // cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst;
- // if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- // cv::ocl::addWeighted(clmat1,alpha,clmat2,beta,gama, cldst);
- };
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::addWeighted(gmat1, alpha, gmat2, beta, gama, gdst);
+ // double alpha=2.0,beta=1.0,gama=3.0;
+ // cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst;
+ // if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
+ // cv::ocl::addWeighted(clmat1,alpha,clmat2,beta,gama, cldst);
+ };
#endif
}
/*
struct AddWeighted : ArithmTestBase {};
-TEST_P(AddWeighted, Mat)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
+TEST_P(AddWeighted, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
double totalcputick=0;
double totalgputick=0;
double totalgputick_kernel=0;
double t0=0;
double t1=0;
- double t2=0;
+ double t2=0;
for(int j = 0; j < LOOP_TIMES+1; j ++)
{
- double alpha=2.0,beta=1.0,gama=3.0;
+ double alpha=2.0,beta=1.0,gama=3.0;
t0 = (double)cvGetTickCount();//cpu start
cv::addWeighted(mat1,alpha,mat2,beta,gama,dst);
t0 = (double)cvGetTickCount() - t0;//cpu end
- t1 = (double)cvGetTickCount();//gpu start1
+ t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst;
t2=(double)cvGetTickCount();//kernel
if(j == 0)
continue;
totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
+ totalcputick=t0+totalcputick;
+ totalgputick_kernel=t2+totalgputick_kernel;
}
cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
#else
//for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
// {
- double alpha=2.0,beta=1.0,gama=3.0;
+ double alpha=2.0,beta=1.0,gama=3.0;
cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst;
//if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
cv::ocl::addWeighted(clmat1,alpha,clmat2,beta,gama, cldst);
//********test****************
INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(
- Values(CV_8UC1, CV_8UC4),
- Values(false))); // Values(false) is the reserved parameter
+ Values(CV_8UC1, CV_8UC4),
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine(
- Values(CV_32FC1, CV_64FC1),
- Values(false))); // Values(false) is the reserved parameter
+ Values(CV_32FC1, CV_64FC1),
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine(
- Values(CV_32FC1, CV_64FC1),
- Values(false))); // Values(false) is the reserved parameter
+ Values(CV_32FC1, CV_64FC1),
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(
- Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
- Values(false)));
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+ Values(false)));
INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine(
- Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
- Values(false))); // Values(false) is the reserved parameter
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Div, Combine(
- Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
- Values(false))); // Values(false) is the reserved parameter
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(
- Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
- Values(false))); // Values(false) is the reserved parameter
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine(
- Values(CV_32FC1, CV_32FC4),
- Values(false))); // Values(false) is the reserved parameter
+ Values(CV_32FC1, CV_32FC4),
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine(
- Values(CV_32FC1, CV_32FC4),
- Values(false))); // Values(false) is the reserved parameter
+ Values(CV_32FC1, CV_32FC4),
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(
- Values(CV_32FC1, CV_32FC4),
- Values(false))); // Values(false) is the reserved parameter
+ Values(CV_32FC1, CV_32FC4),
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine(
- Values(CV_8UC1, CV_8UC4, CV_32FC1),
- Values(false))); // Values(false) is the reserved parameter
+ Values(CV_8UC1, CV_8UC4, CV_32FC1),
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(
- Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1, CV_32FC4),
- Values(false))); // Values(false) is the reserved parameter
+ Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1, CV_32FC4),
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, MinMax, Combine(
- Values(CV_8UC1, CV_32FC1),
- Values(false)));
+ Values(CV_8UC1, CV_32FC1),
+ Values(false)));
INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, Combine(
- Values(CV_8UC1, CV_32FC1),
- Values(false)));
+ Values(CV_8UC1, CV_32FC1),
+ Values(false)));
INSTANTIATE_TEST_CASE_P(Arithm, Sum, Combine(
- Values(CV_8U, CV_32S, CV_32F),
- Values(false)));
+ Values(CV_8U, CV_32S, CV_32F),
+ Values(false)));
INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, Combine(
- Values(CV_8U, CV_32S, CV_32F),
- Values(false)));
+ Values(CV_8U, CV_32S, CV_32F),
+ Values(false)));
INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32FC1, CV_32FC4), Values(false)));
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(
- Values(CV_8UC1, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), Values(false)));
+ Values(CV_8UC1, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), Values(false)));
//Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(
- Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(false)));
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(false)));
//Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(
- Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC4), Values(false)));
+ Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC4), Values(false)));
//Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(
- Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC4), Values(false)));
+ Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC4), Values(false)));
//Values(false) is the reserved parameter
-INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(Values(CV_8UC1,CV_16UC1,CV_16SC1,CV_32SC1,CV_32FC1,CV_64FC1), Values(false)));
+INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_64FC1), Values(false)));
//Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(Values(CV_32FC1, CV_32FC4), Values(false)));
//Values(false) is the reserved parameter
+INSTANTIATE_TEST_CASE_P(Arithm, MagnitudeSqr, Combine(
+ Values(CV_32FC1, CV_32FC1),
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine(
- Values(CV_8UC1, CV_32SC1, CV_32FC1),
- Values(false))); // Values(false) is the reserved parameter
+ Values(CV_8UC1, CV_32SC1, CV_32FC1),
+ Values(false))); // Values(false) is the reserved parameter
PARAM_TEST_CASE(Blend, MatType, int)
{
- int type;
- int channels;
- std::vector<cv::ocl::Info> oclinfo;
-
- virtual void SetUp()
- {
-
- type = GET_PARAM(0);
- channels = GET_PARAM(1);
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
- //cv::ocl::setBinpath(CLBINPATH);
- }
+ int type;
+ int channels;
+ std::vector<cv::ocl::Info> oclinfo;
+
+ virtual void SetUp()
+ {
+
+ type = GET_PARAM(0);
+ channels = GET_PARAM(1);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ //cv::ocl::setBinpath(CLBINPATH);
+ }
};
TEST_P(Blend, Performance)
{
- cv::Size size(MWIDTH, MHEIGHT);
- cv::Mat img1_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
- cv::Mat img2_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
- cv::Mat weights1 = randomMat(size, CV_32F, 0, 1);
- cv::Mat weights2 = randomMat(size, CV_32F, 0, 1);
- cv::ocl::oclMat gimg1(size, CV_MAKETYPE(type, channels)), gimg2(size, CV_MAKETYPE(type, channels)), gweights1(size, CV_32F), gweights2(size, CV_32F);
- cv::ocl::oclMat gdst(size, CV_MAKETYPE(type, channels));
-
-
- double totalgputick_all = 0;
- double totalgputick_kernel = 0;
- double t1 = 0;
- double t2 = 0;
-
- for (int j = 0; j < LOOP_TIMES + 1; j ++) //LOOP_TIMES=100
- {
- t1 = (double)cvGetTickCount();
- cv::ocl::oclMat gimg1 = cv::ocl::oclMat(img1_host);
- cv::ocl::oclMat gimg2 = cv::ocl::oclMat(img2_host);
- cv::ocl::oclMat gweights1 = cv::ocl::oclMat(weights1);
- cv::ocl::oclMat gweights2 = cv::ocl::oclMat(weights1);
-
- t2 = (double)cvGetTickCount();
- cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, gdst);
- t2 = (double)cvGetTickCount() - t2;
-
- cv::Mat m;
- gdst.download(m);
- t1 = (double)cvGetTickCount() - t1;
-
- if (j == 0)
- {
- continue;
- }
-
- totalgputick_all = t1 + totalgputick_all;
- totalgputick_kernel = t2 + totalgputick_kernel;
- };
-
- cout << "average gpu total runtime is " << totalgputick_all / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
-
- cout << "average gpu runtime without data transfering is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
-
+ cv::Size size(MWIDTH, MHEIGHT);
+ cv::Mat img1_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
+ cv::Mat img2_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0);
+ cv::Mat weights1 = randomMat(size, CV_32F, 0, 1);
+ cv::Mat weights2 = randomMat(size, CV_32F, 0, 1);
+ cv::ocl::oclMat gimg1(size, CV_MAKETYPE(type, channels)), gimg2(size, CV_MAKETYPE(type, channels)), gweights1(size, CV_32F), gweights2(size, CV_32F);
+ cv::ocl::oclMat gdst(size, CV_MAKETYPE(type, channels));
+
+
+ double totalgputick_all = 0;
+ double totalgputick_kernel = 0;
+ double t1 = 0;
+ double t2 = 0;
+
+ for (int j = 0; j < LOOP_TIMES + 1; j ++) //LOOP_TIMES=100
+ {
+ t1 = (double)cvGetTickCount();
+ cv::ocl::oclMat gimg1 = cv::ocl::oclMat(img1_host);
+ cv::ocl::oclMat gimg2 = cv::ocl::oclMat(img2_host);
+ cv::ocl::oclMat gweights1 = cv::ocl::oclMat(weights1);
+ cv::ocl::oclMat gweights2 = cv::ocl::oclMat(weights1);
+
+ t2 = (double)cvGetTickCount();
+ cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, gdst);
+ t2 = (double)cvGetTickCount() - t2;
+
+ cv::Mat m;
+ gdst.download(m);
+ t1 = (double)cvGetTickCount() - t1;
+
+ if (j == 0)
+ {
+ continue;
+ }
+
+ totalgputick_all = t1 + totalgputick_all;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+ };
+
+ cout << "average gpu total runtime is " << totalgputick_all / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+
+ cout << "average gpu runtime without data transfering is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine(
PARAM_TEST_CASE(Canny1, AppertureSize, L2gradient)
{
- int apperture_size;
- bool useL2gradient;
- //std::vector<cv::ocl::Info> oclinfo;
-
- virtual void SetUp()
- {
- apperture_size = GET_PARAM(0);
- useL2gradient = GET_PARAM(1);
-
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
- }
+ int apperture_size;
+ bool useL2gradient;
+ //std::vector<cv::ocl::Info> oclinfo;
+
+ virtual void SetUp()
+ {
+ apperture_size = GET_PARAM(0);
+ useL2gradient = GET_PARAM(1);
+
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ }
};
TEST_P(Canny1, Performance)
{
- cv::Mat img = readImage(FILTER_IMAGE,cv::IMREAD_GRAYSCALE);
- ASSERT_FALSE(img.empty());
-
- double low_thresh = 100.0;
- double high_thresh = 150.0;
-
- cv::Mat edges_gold;
- cv::ocl::oclMat edges;
-
- double totalgputick=0;
- double totalgputick_kernel=0;
-
- double t1=0;
- double t2=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
-
- t1 = (double)cvGetTickCount();//gpu start1
-
- cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);//upload
-
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
- t2 = (double)cvGetTickCount() - t2;//kernel
-
- cv::Mat cpu_dst;
- edges.download (cpu_dst);//download
-
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
-
- totalgputick_kernel=t2+totalgputick_kernel;
+ cv::Mat img = readImage(FILTER_IMAGE, cv::IMREAD_GRAYSCALE);
+ ASSERT_FALSE(img.empty());
- }
+ double low_thresh = 100.0;
+ double high_thresh = 150.0;
+
+ cv::Mat edges_gold;
+ cv::ocl::oclMat edges;
+
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+
+ double t1 = 0;
+ double t2 = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+
+ t1 = (double)cvGetTickCount();//gpu start1
+
+ cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);//upload
+
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+
+ cv::Mat cpu_dst;
+ edges.download (cpu_dst);//download
+
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+ if(j == 0)
+ continue;
+
+ totalgputick = t1 + totalgputick;
+
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny1, testing::Combine(
- testing::Values(AppertureSize(3), AppertureSize(5)),
- testing::Values(L2gradient(false), L2gradient(true))));
+ testing::Values(AppertureSize(3), AppertureSize(5)),
+ testing::Values(L2gradient(false), L2gradient(true))));
//
// @Authors
// Fangfang Bai fangfang@multicorewareinc.com
-//
+//
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
PARAM_TEST_CASE(ColumnSum)
{
- cv::Mat src;
- //std::vector<cv::ocl::Info> oclinfo;
-
- virtual void SetUp()
- {
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
- }
+ cv::Mat src;
+ //std::vector<cv::ocl::Info> oclinfo;
+
+ virtual void SetUp()
+ {
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ }
};
TEST_F(ColumnSum, Performance)
{
- cv::Size size(MWIDTH,MHEIGHT);
+ cv::Size size(MWIDTH, MHEIGHT);
cv::Mat src = randomMat(size, CV_32FC1);
cv::ocl::oclMat d_dst;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t1=0;
- double t2=0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t1 = 0;
+ double t2 = 0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
- t1 = (double)cvGetTickCount();//gpu start1
+ t1 = (double)cvGetTickCount();//gpu start1
- cv::ocl::oclMat d_src(src);
+ cv::ocl::oclMat d_src(src);
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::columnSum(d_src,d_dst);
- t2 = (double)cvGetTickCount() - t2;//kernel
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::columnSum(d_src, d_dst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- d_dst.download (cpu_dst);//download
+ cv::Mat cpu_dst;
+ d_dst.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
+ if(j == 0)
+ continue;
- totalgputick=t1+totalgputick;
- totalgputick_kernel=t2+totalgputick_kernel;
+ totalgputick = t1 + totalgputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
- }
+ }
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
-#endif
\ No newline at end of file
+#endif
\ No newline at end of file
#ifdef HAVE_CLAMDFFT
////////////////////////////////////////////////////////////////////////////
// Dft
-PARAM_TEST_CASE(Dft, cv::Size, bool)
+PARAM_TEST_CASE(Dft, cv::Size, bool)
{
- cv::Size dft_size;
- bool dft_rows;
- vector<cv::ocl::Info> info;
- virtual void SetUp()
- {
- dft_size = GET_PARAM(0);
- dft_rows = GET_PARAM(1);
- cv::ocl::getDevice(info);
- }
+ cv::Size dft_size;
+ bool dft_rows;
+ vector<cv::ocl::Info> info;
+ virtual void SetUp()
+ {
+ dft_size = GET_PARAM(0);
+ dft_rows = GET_PARAM(1);
+ cv::ocl::getDevice(info);
+ }
};
TEST_P(Dft, C2C)
{
- cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
- int flags = 0;
- flags |= dft_rows ? cv::DFT_ROWS : 0;
+ cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
+ int flags = 0;
+ flags |= dft_rows ? cv::DFT_ROWS : 0;
- cv::ocl::oclMat d_b;
+ cv::ocl::oclMat d_b;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t1=0;
- double t2=0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t1 = 0;
+ double t2 = 0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
- t1 = (double)cvGetTickCount();//gpu start1
+ t1 = (double)cvGetTickCount();//gpu start1
- cv::ocl::oclMat ga=cv::ocl::oclMat(a);//upload
+ cv::ocl::oclMat ga = cv::ocl::oclMat(a); //upload
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::dft(ga, d_b, a.size(), flags);
- t2 = (double)cvGetTickCount() - t2;//kernel
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::dft(ga, d_b, a.size(), flags);
+ t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- d_b.download (cpu_dst);//download
+ cv::Mat cpu_dst;
+ d_b.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
+ if(j == 0)
+ continue;
- totalgputick=t1+totalgputick;
- totalgputick_kernel=t2+totalgputick_kernel;
+ totalgputick = t1 + totalgputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
- }
+ }
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
TEST_P(Dft, R2CthenC2R)
{
- cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
+ cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
- int flags = 0;
- //flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet
+ int flags = 0;
+ //flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet
- cv::ocl::oclMat d_b, d_c;
+ cv::ocl::oclMat d_b, d_c;
- cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
- cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT);
+ cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
+ cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT);
- EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
+ EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
}
//INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine(
PARAM_TEST_CASE(FilterTestBase, MatType, bool)
{
- int type;
- cv::Scalar val;
-
- //src mat
- cv::Mat mat1;
- cv::Mat mat2;
- cv::Mat mask;
- cv::Mat dst;
- cv::Mat dst1; //bak, for two outputs
-
- // set up roi
- int roicols;
- int roirows;
- int src1x;
- int src1y;
- int src2x;
- int src2y;
- int dstx;
- int dsty;
- int maskx;
- int masky;
-
- //src mat with roi
- cv::Mat mat1_roi;
- cv::Mat mat2_roi;
- cv::Mat mask_roi;
- cv::Mat dst_roi;
- cv::Mat dst1_roi; //bak
- //std::vector<cv::ocl::Info> oclinfo;
- //ocl dst mat for testing
- cv::ocl::oclMat gdst_whole;
- cv::ocl::oclMat gdst1_whole; //bak
-
- //ocl mat with roi
- cv::ocl::oclMat gmat1;
- cv::ocl::oclMat gmat2;
- cv::ocl::oclMat gdst;
- cv::ocl::oclMat gdst1; //bak
- cv::ocl::oclMat gmask;
-
- virtual void SetUp()
- {
- type = GET_PARAM(0);
-
- cv::RNG& rng = TS::ptr()->get_rng();
- cv::Size size(MWIDTH, MHEIGHT);
-
- mat1 = randomMat(rng, size, type, 5, 16, false);
- mat2 = randomMat(rng, size, type, 5, 16, false);
- dst = randomMat(rng, size, type, 5, 16, false);
- dst1 = randomMat(rng, size, type, 5, 16, false);
- mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
-
- cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
-
- val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
- }
-
- void random_roi()
- {
- cv::RNG& rng = TS::ptr()->get_rng();
-
- //randomize ROI
- roicols = rng.uniform(1, mat1.cols);
- roirows = rng.uniform(1, mat1.rows);
- src1x = rng.uniform(0, mat1.cols - roicols);
- src1y = rng.uniform(0, mat1.rows - roirows);
- src2x = rng.uniform(0, mat2.cols - roicols);
- src2y = rng.uniform(0, mat2.rows - roirows);
- dstx = rng.uniform(0, dst.cols - roicols);
- dsty = rng.uniform(0, dst.rows - roirows);
- maskx = rng.uniform(0, mask.cols - roicols);
- masky = rng.uniform(0, mask.rows - roirows);
-
- mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
- mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows));
- mask_roi = mask(Rect(maskx,masky,roicols,roirows));
- dst_roi = dst(Rect(dstx,dsty,roicols,roirows));
- dst1_roi = dst1(Rect(dstx,dsty,roicols,roirows));
-
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gdst1_whole = dst1;
- gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- gmask = mask_roi;
- }
+ int type;
+ cv::Scalar val;
+
+ //src mat
+ cv::Mat mat1;
+ cv::Mat mat2;
+ cv::Mat mask;
+ cv::Mat dst;
+ cv::Mat dst1; //bak, for two outputs
+
+ // set up roi
+ int roicols;
+ int roirows;
+ int src1x;
+ int src1y;
+ int src2x;
+ int src2y;
+ int dstx;
+ int dsty;
+ int maskx;
+ int masky;
+
+ //src mat with roi
+ cv::Mat mat1_roi;
+ cv::Mat mat2_roi;
+ cv::Mat mask_roi;
+ cv::Mat dst_roi;
+ cv::Mat dst1_roi; //bak
+ //std::vector<cv::ocl::Info> oclinfo;
+ //ocl dst mat for testing
+ cv::ocl::oclMat gdst_whole;
+ cv::ocl::oclMat gdst1_whole; //bak
+
+ //ocl mat with roi
+ cv::ocl::oclMat gmat1;
+ cv::ocl::oclMat gmat2;
+ cv::ocl::oclMat gdst;
+ cv::ocl::oclMat gdst1; //bak
+ cv::ocl::oclMat gmask;
+
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
+
+ cv::RNG &rng = TS::ptr()->get_rng();
+ cv::Size size(MWIDTH, MHEIGHT);
+
+ mat1 = randomMat(rng, size, type, 5, 16, false);
+ mat2 = randomMat(rng, size, type, 5, 16, false);
+ dst = randomMat(rng, size, type, 5, 16, false);
+ dst1 = randomMat(rng, size, type, 5, 16, false);
+ mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
+
+ cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
+
+ val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
+ }
+
+ void random_roi()
+ {
+ cv::RNG &rng = TS::ptr()->get_rng();
+
+ //randomize ROI
+ roicols = rng.uniform(1, mat1.cols);
+ roirows = rng.uniform(1, mat1.rows);
+ src1x = rng.uniform(0, mat1.cols - roicols);
+ src1y = rng.uniform(0, mat1.rows - roirows);
+ src2x = rng.uniform(0, mat2.cols - roicols);
+ src2y = rng.uniform(0, mat2.rows - roirows);
+ dstx = rng.uniform(0, dst.cols - roicols);
+ dsty = rng.uniform(0, dst.rows - roirows);
+ maskx = rng.uniform(0, mask.cols - roicols);
+ masky = rng.uniform(0, mask.rows - roirows);
+
+ mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
+ mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
+ mask_roi = mask(Rect(maskx, masky, roicols, roirows));
+ dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+ dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows));
+
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gdst1_whole = dst1;
+ gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ gmask = mask_roi;
+ }
};
PARAM_TEST_CASE(Blur, MatType, cv::Size, int)
{
- int type;
- cv::Size ksize;
- int bordertype;
-
- //src mat
- cv::Mat mat1;
- cv::Mat dst;
-
- // set up roi
- int roicols;
- int roirows;
- int src1x;
- int src1y;
- int dstx;
- int dsty;
-
- //src mat with roi
- cv::Mat mat1_roi;
- cv::Mat dst_roi;
- //std::vector<cv::ocl::Info> oclinfo;
- //ocl dst mat for testing
- cv::ocl::oclMat gdst_whole;
-
- //ocl mat with roi
- cv::ocl::oclMat gmat1;
- cv::ocl::oclMat gdst;
-
- virtual void SetUp()
- {
- type = GET_PARAM(0);
- ksize = GET_PARAM(1);
- bordertype = GET_PARAM(2);
-
- cv::RNG& rng = TS::ptr()->get_rng();
- cv::Size size(MWIDTH, MHEIGHT);
-
- mat1 = randomMat(rng, size, type, 5, 16, false);
- dst = randomMat(rng, size, type, 5, 16, false);
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
- ////if you want to use undefault device, set it here
- ////setDevice(oclinfo[0]);
- //cv::ocl::setBinpath(CLBINPATH);
- }
-
-
- void Has_roi(int b)
- {
- if(b)
- {
- roicols = mat1.cols-1;
- roirows = mat1.rows-1;
- src1x = 1;
- src1y = 1;
- dstx = 1;
- dsty =1;
- }else
- {
- roicols = mat1.cols;
- roirows = mat1.rows;
- src1x = 0;
- src1y = 0;
- dstx = 0;
- dsty = 0;
- };
-
- mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
- dst_roi = dst(Rect(dstx,dsty,roicols,roirows));
-
- }
+ int type;
+ cv::Size ksize;
+ int bordertype;
+
+ //src mat
+ cv::Mat mat1;
+ cv::Mat dst;
+
+ // set up roi
+ int roicols;
+ int roirows;
+ int src1x;
+ int src1y;
+ int dstx;
+ int dsty;
+
+ //src mat with roi
+ cv::Mat mat1_roi;
+ cv::Mat dst_roi;
+ //std::vector<cv::ocl::Info> oclinfo;
+ //ocl dst mat for testing
+ cv::ocl::oclMat gdst_whole;
+
+ //ocl mat with roi
+ cv::ocl::oclMat gmat1;
+ cv::ocl::oclMat gdst;
+
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
+ ksize = GET_PARAM(1);
+ bordertype = GET_PARAM(2);
+
+ cv::RNG &rng = TS::ptr()->get_rng();
+ cv::Size size(MWIDTH, MHEIGHT);
+
+ mat1 = randomMat(rng, size, type, 5, 16, false);
+ dst = randomMat(rng, size, type, 5, 16, false);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
+ }
+
+
+ void Has_roi(int b)
+ {
+ if(b)
+ {
+ roicols = mat1.cols - 1;
+ roirows = mat1.rows - 1;
+ src1x = 1;
+ src1y = 1;
+ dstx = 1;
+ dsty = 1;
+ }
+ else
+ {
+ roicols = mat1.cols;
+ roirows = mat1.rows;
+ src1x = 0;
+ src1y = 0;
+ dstx = 0;
+ dsty = 0;
+ };
+
+ mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
+ dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+
+ }
};
TEST_P(Blur, Mat)
{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::blur(mat1_roi, dst_roi, ksize, Point(-1,-1), bordertype);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::blur(gmat1, gdst, ksize, Point(-1,-1), bordertype);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::blur(mat1_roi, dst_roi, ksize, Point(-1, -1), bordertype);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::blur(gmat1, gdst, ksize, Point(-1, -1), bordertype);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+ if(j == 0)
+ continue;
+
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::blur(gmat1, gdst, ksize, Point(-1,-1), bordertype);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::blur(gmat1, gdst, ksize, Point(-1, -1), bordertype);
+ };
#endif
}
/////////////////////////////////////////////////////////////////////////////////////////////////
-//Laplacian
+//Laplacian
PARAM_TEST_CASE(LaplacianTestBase, MatType, int)
{
- int type;
- int ksize;
-
- //src mat
- cv::Mat mat;
- cv::Mat dst;
-
- // set up roi
- int roicols;
- int roirows;
- int srcx;
- int srcy;
- int dstx;
- int dsty;
-
- //src mat with roi
- cv::Mat mat_roi;
- cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
- //ocl dst mat for testing
- cv::ocl::oclMat gdst_whole;
-
- //ocl mat with roi
- cv::ocl::oclMat gmat;
- cv::ocl::oclMat gdst;
-
- virtual void SetUp()
- {
- type = GET_PARAM(0);
- ksize = GET_PARAM(1);
-
- cv::RNG& rng = TS::ptr()->get_rng();
- cv::Size size = cv::Size(MWIDTH, MHEIGHT);
-
- mat = randomMat(rng, size, type, 5, 16, false);
- dst = randomMat(rng, size, type, 5, 16, false);
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
- ////if you want to use undefault device, set it here
- ////setDevice(oclinfo[0]);
- //cv::ocl::setBinpath(CLBINPATH);
- }
-
- void Has_roi(int b)
- {
- if(b)
- {
- roicols = mat.cols-1;
- roirows = mat.rows-1;
- srcx = 1;
- srcy = 1;
- dstx = 1;
- dsty =1;
- }else
- {
- roicols = mat.cols;
- roirows = mat.rows;
- srcx = 0;
- srcy = 0;
- dstx = 0;
- dsty = 0;
- };
-
- mat_roi = mat(Rect(srcx,srcy,roicols,roirows));
- dst_roi = dst(Rect(dstx,dsty,roicols,roirows));
-
- }
+ int type;
+ int ksize;
+
+ //src mat
+ cv::Mat mat;
+ cv::Mat dst;
+
+ // set up roi
+ int roicols;
+ int roirows;
+ int srcx;
+ int srcy;
+ int dstx;
+ int dsty;
+
+ //src mat with roi
+ cv::Mat mat_roi;
+ cv::Mat dst_roi;
+ std::vector<cv::ocl::Info> oclinfo;
+ //ocl dst mat for testing
+ cv::ocl::oclMat gdst_whole;
+
+ //ocl mat with roi
+ cv::ocl::oclMat gmat;
+ cv::ocl::oclMat gdst;
+
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
+ ksize = GET_PARAM(1);
+
+ cv::RNG &rng = TS::ptr()->get_rng();
+ cv::Size size = cv::Size(MWIDTH, MHEIGHT);
+
+ mat = randomMat(rng, size, type, 5, 16, false);
+ dst = randomMat(rng, size, type, 5, 16, false);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
+ }
+
+ void Has_roi(int b)
+ {
+ if(b)
+ {
+ roicols = mat.cols - 1;
+ roirows = mat.rows - 1;
+ srcx = 1;
+ srcy = 1;
+ dstx = 1;
+ dsty = 1;
+ }
+ else
+ {
+ roicols = mat.cols;
+ roirows = mat.rows;
+ srcx = 0;
+ srcy = 0;
+ dstx = 0;
+ dsty = 0;
+ };
+
+ mat_roi = mat(Rect(srcx, srcy, roicols, roirows));
+ dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+
+ }
};
struct Laplacian : LaplacianTestBase {};
-TEST_P(Laplacian, Accuracy)
-{
-
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::Laplacian(mat_roi, dst_roi, -1, ksize, 1);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat = mat_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::Laplacian(gmat, gdst, -1, ksize, 1);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
-#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat = mat_roi;
-
+TEST_P(Laplacian, Accuracy)
+{
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::Laplacian(gmat, gdst, -1, ksize, 1);
- };
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::Laplacian(mat_roi, dst_roi, -1, ksize, 1);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat = mat_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::Laplacian(gmat, gdst, -1, ksize, 1);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+ if(j == 0)
+ continue;
+
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
+#else
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat = mat_roi;
+
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::Laplacian(gmat, gdst, -1, ksize, 1);
+ };
#endif
}
/////////////////////////////////////////////////////////////////////////////////////////////////
-// erode & dilate
+// erode & dilate
PARAM_TEST_CASE(ErodeDilateBase, MatType, bool)
{
- int type;
- //int iterations;
-
- //erode or dilate kernel
- cv::Mat kernel;
-
- //src mat
- cv::Mat mat1;
- cv::Mat dst;
-
- // set up roi
- int roicols;
- int roirows;
- int src1x;
- int src1y;
- int dstx;
- int dsty;
-
- //src mat with roi
- cv::Mat mat1_roi;
- cv::Mat dst_roi;
- std::vector<cv::ocl::Info> oclinfo;
- //ocl dst mat for testing
- cv::ocl::oclMat gdst_whole;
-
- //ocl mat with roi
- cv::ocl::oclMat gmat1;
- cv::ocl::oclMat gdst;
-
- virtual void SetUp()
- {
- type = GET_PARAM(0);
- // iterations = GET_PARAM(1);
-
- cv::RNG& rng = TS::ptr()->get_rng();
- cv::Size size = cv::Size(MWIDTH, MHEIGHT);
-
- mat1 = randomMat(rng, size, type, 5, 16, false);
- dst = randomMat(rng, size, type, 5, 16, false);
- // rng.fill(kernel, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3));
- kernel = randomMat(rng, Size(3,3), CV_8UC1, 0, 3, false);
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
- ////if you want to use undefault device, set it here
- ////setDevice(oclinfo[0]);
- //cv::ocl::setBinpath(CLBINPATH);
- }
-
- void Has_roi(int b)
- {
- if(b)
- {
- roicols = mat1.cols-1;
- roirows = mat1.rows-1;
- src1x = 1;
- src1y = 1;
- dstx = 1;
- dsty =1;
- }else
- {
- roicols = mat1.cols;
- roirows = mat1.rows;
- src1x = 0;
- src1y = 0;
- dstx = 0;
- dsty = 0;
- };
-
- mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
- dst_roi = dst(Rect(dstx,dsty,roicols,roirows));
-
- }
+ int type;
+ //int iterations;
+
+ //erode or dilate kernel
+ cv::Mat kernel;
+
+ //src mat
+ cv::Mat mat1;
+ cv::Mat dst;
+
+ // set up roi
+ int roicols;
+ int roirows;
+ int src1x;
+ int src1y;
+ int dstx;
+ int dsty;
+
+ //src mat with roi
+ cv::Mat mat1_roi;
+ cv::Mat dst_roi;
+ std::vector<cv::ocl::Info> oclinfo;
+ //ocl dst mat for testing
+ cv::ocl::oclMat gdst_whole;
+
+ //ocl mat with roi
+ cv::ocl::oclMat gmat1;
+ cv::ocl::oclMat gdst;
+
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
+ // iterations = GET_PARAM(1);
+
+ cv::RNG &rng = TS::ptr()->get_rng();
+ cv::Size size = cv::Size(MWIDTH, MHEIGHT);
+
+ mat1 = randomMat(rng, size, type, 5, 16, false);
+ dst = randomMat(rng, size, type, 5, 16, false);
+ // rng.fill(kernel, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3));
+ kernel = randomMat(rng, Size(3, 3), CV_8UC1, 0, 3, false);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
+ }
+
+ void Has_roi(int b)
+ {
+ if(b)
+ {
+ roicols = mat1.cols - 1;
+ roirows = mat1.rows - 1;
+ src1x = 1;
+ src1y = 1;
+ dstx = 1;
+ dsty = 1;
+ }
+ else
+ {
+ roicols = mat1.cols;
+ roirows = mat1.rows;
+ src1x = 0;
+ src1y = 0;
+ dstx = 0;
+ dsty = 0;
+ };
+
+ mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
+ dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+
+ }
};
-// erode
+// erode
-struct Erode : ErodeDilateBase{};
+struct Erode : ErodeDilateBase {};
TEST_P(Erode, Mat)
{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::erode(mat1_roi, dst_roi, kernel);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
-
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::erode(gmat1, gdst, kernel);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::erode(mat1_roi, dst_roi, kernel);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::erode(gmat1, gdst, kernel);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+ if(j == 0)
+ continue;
+
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::erode(gmat1, gdst, kernel);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::erode(gmat1, gdst, kernel);
+ };
#endif
}
// dilate
-struct Dilate : ErodeDilateBase{};
+struct Dilate : ErodeDilateBase {};
TEST_P(Dilate, Mat)
{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
- t0 = (double)cvGetTickCount();//cpu start
- cv::dilate(mat1_roi, dst_roi, kernel);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::dilate(gmat1, gdst, kernel);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::dilate(mat1_roi, dst_roi, kernel);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::dilate(gmat1, gdst, kernel);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+ if(j == 0)
+ continue;
+
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::dilate(gmat1, gdst, kernel);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::dilate(gmat1, gdst, kernel);
+ };
#endif
}
/////////////////////////////////////////////////////////////////////////////////////////////////
-// Sobel
+// Sobel
PARAM_TEST_CASE(Sobel, MatType, int, int, int, int)
{
- int type;
- int dx, dy, ksize, bordertype;
-
- //src mat
- cv::Mat mat1;
- cv::Mat dst;
-
- // set up roi
- int roicols;
- int roirows;
- int src1x;
- int src1y;
- int dstx;
- int dsty;
-
- //src mat with roi
- cv::Mat mat1_roi;
- cv::Mat dst_roi;
- //std::vector<cv::ocl::Info> oclinfo;
- //ocl dst mat for testing
- cv::ocl::oclMat gdst_whole;
-
- //ocl mat with roi
- cv::ocl::oclMat gmat1;
- cv::ocl::oclMat gdst;
-
- virtual void SetUp()
- {
- type = GET_PARAM(0);
- dx = GET_PARAM(1);
- dy = GET_PARAM(2);
- ksize = GET_PARAM(3);
- bordertype = GET_PARAM(4);
- dx = 2; dy=0;
-
- cv::RNG& rng = TS::ptr()->get_rng();
- cv::Size size = cv::Size(MWIDTH, MHEIGHT);
-
- mat1 = randomMat(rng, size, type, 5, 16, false);
- dst = randomMat(rng, size, type, 5, 16, false);
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
- ////if you want to use undefault device, set it here
- ////setDevice(oclinfo[0]);
- //cv::ocl::setBinpath(CLBINPATH);
- }
-
- void Has_roi(int b)
- {
- if(b)
- {
- roicols = mat1.cols-1;
- roirows = mat1.rows-1;
- src1x = 1;
- src1y = 1;
- dstx = 1;
- dsty =1;
- }else
- {
- roicols = mat1.cols;
- roirows = mat1.rows;
- src1x = 0;
- src1y = 0;
- dstx = 0;
- dsty = 0;
- };
-
- mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
- dst_roi = dst(Rect(dstx,dsty,roicols,roirows));
-
- }
+ int type;
+ int dx, dy, ksize, bordertype;
+
+ //src mat
+ cv::Mat mat1;
+ cv::Mat dst;
+
+ // set up roi
+ int roicols;
+ int roirows;
+ int src1x;
+ int src1y;
+ int dstx;
+ int dsty;
+
+ //src mat with roi
+ cv::Mat mat1_roi;
+ cv::Mat dst_roi;
+ //std::vector<cv::ocl::Info> oclinfo;
+ //ocl dst mat for testing
+ cv::ocl::oclMat gdst_whole;
+
+ //ocl mat with roi
+ cv::ocl::oclMat gmat1;
+ cv::ocl::oclMat gdst;
+
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
+ dx = GET_PARAM(1);
+ dy = GET_PARAM(2);
+ ksize = GET_PARAM(3);
+ bordertype = GET_PARAM(4);
+ dx = 2;
+ dy = 0;
+
+ cv::RNG &rng = TS::ptr()->get_rng();
+ cv::Size size = cv::Size(MWIDTH, MHEIGHT);
+
+ mat1 = randomMat(rng, size, type, 5, 16, false);
+ dst = randomMat(rng, size, type, 5, 16, false);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
+ }
+
+ void Has_roi(int b)
+ {
+ if(b)
+ {
+ roicols = mat1.cols - 1;
+ roirows = mat1.rows - 1;
+ src1x = 1;
+ src1y = 1;
+ dstx = 1;
+ dsty = 1;
+ }
+ else
+ {
+ roicols = mat1.cols;
+ roirows = mat1.rows;
+ src1x = 0;
+ src1y = 0;
+ dstx = 0;
+ dsty = 0;
+ };
+
+ mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
+ dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+
+ }
};
TEST_P(Sobel, Mat)
{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::Sobel(mat1_roi, dst_roi, -1, dx, dy, ksize, /*scale*/0.00001,/*delta*/0, bordertype);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::Sobel(gmat1, gdst,-1, dx,dy,ksize,/*scale*/0.00001,/*delta*/0, bordertype);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::Sobel(mat1_roi, dst_roi, -1, dx, dy, ksize, /*scale*/0.00001,/*delta*/0, bordertype);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::Sobel(gmat1, gdst, -1, dx, dy, ksize,/*scale*/0.00001,/*delta*/0, bordertype);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+ if(j == 0)
+ continue;
+
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::Sobel(gmat1, gdst,-1, dx,dy,ksize,/*scale*/0.00001,/*delta*/0, bordertype);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::Sobel(gmat1, gdst, -1, dx, dy, ksize,/*scale*/0.00001,/*delta*/0, bordertype);
+ };
#endif
}
/////////////////////////////////////////////////////////////////////////////////////////////////
-// Scharr
+// Scharr
PARAM_TEST_CASE(Scharr, MatType, int, int, int)
{
- int type;
- int dx, dy, bordertype;
-
- //src mat
- cv::Mat mat1;
- cv::Mat dst;
-
- // set up roi
- int roicols;
- int roirows;
- int src1x;
- int src1y;
- int dstx;
- int dsty;
-
- //src mat with roi
- cv::Mat mat1_roi;
- cv::Mat dst_roi;
- //std::vector<cv::ocl::Info> oclinfo;
- //ocl dst mat for testing
- cv::ocl::oclMat gdst_whole;
-
- //ocl mat with roi
- cv::ocl::oclMat gmat1;
- cv::ocl::oclMat gdst;
-
- virtual void SetUp()
- {
- type = GET_PARAM(0);
- dx = GET_PARAM(1);
- dy = GET_PARAM(2);
- bordertype = GET_PARAM(3);
- dx = 1; dy=0;
-
- cv::RNG& rng = TS::ptr()->get_rng();
- cv::Size size = cv::Size(MWIDTH, MHEIGHT);
-
- mat1 = randomMat(rng, size, type, 5, 16, false);
- dst = randomMat(rng, size, type, 5, 16, false);
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
- ////if you want to use undefault device, set it here
- ////setDevice(oclinfo[0]);
- //cv::ocl::setBinpath(CLBINPATH);
- }
-
- void Has_roi(int b)
- {
- if(b)
- {
- roicols = mat1.cols-1;
- roirows = mat1.rows-1;
- src1x = 1;
- src1y = 1;
- dstx = 1;
- dsty =1;
- }else
- {
- roicols = mat1.cols;
- roirows = mat1.rows;
- src1x = 0;
- src1y = 0;
- dstx = 0;
- dsty = 0;
- };
-
- mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
- dst_roi = dst(Rect(dstx,dsty,roicols,roirows));
-
- }
+ int type;
+ int dx, dy, bordertype;
+
+ //src mat
+ cv::Mat mat1;
+ cv::Mat dst;
+
+ // set up roi
+ int roicols;
+ int roirows;
+ int src1x;
+ int src1y;
+ int dstx;
+ int dsty;
+
+ //src mat with roi
+ cv::Mat mat1_roi;
+ cv::Mat dst_roi;
+ //std::vector<cv::ocl::Info> oclinfo;
+ //ocl dst mat for testing
+ cv::ocl::oclMat gdst_whole;
+
+ //ocl mat with roi
+ cv::ocl::oclMat gmat1;
+ cv::ocl::oclMat gdst;
+
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
+ dx = GET_PARAM(1);
+ dy = GET_PARAM(2);
+ bordertype = GET_PARAM(3);
+ dx = 1;
+ dy = 0;
+
+ cv::RNG &rng = TS::ptr()->get_rng();
+ cv::Size size = cv::Size(MWIDTH, MHEIGHT);
+
+ mat1 = randomMat(rng, size, type, 5, 16, false);
+ dst = randomMat(rng, size, type, 5, 16, false);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
+ }
+
+ void Has_roi(int b)
+ {
+ if(b)
+ {
+ roicols = mat1.cols - 1;
+ roirows = mat1.rows - 1;
+ src1x = 1;
+ src1y = 1;
+ dstx = 1;
+ dsty = 1;
+ }
+ else
+ {
+ roicols = mat1.cols;
+ roirows = mat1.rows;
+ src1x = 0;
+ src1y = 0;
+ dstx = 0;
+ dsty = 0;
+ };
+
+ mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
+ dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+
+ }
};
TEST_P(Scharr, Mat)
{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::Scharr(mat1_roi, dst_roi, -1, dx, dy, /*scale*/1,/*delta*/0, bordertype);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::Scharr(gmat1, gdst,-1, dx,dy,/*scale*/1,/*delta*/0, bordertype);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::Scharr(mat1_roi, dst_roi, -1, dx, dy, /*scale*/1,/*delta*/0, bordertype);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::Scharr(gmat1, gdst, -1, dx, dy,/*scale*/1,/*delta*/0, bordertype);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+ if(j == 0)
+ continue;
+
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::Scharr(gmat1, gdst,-1, dx,dy,/*scale*/1,/*delta*/0, bordertype);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::Scharr(gmat1, gdst, -1, dx, dy,/*scale*/1,/*delta*/0, bordertype);
+ };
#endif
}
PARAM_TEST_CASE(GaussianBlur, MatType, cv::Size, int)
{
- int type;
- cv::Size ksize;
- int bordertype;
-
- double sigma1, sigma2;
-
- //src mat
- cv::Mat mat1;
- cv::Mat dst;
-
- // set up roi
- int roicols;
- int roirows;
- int src1x;
- int src1y;
- int dstx;
- int dsty;
-
- //src mat with roi
- cv::Mat mat1_roi;
- cv::Mat dst_roi;
- //std::vector<cv::ocl::Info> oclinfo;
- //ocl dst mat for testing
- cv::ocl::oclMat gdst_whole;
-
- //ocl mat with roi
- cv::ocl::oclMat gmat1;
- cv::ocl::oclMat gdst;
-
- virtual void SetUp()
- {
- type = GET_PARAM(0);
- ksize = GET_PARAM(1);
- bordertype = GET_PARAM(2);
-
- cv::RNG& rng = TS::ptr()->get_rng();
- cv::Size size = cv::Size(MWIDTH, MHEIGHT);
-
- sigma1 = rng.uniform(0.1, 1.0);
- sigma2 = rng.uniform(0.1, 1.0);
-
- mat1 = randomMat(rng, size, type, 5, 16, false);
- dst = randomMat(rng, size, type, 5, 16, false);
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
- ////if you want to use undefault device, set it here
- ////setDevice(oclinfo[0]);
- //cv::ocl::setBinpath(CLBINPATH);
- }
-
- void Has_roi(int b)
- {
- if(b)
- {
- roicols = mat1.cols-1;
- roirows = mat1.rows-1;
- src1x = 1;
- src1y = 1;
- dstx = 1;
- dsty =1;
- }else
- {
- roicols = mat1.cols;
- roirows = mat1.rows;
- src1x = 0;
- src1y = 0;
- dstx = 0;
- dsty = 0;
- };
-
- mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
- dst_roi = dst(Rect(dstx,dsty,roicols,roirows));
-
- }
+ int type;
+ cv::Size ksize;
+ int bordertype;
+
+ double sigma1, sigma2;
+
+ //src mat
+ cv::Mat mat1;
+ cv::Mat dst;
+
+ // set up roi
+ int roicols;
+ int roirows;
+ int src1x;
+ int src1y;
+ int dstx;
+ int dsty;
+
+ //src mat with roi
+ cv::Mat mat1_roi;
+ cv::Mat dst_roi;
+ //std::vector<cv::ocl::Info> oclinfo;
+ //ocl dst mat for testing
+ cv::ocl::oclMat gdst_whole;
+
+ //ocl mat with roi
+ cv::ocl::oclMat gmat1;
+ cv::ocl::oclMat gdst;
+
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
+ ksize = GET_PARAM(1);
+ bordertype = GET_PARAM(2);
+
+ cv::RNG &rng = TS::ptr()->get_rng();
+ cv::Size size = cv::Size(MWIDTH, MHEIGHT);
+
+ sigma1 = rng.uniform(0.1, 1.0);
+ sigma2 = rng.uniform(0.1, 1.0);
+
+ mat1 = randomMat(rng, size, type, 5, 16, false);
+ dst = randomMat(rng, size, type, 5, 16, false);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
+ }
+
+ void Has_roi(int b)
+ {
+ if(b)
+ {
+ roicols = mat1.cols - 1;
+ roirows = mat1.rows - 1;
+ src1x = 1;
+ src1y = 1;
+ dstx = 1;
+ dsty = 1;
+ }
+ else
+ {
+ roicols = mat1.cols;
+ roirows = mat1.rows;
+ src1x = 0;
+ src1y = 0;
+ dstx = 0;
+ dsty = 0;
+ };
+
+ mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
+ dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+
+ }
};
TEST_P(GaussianBlur, Mat)
{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::GaussianBlur(mat1_roi, dst_roi, ksize, sigma1, sigma2, bordertype);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::GaussianBlur(gmat1, gdst, ksize, sigma1, sigma2, bordertype);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::GaussianBlur(mat1_roi, dst_roi, ksize, sigma1, sigma2, bordertype);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::GaussianBlur(gmat1, gdst, ksize, sigma1, sigma2, bordertype);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+ if(j == 0)
+ continue;
+
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::GaussianBlur(gmat1, gdst, ksize, sigma1, sigma2, bordertype);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::GaussianBlur(gmat1, gdst, ksize, sigma1, sigma2, bordertype);
+ };
#endif
}
//************test**********
INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
- Values(cv::Size(3, 3)/*, cv::Size(5, 5), cv::Size(7, 7)*/),
- Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
+ Values(cv::Size(3, 3)/*, cv::Size(5, 5), cv::Size(7, 7)*/),
+ Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
INSTANTIATE_TEST_CASE_P(Filters, Laplacian, Combine(
- Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
- Values(1/*, 3*/)));
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+ Values(1/*, 3*/)));
//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3)));
INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_32FC1),
- Values(1, 2), Values(0, 1), Values(3, 5), Values((MatType)cv::BORDER_CONSTANT,
- (MatType)cv::BORDER_REPLICATE)));
+ Values(1, 2), Values(0, 1), Values(3, 5), Values((MatType)cv::BORDER_CONSTANT,
+ (MatType)cv::BORDER_REPLICATE)));
INSTANTIATE_TEST_CASE_P(Filter, Scharr, Combine(
- Values(CV_8UC1, CV_32FC1), Values(0, 1), Values(0, 1),
- Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
+ Values(CV_8UC1, CV_32FC1), Values(0, 1), Values(0, 1),
+ Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine(
- Values(CV_8UC1, CV_32FC1),
- Values(cv::Size(3, 3), cv::Size(5, 5)),
- Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
+ Values(CV_8UC1, CV_32FC1),
+ Values(cv::Size(3, 3), cv::Size(5, 5)),
+ Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
#endif // HAVE_OPENCL
#ifdef HAVE_CLAMDBLAS
////////////////////////////////////////////////////////////////////////////
// GEMM
-PARAM_TEST_CASE(Gemm, int, cv::Size, int)
+PARAM_TEST_CASE(Gemm, int, cv::Size, int)
{
- int type;
- cv::Size mat_size;
- int flags;
- vector<cv::ocl::Info> info;
- virtual void SetUp()
- {
- type = GET_PARAM(0);
- mat_size = GET_PARAM(1);
- flags = GET_PARAM(2);
-
- cv::ocl::getDevice(info);
- }
+ int type;
+ cv::Size mat_size;
+ int flags;
+ vector<cv::ocl::Info> info;
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
+ mat_size = GET_PARAM(1);
+ flags = GET_PARAM(2);
+
+ cv::ocl::getDevice(info);
+ }
};
TEST_P(Gemm, Performance)
{
- cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
- cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
- cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
- cv::ocl::oclMat ocl_dst;
+ cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
+ cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
+ cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
+ cv::ocl::oclMat ocl_dst;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t1=0;
- double t2=0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t1 = 0;
+ double t2 = 0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
- t1 = (double)cvGetTickCount();//gpu start1
+ t1 = (double)cvGetTickCount();//gpu start1
- cv::ocl::oclMat ga = cv::ocl::oclMat(a);//upload
- cv::ocl::oclMat gb = cv::ocl::oclMat(b);//upload
- cv::ocl::oclMat gc = cv::ocl::oclMat(c);//upload
+ cv::ocl::oclMat ga = cv::ocl::oclMat(a);//upload
+ cv::ocl::oclMat gb = cv::ocl::oclMat(b);//upload
+ cv::ocl::oclMat gc = cv::ocl::oclMat(c);//upload
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::gemm(ga, gb, 1.0,gc, 1.0, ocl_dst, flags);
- t2 = (double)cvGetTickCount() - t2;//kernel
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::gemm(ga, gb, 1.0, gc, 1.0, ocl_dst, flags);
+ t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- ocl_dst.download (cpu_dst);//download
+ cv::Mat cpu_dst;
+ ocl_dst.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end
+ t1 = (double)cvGetTickCount() - t1;//gpu end
- if(j == 0)
- continue;
+ if(j == 0)
+ continue;
- totalgputick=t1+totalgputick;
- totalgputick_kernel=t2+totalgputick_kernel;
+ totalgputick = t1 + totalgputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
- }
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+ }
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
- testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/),
- testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)),
- testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
+ testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/),
+ testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)),
+ testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
#endif
\ No newline at end of file
using namespace std;
using namespace cv;
-struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } };
+struct getRect
+{
+ Rect operator ()(const CvAvgComp &e) const
+ {
+ return e.rect;
+ }
+};
PARAM_TEST_CASE(HaarTestBase, int, int)
{
- //std::vector<cv::ocl::Info> oclinfo;
- cv::ocl::OclCascadeClassifier cascade, nestedCascade;
- cv::CascadeClassifier cpucascade, cpunestedCascade;
- // Mat img;
-
- double scale;
- int index;
-
- virtual void SetUp()
- {
- scale = 1.0;
- index=0;
- string cascadeName="../../../data/haarcascades/haarcascade_frontalface_alt.xml";
-
- if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
- {
- cout << "ERROR: Could not load classifier cascade" << endl;
- cout << "Usage: facedetect [--cascade=<cascade_path>]\n"
- " [--scale[=<image scale>\n"
- " [filename|camera_index]\n" << endl ;
- return;
- }
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums>0);
- ////if you want to use undefault device, set it here
- ////setDevice(oclinfo[0]);
- //cv::ocl::setBinpath("E:\\");
- }
+ //std::vector<cv::ocl::Info> oclinfo;
+ cv::ocl::OclCascadeClassifier cascade, nestedCascade;
+ cv::CascadeClassifier cpucascade, cpunestedCascade;
+ // Mat img;
+
+ double scale;
+ int index;
+
+ virtual void SetUp()
+ {
+ scale = 1.0;
+ index = 0;
+ string cascadeName = "../../../data/haarcascades/haarcascade_frontalface_alt.xml";
+
+ if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
+ {
+ cout << "ERROR: Could not load classifier cascade" << endl;
+ cout << "Usage: facedetect [--cascade=<cascade_path>]\n"
+ " [--scale[=<image scale>\n"
+ " [filename|camera_index]\n" << endl ;
+ return;
+ }
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums>0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath("E:\\");
+ }
};
////////////////////////////////faceDetect/////////////////////////////////////////////////
struct Haar : HaarTestBase {};
-TEST_F(Haar, FaceDetect)
-{
- string imgName = "../../../samples/c/lena.jpg";
- Mat img = imread( imgName, 1 );
-
- if(img.empty())
- {
- std::cout << "Couldn't read test" << index <<".jpg" << std::endl;
- return ;
- }
-
- int i = 0;
- double t = 0;
- vector<Rect> faces, oclfaces;
-
- const static Scalar colors[] = { CV_RGB(0,0,255),
- CV_RGB(0,128,255),
- CV_RGB(0,255,255),
- CV_RGB(0,255,0),
- CV_RGB(255,128,0),
- CV_RGB(255,255,0),
- CV_RGB(255,0,0),
- CV_RGB(255,0,255)} ;
-
- Mat gray, smallImg(cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
- MemStorage storage(cvCreateMemStorage(0));
- cvtColor( img, gray, CV_BGR2GRAY );
- resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
- equalizeHist( smallImg, smallImg );
-
- t = (double)cvGetTickCount();
- for(int k= 0; k<LOOP_TIMES; k++)
- {
- cpucascade.detectMultiScale( smallImg, faces, 1.1,
- 3, 0
- |CV_HAAR_SCALE_IMAGE
- , Size(30,30), Size(0, 0) );
- }
- t = (double)cvGetTickCount() - t ;
- printf( "cpudetection time = %g ms\n", t/(LOOP_TIMES*(double)cvGetTickFrequency()*1000.) );
-
- cv::ocl::oclMat image;
- CvSeq* _objects;
- t = (double)cvGetTickCount();
- for(int k= 0; k<LOOP_TIMES; k++)
- {
- image.upload(smallImg);
- _objects = cascade.oclHaarDetectObjects( image, storage, 1.1,
- 3, 0
- |CV_HAAR_SCALE_IMAGE
- , Size(30,30), Size(0, 0) );
- }
- t = (double)cvGetTickCount() - t ;
- printf( "ocldetection time = %g ms\n", t/(LOOP_TIMES*(double)cvGetTickFrequency()*1000.) );
- vector<CvAvgComp> vecAvgComp;
- Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
- oclfaces.resize(vecAvgComp.size());
- std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
-
- //for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
- //{
- // Mat smallImgROI;
- // Point center;
- // Scalar color = colors[i%8];
- // int radius;
- // center.x = cvRound((r->x + r->width*0.5)*scale);
- // center.y = cvRound((r->y + r->height*0.5)*scale);
- // radius = cvRound((r->width + r->height)*0.25*scale);
- // circle( img, center, radius, color, 3, 8, 0 );
- //}
- //namedWindow("result");
- //imshow("result",img);
- //waitKey(0);
- //destroyAllWindows();
+TEST_F(Haar, FaceDetect)
+{
+ string imgName = "../../../samples/c/lena.jpg";
+ Mat img = imread( imgName, 1 );
+
+ if(img.empty())
+ {
+ std::cout << "Couldn't read test" << index << ".jpg" << std::endl;
+ return ;
+ }
+
+ int i = 0;
+ double t = 0;
+ vector<Rect> faces, oclfaces;
+
+ const static Scalar colors[] = { CV_RGB(0, 0, 255),
+ CV_RGB(0, 128, 255),
+ CV_RGB(0, 255, 255),
+ CV_RGB(0, 255, 0),
+ CV_RGB(255, 128, 0),
+ CV_RGB(255, 255, 0),
+ CV_RGB(255, 0, 0),
+ CV_RGB(255, 0, 255)
+ } ;
+
+ Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
+ MemStorage storage(cvCreateMemStorage(0));
+ cvtColor( img, gray, CV_BGR2GRAY );
+ resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
+ equalizeHist( smallImg, smallImg );
+
+ t = (double)cvGetTickCount();
+ for(int k = 0; k < LOOP_TIMES; k++)
+ {
+ cpucascade.detectMultiScale( smallImg, faces, 1.1,
+ 3, 0
+ | CV_HAAR_SCALE_IMAGE
+ , Size(30, 30), Size(0, 0) );
+ }
+ t = (double)cvGetTickCount() - t ;
+ printf( "cpudetection time = %g ms\n", t / (LOOP_TIMES * (double)cvGetTickFrequency() * 1000.) );
+
+ cv::ocl::oclMat image;
+ CvSeq *_objects;
+ t = (double)cvGetTickCount();
+ for(int k = 0; k < LOOP_TIMES; k++)
+ {
+ image.upload(smallImg);
+ _objects = cascade.oclHaarDetectObjects( image, storage, 1.1,
+ 3, 0
+ | CV_HAAR_SCALE_IMAGE
+ , Size(30, 30), Size(0, 0) );
+ }
+ t = (double)cvGetTickCount() - t ;
+ printf( "ocldetection time = %g ms\n", t / (LOOP_TIMES * (double)cvGetTickFrequency() * 1000.) );
+ vector<CvAvgComp> vecAvgComp;
+ Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
+ oclfaces.resize(vecAvgComp.size());
+ std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
+
+ //for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
+ //{
+ // Mat smallImgROI;
+ // Point center;
+ // Scalar color = colors[i%8];
+ // int radius;
+ // center.x = cvRound((r->x + r->width*0.5)*scale);
+ // center.y = cvRound((r->y + r->height*0.5)*scale);
+ // radius = cvRound((r->width + r->height)*0.25*scale);
+ // circle( img, center, radius, color, 3, 8, 0 );
+ //}
+ //namedWindow("result");
+ //imshow("result",img);
+ //waitKey(0);
+ //destroyAllWindows();
}
#endif // HAVE_OPENCL
#include "precomp.hpp"\r
#include <iomanip>\r
\r
-#ifdef HAVE_OPENCL
-
-using namespace cv;
-using namespace cv::ocl;
-using namespace cvtest;
-using namespace testing;
+#ifdef HAVE_OPENCL\r
+\r
+using namespace cv;\r
+using namespace cv::ocl;\r
+using namespace cvtest;\r
+using namespace testing;\r
using namespace std;\r
\r
#define FILTER_IMAGE "../../../samples/gpu/road.png"\r
-
+\r
#ifndef MWC_TEST_UTILITY\r
#define MWC_TEST_UTILITY\r
\r
}\r
\r
#endif // IMPLEMENT_PARAM_CLASS\r
-#endif // MWC_TEST_UTILITY
-
-IMPLEMENT_PARAM_CLASS(WinSizw48, bool);
-
-PARAM_TEST_CASE(HOG, WinSizw48, bool)
-{
- bool is48;
- vector<float> detector;
- virtual void SetUp()
- {
- is48 = GET_PARAM(0);
- if(is48)
- {
- detector = cv::ocl::HOGDescriptor::getPeopleDetector48x96();
- }
- else
- {
- detector = cv::ocl::HOGDescriptor::getPeopleDetector64x128();
- }
- }
-};
-
-TEST_P(HOG, Performance)
-{
- cv::Mat img = readImage(FILTER_IMAGE,cv::IMREAD_GRAYSCALE);
- ASSERT_FALSE(img.empty());
-
- // define HOG related arguments
+#endif // MWC_TEST_UTILITY\r
+\r
+IMPLEMENT_PARAM_CLASS(WinSizw48, bool);\r
+\r
+PARAM_TEST_CASE(HOG, WinSizw48, bool)\r
+{\r
+ bool is48;\r
+ vector<float> detector;\r
+ virtual void SetUp()\r
+ {\r
+ is48 = GET_PARAM(0);\r
+ if(is48)\r
+ {\r
+ detector = cv::ocl::HOGDescriptor::getPeopleDetector48x96();\r
+ }\r
+ else\r
+ {\r
+ detector = cv::ocl::HOGDescriptor::getPeopleDetector64x128();\r
+ }\r
+ }\r
+};\r
+\r
+TEST_P(HOG, Performance)\r
+{\r
+ cv::Mat img = readImage(FILTER_IMAGE, cv::IMREAD_GRAYSCALE);\r
+ ASSERT_FALSE(img.empty());\r
+\r
+ // define HOG related arguments\r
float scale = 1.05;\r
int nlevels = 13;\r
float gr_threshold = 8;\r
float hit_threshold = 1.4;\r
bool hit_threshold_auto = true;\r
\r
- int win_width = is48? 48 : 64;\r
+ int win_width = is48 ? 48 : 64;\r
int win_stride_width = 8;\r
int win_stride_height = 8;\r
\r
- bool gamma_corr = true;
-
+ bool gamma_corr = true;\r
+\r
Size win_size(win_width, win_width * 2); //(64, 128) or (48, 96)\r
- Size win_stride(win_stride_width, win_stride_height);
-
+ Size win_stride(win_stride_width, win_stride_height);\r
+\r
cv::ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,\r
- cv::ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,\r
- cv::ocl::HOGDescriptor::DEFAULT_NLEVELS);\r
+ cv::ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,\r
+ cv::ocl::HOGDescriptor::DEFAULT_NLEVELS);\r
\r
gpu_hog.setSVMDetector(detector);\r
-
- double totalgputick=0;
- double totalgputick_kernel=0;
-
- double t1=0;
- double t2=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- t1 = (double)cvGetTickCount();//gpu start1
-
- ocl::oclMat d_src(img);//upload
-
- t2=(double)cvGetTickCount();//kernel
-
- vector<Rect> found;
+\r
+ double totalgputick = 0;\r
+ double totalgputick_kernel = 0;\r
+\r
+ double t1 = 0;\r
+ double t2 = 0;\r
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)\r
+ {\r
+ t1 = (double)cvGetTickCount();//gpu start1\r
+\r
+ ocl::oclMat d_src(img);//upload\r
+\r
+ t2 = (double)cvGetTickCount(); //kernel\r
+\r
+ vector<Rect> found;\r
gpu_hog.detectMultiScale(d_src, found, hit_threshold, win_stride,\r
- Size(0, 0), scale, gr_threshold);
-
- t2 = (double)cvGetTickCount() - t2;//kernel
-
- // no download time for HOG
-
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
-
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
-
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
-}
-
-
-INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, HOG, testing::Combine(testing::Values(WinSizw48(false), WinSizw48(true)), testing::Values(false)));
-
+ Size(0, 0), scale, gr_threshold);\r
+\r
+ t2 = (double)cvGetTickCount() - t2;//kernel\r
+\r
+ // no download time for HOG\r
+\r
+ t1 = (double)cvGetTickCount() - t1;//gpu end1\r
+\r
+ if(j == 0)\r
+ continue;\r
+\r
+ totalgputick = t1 + totalgputick;\r
+\r
+ totalgputick_kernel = t2 + totalgputick_kernel;\r
+\r
+ }\r
+\r
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;\r
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;\r
+}\r
+\r
+\r
+INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, HOG, testing::Combine(testing::Values(WinSizw48(false), WinSizw48(true)), testing::Values(false)));\r
+\r
#endif //Have opencl
\ No newline at end of file
vector<MatType> typeVector(MatType type)
{
- vector<MatType> v;
- v.push_back(type);
- return v;
+ vector<MatType> v;
+ v.push_back(type);
+ return v;
}
-PARAM_TEST_CASE(ImgprocTestBase, MatType,MatType,MatType,MatType,MatType, bool)
+PARAM_TEST_CASE(ImgprocTestBase, MatType, MatType, MatType, MatType, MatType, bool)
{
- int type1,type2,type3,type4,type5;
- cv::Scalar val;
- // set up roi
- int roicols;
- int roirows;
- int src1x;
- int src1y;
- int src2x;
- int src2y;
- int dstx;
- int dsty;
- int dst1x;
- int dst1y;
- int maskx;
- int masky;
-
- //mat
- cv::Mat mat1;
- cv::Mat mat2;
- cv::Mat mask;
- cv::Mat dst;
- cv::Mat dst1; //bak, for two outputs
-
- //mat with roi
- cv::Mat mat1_roi;
- cv::Mat mat2_roi;
- cv::Mat mask_roi;
- cv::Mat dst_roi;
- cv::Mat dst1_roi; //bak
- //std::vector<cv::ocl::Info> oclinfo;
- //ocl mat
- cv::ocl::oclMat clmat1;
- cv::ocl::oclMat clmat2;
- cv::ocl::oclMat clmask;
- cv::ocl::oclMat cldst;
- cv::ocl::oclMat cldst1; //bak
-
- //ocl mat with roi
- cv::ocl::oclMat clmat1_roi;
- cv::ocl::oclMat clmat2_roi;
- cv::ocl::oclMat clmask_roi;
- cv::ocl::oclMat cldst_roi;
- cv::ocl::oclMat cldst1_roi;
-
- virtual void SetUp()
- {
- type1 = GET_PARAM(0);
- type2 = GET_PARAM(1);
- type3 = GET_PARAM(2);
- type4 = GET_PARAM(3);
- type5 = GET_PARAM(4);
- cv::RNG& rng = TS::ptr()->get_rng();
- cv::Size size(MWIDTH, MHEIGHT);
- double min = 1,max = 20;
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums>0);
- ////if you want to use undefault device, set it here
- ////setDevice(oclinfo[0]);
- //cv::ocl::setBinpath(CLBINPATH);
- if(type1!=nulltype)
- {
- mat1 = randomMat(rng, size, type1, min, max, false);
- clmat1 = mat1;
- }
- if(type2!=nulltype)
- {
- mat2 = randomMat(rng, size, type2, min, max, false);
- clmat2 = mat2;
- }
- if(type3!=nulltype)
- {
- dst = randomMat(rng, size, type3, min, max, false);
- cldst = dst;
- }
- if(type4!=nulltype)
- {
- dst1 = randomMat(rng, size, type4, min, max, false);
- cldst1 = dst1;
- }
- if(type5!=nulltype)
- {
- mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
- cv::threshold(mask, mask, 0.5, 255., type5);
- clmask = mask;
- }
- val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
- }
-
-
- void Has_roi(int b)
- {
- //cv::RNG& rng = TS::ptr()->get_rng();
- if(b)
- {
- //randomize ROI
- roicols = mat1.cols-1; //start
- roirows = mat1.rows-1;
- src1x = 1;
- src2x = 1;
- src1y = 1;
- src2y = 1;
- dstx = 1;
- dsty =1;
- dst1x = 1;
- dst1y =1;
- maskx =1;
- masky =1;
- }else
- {
- roicols = mat1.cols;
- roirows = mat1.rows;
- src1x = 0;
- src2x = 0;
- src1y = 0;
- src2y = 0;
- dstx = 0;
- dsty = 0;
- dst1x =0;
- dst1y =0;
- maskx =0;
- masky =0;
- };
-
- if(type1!=nulltype)
- {
- mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
- //clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
- }
- if(type2!=nulltype)
- {
- mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows));
- //clmat2_roi = clmat2(Rect(src2x,src2y,roicols,roirows));
- }
- if(type3!=nulltype)
- {
- dst_roi = dst(Rect(dstx,dsty,roicols,roirows));
- //cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows));
- }
- if(type4!=nulltype)
- {
- dst1_roi = dst1(Rect(dst1x,dst1y,roicols,roirows));
- //cldst1_roi = cldst1(Rect(dst1x,dst1y,roicols,roirows));
- }
- if(type5!=nulltype)
- {
- mask_roi = mask(Rect(maskx,masky,roicols,roirows));
- //clmask_roi = clmask(Rect(maskx,masky,roicols,roirows));
- }
- }
-
- void random_roi()
- {
- cv::RNG& rng = TS::ptr()->get_rng();
-
- //randomize ROI
- roicols = rng.uniform(1, mat1.cols);
- roirows = rng.uniform(1, mat1.rows);
- src1x = rng.uniform(0, mat1.cols - roicols);
- src1y = rng.uniform(0, mat1.rows - roirows);
- src2x = rng.uniform(0, mat2.cols - roicols);
- src2y = rng.uniform(0, mat2.rows - roirows);
- dstx = rng.uniform(0, dst.cols - roicols);
- dsty = rng.uniform(0, dst.rows - roirows);
- dst1x = rng.uniform(0, dst1.cols - roicols);
- dst1y = rng.uniform(0, dst1.rows - roirows);
- maskx = rng.uniform(0, mask.cols - roicols);
- masky = rng.uniform(0, mask.rows - roirows);
-
- if(type1!=nulltype)
- {
- mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
- //clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
- }
- if(type2!=nulltype)
- {
- mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows));
- //clmat2_roi = clmat2(Rect(src2x,src2y,roicols,roirows));
- }
- if(type3!=nulltype)
- {
- dst_roi = dst(Rect(dstx,dsty,roicols,roirows));
- //cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows));
- }
- if(type4!=nulltype)
- {
- dst1_roi = dst1(Rect(dst1x,dst1y,roicols,roirows));
- //cldst1_roi = cldst1(Rect(dst1x,dst1y,roicols,roirows));
- }
- if(type5!=nulltype)
- {
- mask_roi = mask(Rect(maskx,masky,roicols,roirows));
- //clmask_roi = clmask(Rect(maskx,masky,roicols,roirows));
- }
- }
+ int type1, type2, type3, type4, type5;
+ cv::Scalar val;
+ // set up roi
+ int roicols;
+ int roirows;
+ int src1x;
+ int src1y;
+ int src2x;
+ int src2y;
+ int dstx;
+ int dsty;
+ int dst1x;
+ int dst1y;
+ int maskx;
+ int masky;
+
+ //mat
+ cv::Mat mat1;
+ cv::Mat mat2;
+ cv::Mat mask;
+ cv::Mat dst;
+ cv::Mat dst1; //bak, for two outputs
+
+ //mat with roi
+ cv::Mat mat1_roi;
+ cv::Mat mat2_roi;
+ cv::Mat mask_roi;
+ cv::Mat dst_roi;
+ cv::Mat dst1_roi; //bak
+ //std::vector<cv::ocl::Info> oclinfo;
+ //ocl mat
+ cv::ocl::oclMat clmat1;
+ cv::ocl::oclMat clmat2;
+ cv::ocl::oclMat clmask;
+ cv::ocl::oclMat cldst;
+ cv::ocl::oclMat cldst1; //bak
+
+ //ocl mat with roi
+ cv::ocl::oclMat clmat1_roi;
+ cv::ocl::oclMat clmat2_roi;
+ cv::ocl::oclMat clmask_roi;
+ cv::ocl::oclMat cldst_roi;
+ cv::ocl::oclMat cldst1_roi;
+
+ virtual void SetUp()
+ {
+ type1 = GET_PARAM(0);
+ type2 = GET_PARAM(1);
+ type3 = GET_PARAM(2);
+ type4 = GET_PARAM(3);
+ type5 = GET_PARAM(4);
+ cv::RNG &rng = TS::ptr()->get_rng();
+ cv::Size size(MWIDTH, MHEIGHT);
+ double min = 1, max = 20;
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums>0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
+ if(type1 != nulltype)
+ {
+ mat1 = randomMat(rng, size, type1, min, max, false);
+ clmat1 = mat1;
+ }
+ if(type2 != nulltype)
+ {
+ mat2 = randomMat(rng, size, type2, min, max, false);
+ clmat2 = mat2;
+ }
+ if(type3 != nulltype)
+ {
+ dst = randomMat(rng, size, type3, min, max, false);
+ cldst = dst;
+ }
+ if(type4 != nulltype)
+ {
+ dst1 = randomMat(rng, size, type4, min, max, false);
+ cldst1 = dst1;
+ }
+ if(type5 != nulltype)
+ {
+ mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
+ cv::threshold(mask, mask, 0.5, 255., type5);
+ clmask = mask;
+ }
+ val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
+ }
+
+
+ void Has_roi(int b)
+ {
+ //cv::RNG& rng = TS::ptr()->get_rng();
+ if(b)
+ {
+ //randomize ROI
+ roicols = mat1.cols - 1; //start
+ roirows = mat1.rows - 1;
+ src1x = 1;
+ src2x = 1;
+ src1y = 1;
+ src2y = 1;
+ dstx = 1;
+ dsty = 1;
+ dst1x = 1;
+ dst1y = 1;
+ maskx = 1;
+ masky = 1;
+ }
+ else
+ {
+ roicols = mat1.cols;
+ roirows = mat1.rows;
+ src1x = 0;
+ src2x = 0;
+ src1y = 0;
+ src2y = 0;
+ dstx = 0;
+ dsty = 0;
+ dst1x = 0;
+ dst1y = 0;
+ maskx = 0;
+ masky = 0;
+ };
+
+ if(type1 != nulltype)
+ {
+ mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
+ //clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+ }
+ if(type2 != nulltype)
+ {
+ mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
+ //clmat2_roi = clmat2(Rect(src2x,src2y,roicols,roirows));
+ }
+ if(type3 != nulltype)
+ {
+ dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+ //cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows));
+ }
+ if(type4 != nulltype)
+ {
+ dst1_roi = dst1(Rect(dst1x, dst1y, roicols, roirows));
+ //cldst1_roi = cldst1(Rect(dst1x,dst1y,roicols,roirows));
+ }
+ if(type5 != nulltype)
+ {
+ mask_roi = mask(Rect(maskx, masky, roicols, roirows));
+ //clmask_roi = clmask(Rect(maskx,masky,roicols,roirows));
+ }
+ }
+
+ void random_roi()
+ {
+ cv::RNG &rng = TS::ptr()->get_rng();
+
+ //randomize ROI
+ roicols = rng.uniform(1, mat1.cols);
+ roirows = rng.uniform(1, mat1.rows);
+ src1x = rng.uniform(0, mat1.cols - roicols);
+ src1y = rng.uniform(0, mat1.rows - roirows);
+ src2x = rng.uniform(0, mat2.cols - roicols);
+ src2y = rng.uniform(0, mat2.rows - roirows);
+ dstx = rng.uniform(0, dst.cols - roicols);
+ dsty = rng.uniform(0, dst.rows - roirows);
+ dst1x = rng.uniform(0, dst1.cols - roicols);
+ dst1y = rng.uniform(0, dst1.rows - roirows);
+ maskx = rng.uniform(0, mask.cols - roicols);
+ masky = rng.uniform(0, mask.rows - roirows);
+
+ if(type1 != nulltype)
+ {
+ mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
+ //clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
+ }
+ if(type2 != nulltype)
+ {
+ mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
+ //clmat2_roi = clmat2(Rect(src2x,src2y,roicols,roirows));
+ }
+ if(type3 != nulltype)
+ {
+ dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+ //cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows));
+ }
+ if(type4 != nulltype)
+ {
+ dst1_roi = dst1(Rect(dst1x, dst1y, roicols, roirows));
+ //cldst1_roi = cldst1(Rect(dst1x,dst1y,roicols,roirows));
+ }
+ if(type5 != nulltype)
+ {
+ mask_roi = mask(Rect(maskx, masky, roicols, roirows));
+ //clmask_roi = clmask(Rect(maskx,masky,roicols,roirows));
+ }
+ }
};
////////////////////////////////equalizeHist//////////////////////////////////////////
struct equalizeHist : ImgprocTestBase {};
-TEST_P(equalizeHist, MatType)
-{
- if (mat1.type() != CV_8UC1 || mat1.type() != dst.type())
- {
- cout<<"Unsupported type"<<endl;
- EXPECT_DOUBLE_EQ(0.0, 0.0);
- }
- else
- {
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::equalizeHist(mat1_roi, dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- if(type1!=nulltype)
- {
- clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
- }
- cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows));
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::equalizeHist(clmat1_roi, cldst_roi);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_cldst;
- //cldst.download(cpu_cldst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(equalizeHist, MatType)
+{
+ if (mat1.type() != CV_8UC1 || mat1.type() != dst.type())
+ {
+ cout << "Unsupported type" << endl;
+ EXPECT_DOUBLE_EQ(0.0, 0.0);
+ }
+ else
+ {
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::equalizeHist(mat1_roi, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ if(type1 != nulltype)
+ {
+ clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
+ }
+ cldst_roi = cldst(Rect(dstx, dsty, roicols, roirows));
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::equalizeHist(clmat1_roi, cldst_roi);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_cldst;
+ //cldst.download(cpu_cldst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+ if(j == 0)
+ continue;
+
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- if(type1!=nulltype)
- {
- clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
- }
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::equalizeHist(clmat1_roi, cldst_roi);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ if(type1 != nulltype)
+ {
+ clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
+ }
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::equalizeHist(clmat1_roi, cldst_roi);
+ };
#endif
- }
+ }
}
struct bilateralFilter : ImgprocTestBase {};
-TEST_P(bilateralFilter, Mat)
-{
- double sigmacolor = 50.0;
- int radius = 9;
- int d = 2*radius+1;
- double sigmaspace = 20.0;
- int bordertype[] = {cv::BORDER_CONSTANT,cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/};
- //const char* borderstr[]={"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/};
- if (mat1.type() != CV_8UC1 || mat1.type() != dst.type())
- {
- cout<<"Unsupported type"<<endl;
- EXPECT_DOUBLE_EQ(0.0, 0.0);
- }
- else
- {
- for(int i=0;i<sizeof(bordertype)/sizeof(int);i++){
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::bilateralFilter(mat1_roi, dst_roi, d,sigmacolor,sigmaspace, bordertype[i]);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- if(type1!=nulltype)
- {
- clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
- }
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d,sigmacolor,sigmaspace, bordertype[i]);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_cldst;
- cldst.download(cpu_cldst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(bilateralFilter, Mat)
+{
+ double sigmacolor = 50.0;
+ int radius = 9;
+ int d = 2 * radius + 1;
+ double sigmaspace = 20.0;
+ int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE/*,cv::BORDER_REFLECT,cv::BORDER_WRAP,cv::BORDER_REFLECT_101*/};
+ const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/};
+
+ if (mat1.depth() != CV_8U || mat1.type() != dst.type())
+ {
+ cout << "Unsupported type" << endl;
+ EXPECT_DOUBLE_EQ(0.0, 0.0);
+ }
+ else
+ {
+ for(int i = 0; i < sizeof(bordertype) / sizeof(int); i++)
+ {
+ cout << borderstr[i] << endl;
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+ if(((bordertype[i] != cv::BORDER_CONSTANT) && (bordertype[i] != cv::BORDER_REPLICATE)) && (mat1_roi.cols <= radius) || (mat1_roi.cols <= radius) || (mat1_roi.rows <= radius) || (mat1_roi.rows <= radius))
+ {
+ continue;
+ }
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::bilateralFilter(mat1_roi, dst_roi, d, sigmacolor, sigmaspace, bordertype[i]);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ if(type1 != nulltype)
+ {
+ clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
+ }
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d, sigmacolor, sigmaspace, bordertype[i]);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_cldst;
+ cldst.download(cpu_cldst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+ if(j == 0)
+ continue;
+
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- if(type1!=nulltype)
- {
- clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
- };
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d,sigmacolor,sigmaspace, bordertype[i]);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ if(type1 != nulltype)
+ {
+ clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
+ };
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d, sigmacolor, sigmaspace, bordertype[i]);
+ };
#endif
- };
+ };
- }
+ }
}
////////////////////////////////copyMakeBorder////////////////////////////////////////////
struct CopyMakeBorder : ImgprocTestBase {};
-TEST_P(CopyMakeBorder, Mat)
-{
- int bordertype[] = {cv::BORDER_CONSTANT,cv::BORDER_REPLICATE,cv::BORDER_REFLECT,cv::BORDER_WRAP,cv::BORDER_REFLECT_101};
- //const char* borderstr[]={"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/};
- int top=5;
- int bottom=5;
- int left=6;
- int right=6;
- if (mat1.type() != dst.type())
- {
- cout<<"Unsupported type"<<endl;
- EXPECT_DOUBLE_EQ(0.0, 0.0);
- }
- else
- {
- for(int i=0;i<sizeof(bordertype)/sizeof(int);i++){
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::copyMakeBorder(mat1_roi, dst_roi, top,bottom,left,right, bordertype[i]| cv::BORDER_ISOLATED,cv::Scalar(1.0));
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- if(type1!=nulltype)
- {
- clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
- }
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::copyMakeBorder(clmat1_roi, cldst_roi,top,bottom,left,right, bordertype[i]| cv::BORDER_ISOLATED,cv::Scalar(1.0));
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_cldst;
- cldst.download(cpu_cldst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(CopyMakeBorder, Mat)
+{
+ int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, cv::BORDER_REFLECT, cv::BORDER_WRAP, cv::BORDER_REFLECT_101};
+ //const char* borderstr[]={"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/};
+ int top = 5;
+ int bottom = 5;
+ int left = 6;
+ int right = 6;
+ if (mat1.type() != dst.type())
+ {
+ cout << "Unsupported type" << endl;
+ EXPECT_DOUBLE_EQ(0.0, 0.0);
+ }
+ else
+ {
+ for(int i = 0; i < sizeof(bordertype) / sizeof(int); i++)
+ {
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < 1; k++) //don't support roi perf test
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::copyMakeBorder(mat1_roi, dst_roi, top, bottom, left, right, bordertype[i] | cv::BORDER_ISOLATED, cv::Scalar(1.0));
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ if(type1 != nulltype)
+ {
+ clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
+ }
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::copyMakeBorder(clmat1_roi, cldst_roi, top, bottom, left, right, bordertype[i] | cv::BORDER_ISOLATED, cv::Scalar(1.0));
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_cldst;
+ cldst.download(cpu_cldst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+ if(j == 0)
+ continue;
+
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- if(type1!=nulltype)
- {
- clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
- };
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::copyMakeBorder(clmat1_roi, cldst_roi,top,bottom,left,right, bordertype[i]| cv::BORDER_ISOLATED,cv::Scalar(1.0));
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ if(type1 != nulltype)
+ {
+ clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
+ };
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::copyMakeBorder(clmat1_roi, cldst_roi, top, bottom, left, right, bordertype[i] | cv::BORDER_ISOLATED, cv::Scalar(1.0));
+ };
#endif
- };
- }
+ };
+ }
}
////////////////////////////////cornerMinEigenVal//////////////////////////////////////////
struct cornerMinEigenVal : ImgprocTestBase {};
-TEST_P(cornerMinEigenVal, Mat)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
- int blockSize = 7, apertureSize= 3;//1 + 2 * (rand() % 4);
- int borderType = cv::BORDER_REFLECT;
- t0 = (double)cvGetTickCount();//cpu start
- cv::cornerMinEigenVal(mat1_roi, dst_roi, blockSize, apertureSize, borderType);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- if(type1!=nulltype)
- {
- clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
- }
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::cornerMinEigenVal(clmat1_roi, cldst_roi, blockSize, apertureSize, borderType);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_cldst;
- cldst.download(cpu_cldst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
-#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- int blockSize = 7, apertureSize= 1 + 2 * (rand() % 4);
- int borderType = cv::BORDER_REFLECT;
- if(type1!=nulltype)
- {
- clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
- };
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::cornerMinEigenVal(clmat1_roi, cldst_roi, blockSize, apertureSize, borderType);
- };
-#endif
-}
+TEST_P(cornerMinEigenVal, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+ int blockSize = 7, apertureSize = 3; //1 + 2 * (rand() % 4);
+ int borderType = cv::BORDER_REFLECT;
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::cornerMinEigenVal(mat1_roi, dst_roi, blockSize, apertureSize, borderType);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+ t1 = (double)cvGetTickCount();//gpu start1
+ if(type1 != nulltype)
+ {
+ clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
+ }
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::cornerMinEigenVal(clmat1_roi, cldst_roi, blockSize, apertureSize, borderType);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_cldst;
+ cldst.download(cpu_cldst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
-////////////////////////////////cornerHarris//////////////////////////////////////////
+ if(j == 0)
+ continue;
-struct cornerHarris : ImgprocTestBase {};
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
-TEST_P(cornerHarris, Mat)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
- int blockSize = 7, apertureSize= 3;
- int borderType = cv::BORDER_REFLECT;
- double kk = 2;
- t0 = (double)cvGetTickCount();//cpu start
- cv::cornerHarris(mat1_roi, dst_roi, blockSize, apertureSize, kk, borderType);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- if(type1!=nulltype)
- {
- clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
- }
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::cornerHarris(clmat1_roi, cldst_roi, blockSize, apertureSize, kk, borderType);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_cldst;
- cldst.download(cpu_cldst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- double kk = 2;
- int blockSize = 7, apertureSize= 3;
- int borderType = cv::BORDER_REFLECT;
- if(type1!=nulltype)
- {
- clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
- };
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::cornerHarris(clmat1_roi, cldst_roi, blockSize, apertureSize, kk, borderType);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ int blockSize = 7, apertureSize = 1 + 2 * (rand() % 4);
+ int borderType = cv::BORDER_REFLECT;
+ if(type1 != nulltype)
+ {
+ clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
+ };
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::cornerMinEigenVal(clmat1_roi, cldst_roi, blockSize, apertureSize, borderType);
+ };
#endif
-
}
-////////////////////////////////integral/////////////////////////////////////////////////
-
-struct integral : ImgprocTestBase {};
-
-TEST_P(integral, Mat)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
- t0 = (double)cvGetTickCount();//cpu start
- cv::integral(mat1_roi, dst_roi, dst1_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- if(type1!=nulltype)
- {
- clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
- }
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::integral(clmat1_roi, cldst_roi, cldst1_roi);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_cldst;
- cv::Mat cpu_cldst1;
- cldst.download(cpu_cldst);//download
- cldst1.download(cpu_cldst1);
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
-#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- if(type1!=nulltype)
- {
- clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows));
- };
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::integral(clmat1_roi, cldst_roi, cldst1_roi);
- };
-#endif
-}
-
+////////////////////////////////cornerHarris//////////////////////////////////////////
-/////////////////////////////////////////////////////////////////////////////////////////////////
-// warpAffine & warpPerspective
+struct cornerHarris : ImgprocTestBase {};
-PARAM_TEST_CASE(WarpTestBase, MatType, int)
+TEST_P(cornerHarris, Mat)
{
- int type;
- cv::Size size;
- int interpolation;
-
- //src mat
- cv::Mat mat1;
- cv::Mat dst;
-
- // set up roi
- int src_roicols;
- int src_roirows;
- int dst_roicols;
- int dst_roirows;
- int src1x;
- int src1y;
- int dstx;
- int dsty;
-
-
- //src mat with roi
- cv::Mat mat1_roi;
- cv::Mat dst_roi;
- //std::vector<cv::ocl::Info> oclinfo;
- //ocl dst mat for testing
- cv::ocl::oclMat gdst_whole;
-
- //ocl mat with roi
- cv::ocl::oclMat gmat1;
- cv::ocl::oclMat gdst;
-
- virtual void SetUp()
- {
- type = GET_PARAM(0);
- //dsize = GET_PARAM(1);
- interpolation = GET_PARAM(1);
-
- cv::RNG& rng = TS::ptr()->get_rng();
- size = cv::Size(MWIDTH, MHEIGHT);
-
- mat1 = randomMat(rng, size, type, 5, 16, false);
- dst = randomMat(rng, size, type, 5, 16, false);
-
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
- ////if you want to use undefault device, set it here
- ////setDevice(oclinfo[0]);
- //cv::ocl::setBinpath(CLBINPATH);
- }
- void Has_roi(int b)
- {
- //cv::RNG& rng = TS::ptr()->get_rng();
- if(b)
- {
- //randomize ROI
- src_roicols = mat1.cols-1; //start
- src_roirows = mat1.rows-1;
- dst_roicols=dst.cols-1;
- dst_roirows=dst.rows-1;
- src1x = 1;
- src1y = 1;
- dstx = 1;
- dsty =1;
-
- }else
- {
- src_roicols = mat1.cols;
- src_roirows = mat1.rows;
- dst_roicols=dst.cols;
- dst_roirows=dst.rows;
- src1x = 0;
- src1y = 0;
- dstx = 0;
- dsty = 0;
-
- };
- mat1_roi = mat1(Rect(src1x,src1y,src_roicols,src_roirows));
- dst_roi = dst(Rect(dstx,dsty,dst_roicols,dst_roirows));
-
-
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+ int blockSize = 7, apertureSize = 3;
+ int borderType = cv::BORDER_REFLECT;
+ double kk = 2;
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::cornerHarris(mat1_roi, dst_roi, blockSize, apertureSize, kk, borderType);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
-};
+ t1 = (double)cvGetTickCount();//gpu start1
+ if(type1 != nulltype)
+ {
+ clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
+ }
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::cornerHarris(clmat1_roi, cldst_roi, blockSize, apertureSize, kk, borderType);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_cldst;
+ cldst.download(cpu_cldst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
-/////warpAffine
+ if(j == 0)
+ continue;
-struct WarpAffine : WarpTestBase{};
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
-TEST_P(WarpAffine, Mat)
-{
- static const double coeffs[2][3] =
- {
- {cos(3.14 / 6), -sin(3.14 / 6), 100.0},
- {sin(3.14 / 6), cos(3.14 / 6), -100.0}
- };
- Mat M(2, 3, CV_64F, (void*)coeffs);
-
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::warpAffine(mat1_roi, dst_roi, M, size, interpolation);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows));
-
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::warpAffine(gmat1, gdst, M, size, interpolation);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows));
- gmat1 = mat1_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::warpAffine(gmat1, gdst, M, size, interpolation);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ double kk = 2;
+ int blockSize = 7, apertureSize = 3;
+ int borderType = cv::BORDER_REFLECT;
+ if(type1 != nulltype)
+ {
+ clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
+ };
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::cornerHarris(clmat1_roi, cldst_roi, blockSize, apertureSize, kk, borderType);
+ };
#endif
}
-// warpPerspective
+////////////////////////////////integral/////////////////////////////////////////////////
-struct WarpPerspective : WarpTestBase{};
+struct integral : ImgprocTestBase {};
-TEST_P(WarpPerspective, Mat)
+TEST_P(integral, Mat)
{
- static const double coeffs[3][3] =
- {
- {cos(3.14 / 6), -sin(3.14 / 6), 100.0},
- {sin(3.14 / 6), cos(3.14 / 6), -100.0},
- {0.0, 0.0, 1.0}
- };
- Mat M(3, 3, CV_64F, (void*)coeffs);
-
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::warpPerspective(mat1_roi, dst_roi, M, size, interpolation);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows));
-
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::warpPerspective(gmat1, gdst, M, size, interpolation);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::integral(mat1_roi, dst_roi, dst1_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ if(type1 != nulltype)
+ {
+ clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
+ }
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::integral(clmat1_roi, cldst_roi, cldst1_roi);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_cldst;
+ cv::Mat cpu_cldst1;
+ cldst.download(cpu_cldst);//download
+ cldst1.download(cpu_cldst1);
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+ if(j == 0)
+ continue;
+
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows));
- gmat1 = mat1_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::warpPerspective(gmat1, gdst, M, size, interpolation);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ if(type1 != nulltype)
+ {
+ clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows));
+ };
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::integral(clmat1_roi, cldst_roi, cldst1_roi);
+ };
#endif
-
}
+
/////////////////////////////////////////////////////////////////////////////////////////////////
-// remap
-//////////////////////////////////////////////////////////////////////////////////////////////////
+// warpAffine & warpPerspective
-PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int)
+PARAM_TEST_CASE(WarpTestBase, MatType, int)
{
- int srcType;
- int map1Type;
- int map2Type;
- cv::Scalar val;
-
+ int type;
+ cv::Size size;
int interpolation;
- int bordertype;
- cv::Mat src;
+ //src mat
+ cv::Mat mat1;
cv::Mat dst;
- cv::Mat map1;
- cv::Mat map2;
-
+ // set up roi
int src_roicols;
int src_roirows;
int dst_roicols;
int dst_roirows;
- int map1_roicols;
- int map1_roirows;
- int map2_roicols;
- int map2_roirows;
- int srcx;
- int srcy;
+ int src1x;
+ int src1y;
int dstx;
int dsty;
- int map1x;
- int map1y;
- int map2x;
- int map2y;
- cv::Mat src_roi;
- cv::Mat dst_roi;
- cv::Mat map1_roi;
- cv::Mat map2_roi;
- //ocl mat for testing
- cv::ocl::oclMat gdst;
+ //src mat with roi
+ cv::Mat mat1_roi;
+ cv::Mat dst_roi;
+ //std::vector<cv::ocl::Info> oclinfo;
+ //ocl dst mat for testing
+ cv::ocl::oclMat gdst_whole;
//ocl mat with roi
- cv::ocl::oclMat gsrc_roi;
- cv::ocl::oclMat gdst_roi;
- cv::ocl::oclMat gmap1_roi;
- cv::ocl::oclMat gmap2_roi;
+ cv::ocl::oclMat gmat1;
+ cv::ocl::oclMat gdst;
virtual void SetUp()
{
- srcType = GET_PARAM(0);
- map1Type = GET_PARAM(1);
- map2Type = GET_PARAM(2);
- interpolation = GET_PARAM(3);
- bordertype = GET_PARAM(4);
+ type = GET_PARAM(0);
+ //dsize = GET_PARAM(1);
+ interpolation = GET_PARAM(1);
- cv::RNG& rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
+ size = cv::Size(MWIDTH, MHEIGHT);
+
+ mat1 = randomMat(rng, size, type, 5, 16, false);
+ dst = randomMat(rng, size, type, 5, 16, false);
+
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
+ }
+ void Has_roi(int b)
+ {
+ //cv::RNG& rng = TS::ptr()->get_rng();
+ if(b)
+ {
+ //randomize ROI
+ src_roicols = mat1.cols - 1; //start
+ src_roirows = mat1.rows - 1;
+ dst_roicols = dst.cols - 1;
+ dst_roirows = dst.rows - 1;
+ src1x = 1;
+ src1y = 1;
+ dstx = 1;
+ dsty = 1;
+
+ }
+ else
+ {
+ src_roicols = mat1.cols;
+ src_roirows = mat1.rows;
+ dst_roicols = dst.cols;
+ dst_roirows = dst.rows;
+ src1x = 0;
+ src1y = 0;
+ dstx = 0;
+ dsty = 0;
+
+ };
+ mat1_roi = mat1(Rect(src1x, src1y, src_roicols, src_roirows));
+ dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows));
+
+
+ }
+
+};
+
+/////warpAffine
+
+struct WarpAffine : WarpTestBase {};
+
+TEST_P(WarpAffine, Mat)
+{
+ static const double coeffs[2][3] =
+ {
+ {cos(3.14 / 6), -sin(3.14 / 6), 100.0},
+ {sin(3.14 / 6), cos(3.14 / 6), -100.0}
+ };
+ Mat M(2, 3, CV_64F, (void *)coeffs);
+
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::warpAffine(mat1_roi, dst_roi, M, size, interpolation);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows));
+
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::warpAffine(gmat1, gdst, M, size, interpolation);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+ if(j == 0)
+ continue;
+
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
+#else
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows));
+ gmat1 = mat1_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::warpAffine(gmat1, gdst, M, size, interpolation);
+ };
+#endif
+
+}
+
+
+// warpPerspective
+
+struct WarpPerspective : WarpTestBase {};
+
+TEST_P(WarpPerspective, Mat)
+{
+ static const double coeffs[3][3] =
+ {
+ {cos(3.14 / 6), -sin(3.14 / 6), 100.0},
+ {sin(3.14 / 6), cos(3.14 / 6), -100.0},
+ {0.0, 0.0, 1.0}
+ };
+ Mat M(3, 3, CV_64F, (void *)coeffs);
+
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::warpPerspective(mat1_roi, dst_roi, M, size, interpolation);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows));
+
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::warpPerspective(gmat1, gdst, M, size, interpolation);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+ if(j == 0)
+ continue;
+
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
+#else
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows));
+ gmat1 = mat1_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::warpPerspective(gmat1, gdst, M, size, interpolation);
+ };
+#endif
+
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// remap
+//////////////////////////////////////////////////////////////////////////////////////////////////
+
+PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int)
+{
+ int srcType;
+ int map1Type;
+ int map2Type;
+ cv::Scalar val;
+
+ int interpolation;
+ int bordertype;
+
+ cv::Mat src;
+ cv::Mat dst;
+ cv::Mat map1;
+ cv::Mat map2;
+
+
+ int src_roicols;
+ int src_roirows;
+ int dst_roicols;
+ int dst_roirows;
+ int map1_roicols;
+ int map1_roirows;
+ int map2_roicols;
+ int map2_roirows;
+ int srcx;
+ int srcy;
+ int dstx;
+ int dsty;
+ int map1x;
+ int map1y;
+ int map2x;
+ int map2y;
+
+ cv::Mat src_roi;
+ cv::Mat dst_roi;
+ cv::Mat map1_roi;
+ cv::Mat map2_roi;
+
+ //ocl mat for testing
+ cv::ocl::oclMat gdst;
+
+ //ocl mat with roi
+ cv::ocl::oclMat gsrc_roi;
+ cv::ocl::oclMat gdst_roi;
+ cv::ocl::oclMat gmap1_roi;
+ cv::ocl::oclMat gmap2_roi;
+
+ virtual void SetUp()
+ {
+ srcType = GET_PARAM(0);
+ map1Type = GET_PARAM(1);
+ map2Type = GET_PARAM(2);
+ interpolation = GET_PARAM(3);
+ bordertype = GET_PARAM(4);
+
+ cv::RNG &rng = TS::ptr()->get_rng();
cv::Size srcSize = cv::Size(MWIDTH, MHEIGHT);
cv::Size dstSize = cv::Size(MWIDTH, MHEIGHT);
cv::Size map1Size = cv::Size(MWIDTH, MHEIGHT);
double min = 5, max = 16;
- if(srcType != nulltype)
- {
- src = randomMat(rng, srcSize, srcType, min, max, false);
- }
- if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2&& map2Type == nulltype))
- {
- map1 = randomMat(rng, map1Size, map1Type, min, max, false);
+ if(srcType != nulltype)
+ {
+ src = randomMat(rng, srcSize, srcType, min, max, false);
+ }
+ if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2 && map2Type == nulltype))
+ {
+ map1 = randomMat(rng, map1Size, map1Type, min, max, false);
+
+ }
+ else if (map1Type == CV_32FC1 && map2Type == CV_32FC1)
+ {
+ map1 = randomMat(rng, map1Size, map1Type, min, max, false);
+ map2 = randomMat(rng, map1Size, map1Type, min, max, false);
+ }
+
+ else
+ cout << "The wrong input type" << endl;
+
+ dst = randomMat(rng, map1Size, srcType, min, max, false);
+ switch (src.channels())
+ {
+ case 1:
+ val = cv::Scalar(rng.uniform(0.0, 10.0), 0, 0, 0);
+ break;
+ case 2:
+ val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0, 0);
+ break;
+ case 3:
+ val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0);
+ break;
+ case 4:
+ val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0));
+ break;
+ }
+
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ //if you want to use undefault device, set it here
+ //setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
+ }
+ void Has_roi(int b)
+ {
+ if(b)
+ {
+ //randomize ROI
+ dst_roicols = dst.cols - 1;
+ dst_roirows = dst.rows - 1;
+
+ src_roicols = src.cols - 1;
+ src_roirows = src.rows - 1;
+
+
+ srcx = 1;
+ srcy = 1;
+ dstx = 1;
+ dsty = 1;
+ }
+ else
+ {
+ dst_roicols = dst.cols;
+ dst_roirows = dst.rows;
+
+ src_roicols = src.cols;
+ src_roirows = src.rows;
+
+
+ srcx = 0;
+ srcy = 0;
+ dstx = 0;
+ dsty = 0;
+ }
+ map1_roicols = dst_roicols;
+ map1_roirows = dst_roirows;
+ map2_roicols = dst_roicols;
+ map2_roirows = dst_roirows;
+ map1x = dstx;
+ map1y = dsty;
+ map2x = dstx;
+ map2y = dsty;
+
+ if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2 && map2Type == nulltype))
+ {
+ map1_roi = map1(Rect(map1x, map1y, map1_roicols, map1_roirows));
+ gmap1_roi = map1_roi;
+ }
+
+ else if (map1Type == CV_32FC1 && map2Type == CV_32FC1)
+ {
+ map1_roi = map1(Rect(map1x, map1y, map1_roicols, map1_roirows));
+ map2_roi = map2(Rect(map2x, map2y, map2_roicols, map2_roirows));
+ gmap1_roi = map1_roi;
+ gmap2_roi = map2_roi;
+ }
+ dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows));
+ src_roi = dst(Rect(srcx, srcy, src_roicols, src_roirows));
+
+ }
+};
+
+TEST_P(Remap, Mat)
+{
+ if((interpolation == 1 && map1Type == CV_16SC2) || (map1Type == CV_32FC1 && map2Type == nulltype) || (map1Type == CV_16SC2 && map2Type == CV_32FC1) || (map1Type == CV_32FC2 && map2Type == CV_32FC1))
+ {
+ cout << "LINEAR don't support the map1Type and map2Type" << endl;
+ return;
+ }
+ int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/};
+ const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/};
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = 0; k < 2; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::remap(src_roi, dst_roi, map1_roi, map2_roi, interpolation, bordertype[0], val);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start
+ gsrc_roi = src_roi;
+ gdst = dst;
+ gdst_roi = gdst(Rect(dstx, dsty, dst_roicols, dst_roirows));
+
+ t2 = (double)cvGetTickCount();//kernel
+ cv::ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, interpolation, bordertype[0], val);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+
+ cv::Mat cpu_dst;
+ gdst.download(cpu_dst);
+
+ t1 = (double)cvGetTickCount() - t1;//gpu end
+
+ if (j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
+#else
+ for(int j = 0; j < 2; j ++)
+ {
+ Has_roi(j);
+ gdst = dst;
+ gdst_roi = gdst(Rect(dstx, dsty, dst_roicols, dst_roirows));
+ gsrc_roi = src_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, interpolation, bordertype[0], val);
+ };
+#endif
+
+}
+
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+// resize
+
+PARAM_TEST_CASE(Resize, MatType, cv::Size, double, double, int)
+{
+ int type;
+ cv::Size dsize;
+ double fx, fy;
+ int interpolation;
+
+ //src mat
+ cv::Mat mat1;
+ cv::Mat dst;
+
+ // set up roi
+ int src_roicols;
+ int src_roirows;
+ int dst_roicols;
+ int dst_roirows;
+ int src1x;
+ int src1y;
+ int dstx;
+ int dsty;
+
+
+ //src mat with roi
+ cv::Mat mat1_roi;
+ cv::Mat dst_roi;
+ //std::vector<cv::ocl::Info> oclinfo;
+ //ocl dst mat for testing
+ cv::ocl::oclMat gdst_whole;
+
+ //ocl mat with roi
+ cv::ocl::oclMat gmat1;
+ cv::ocl::oclMat gdst;
+
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
+ dsize = GET_PARAM(1);
+ fx = GET_PARAM(2);
+ fy = GET_PARAM(3);
+ interpolation = GET_PARAM(4);
+
+ cv::RNG &rng = TS::ptr()->get_rng();
+ cv::Size size(MWIDTH, MHEIGHT);
+
+ if(dsize == cv::Size() && !(fx > 0 && fy > 0))
+ {
+ cout << "invalid dsize and fx fy" << endl;
+ return;
+ }
+
+ if(dsize == cv::Size())
+ {
+ dsize.width = (int)(size.width * fx);
+ dsize.height = (int)(size.height * fy);
+ }
+
+ mat1 = randomMat(rng, size, type, 5, 16, false);
+ dst = randomMat(rng, dsize, type, 5, 16, false);
+
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
+ }
+ void Has_roi(int b)
+ {
+ //cv::RNG& rng = TS::ptr()->get_rng();
+ if(b)
+ {
+ //randomize ROI
+ src_roicols = mat1.cols - 1; //start
+ src_roirows = mat1.rows - 1;
+ dst_roicols = dst.cols - 1;
+ dst_roirows = dst.rows - 1;
+ src1x = 1;
+ src1y = 1;
+ dstx = 1;
+ dsty = 1;
+
+ }
+ else
+ {
+ src_roicols = mat1.cols;
+ src_roirows = mat1.rows;
+ dst_roicols = dst.cols;
+ dst_roirows = dst.rows;
+ src1x = 0;
+ src1y = 0;
+ dstx = 0;
+ dsty = 0;
+
+ };
+ mat1_roi = mat1(Rect(src1x, src1y, src_roicols, src_roirows));
+ dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows));
+
+
+ }
+
+};
+
+TEST_P(Resize, Mat)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::resize(mat1_roi, dst_roi, dsize, fx, fy, interpolation);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows));
+
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+ if(j == 0)
+ continue;
+
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
+#else
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows));
+ gmat1 = mat1_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation);
+ };
+#endif
+
+}
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+//threshold
+
+PARAM_TEST_CASE(Threshold, MatType, ThreshOp)
+{
+ int type;
+ int threshOp;
+
+ //src mat
+ cv::Mat mat1;
+ cv::Mat dst;
+
+ // set up roi
+ int roicols;
+ int roirows;
+ int src1x;
+ int src1y;
+ int dstx;
+ int dsty;
+
+ //src mat with roi
+ cv::Mat mat1_roi;
+ cv::Mat dst_roi;
+ //std::vector<cv::ocl::Info> oclinfo;
+ //ocl dst mat for testing
+ cv::ocl::oclMat gdst_whole;
+
+ //ocl mat with roi
+ cv::ocl::oclMat gmat1;
+ cv::ocl::oclMat gdst;
+
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
+ threshOp = GET_PARAM(1);
- }
- else if (map1Type == CV_32FC1 && map2Type == CV_32FC1)
- {
- map1 = randomMat(rng, map1Size, map1Type, min, max, false);
- map2 = randomMat(rng, map1Size, map1Type, min, max, false);
- }
+ cv::RNG &rng = TS::ptr()->get_rng();
+ cv::Size size(MWIDTH, MHEIGHT);
- else
- cout<<"The wrong input type"<<endl;
+ mat1 = randomMat(rng, size, type, 5, 16, false);
+ dst = randomMat(rng, size, type, 5, 16, false);
- dst = randomMat(rng, map1Size, srcType, min, max, false);
- switch (src.channels())
- {
- case 1:
- val = cv::Scalar(rng.uniform(0.0, 10.0), 0, 0, 0);
- break;
- case 2:
- val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0, 0);
- break;
- case 3:
- val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0);
- break;
- case 4:
- val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0));
- break;
- }
-
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
- //if you want to use undefault device, set it here
- //setDevice(oclinfo[0]);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
//cv::ocl::setBinpath(CLBINPATH);
}
void Has_roi(int b)
{
+ //cv::RNG& rng = TS::ptr()->get_rng();
if(b)
{
//randomize ROI
- dst_roicols = dst.cols - 1;
- dst_roirows = dst.rows - 1;
-
- src_roicols = src.cols - 1;
- src_roirows = src.rows - 1;
+ roicols = mat1.cols - 1; //start
+ roirows = mat1.rows - 1;
+ src1x = 1;
+ src1y = 1;
+ dstx = 1;
+ dsty = 1;
-
- srcx = 1;
- srcy = 1;
- dstx = 1;
- dsty = 1;
}
else
{
- dst_roicols = dst.cols;
- dst_roirows = dst.rows;
-
- src_roicols = src.cols;
- src_roirows = src.rows;
-
-
- srcx = 0;
- srcy = 0;
+ roicols = mat1.cols;
+ roirows = mat1.rows;
+ src1x = 0;
+ src1y = 0;
dstx = 0;
dsty = 0;
- }
- map1_roicols = dst_roicols;
- map1_roirows = dst_roirows;
- map2_roicols = dst_roicols;
- map2_roirows = dst_roirows;
- map1x = dstx;
- map1y = dsty;
- map2x = dstx;
- map2y = dsty;
- if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2&& map2Type == nulltype))
- {
- map1_roi = map1(Rect(map1x,map1y,map1_roicols,map1_roirows));
- gmap1_roi = map1_roi;
- }
+ };
+ mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
+ dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
- else if (map1Type == CV_32FC1 && map2Type == CV_32FC1)
- {
- map1_roi = map1(Rect(map1x,map1y,map1_roicols,map1_roirows));
- map2_roi = map2(Rect(map2x,map2y,map2_roicols,map2_roirows));
- gmap1_roi = map1_roi;
- gmap2_roi = map2_roi;
- }
- dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows));
- src_roi = dst(Rect(srcx, srcy, src_roicols, src_roirows));
}
};
-TEST_P(Remap, Mat)
+TEST_P(Threshold, Mat)
{
- if((interpolation == 1 && map1Type == CV_16SC2) ||(map1Type == CV_32FC1 && map2Type == nulltype) || (map1Type == CV_16SC2 && map2Type == CV_32FC1) || (map1Type == CV_32FC2 && map2Type == CV_32FC1))
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
{
- cout << "LINEAR don't support the map1Type and map2Type" << endl;
- return;
- }
- int bordertype[] = {cv::BORDER_CONSTANT,cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/};
- const char* borderstr[]={"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/};
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k = 0; k < 2; k++){
totalcputick = 0;
totalgputick = 0;
totalgputick_kernel = 0;
- for(int j = 0; j < LOOP_TIMES+1; j++)
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
{
Has_roi(k);
+ double maxVal = randomDouble(20.0, 127.0);
+ double thresh = randomDouble(0.0, maxVal);
t0 = (double)cvGetTickCount();//cpu start
- cv::remap(src_roi, dst_roi, map1_roi, map2_roi, interpolation, bordertype[0], val);
+ cv::threshold(mat1_roi, dst_roi, thresh, maxVal, threshOp);
t0 = (double)cvGetTickCount() - t0;//cpu end
- t1 = (double)cvGetTickCount();//gpu start
- gsrc_roi = src_roi;
- gdst = dst;
- gdst_roi = gdst(Rect(dstx,dsty,dst_roicols,dst_roirows));
+ t1 = (double)cvGetTickCount();//gpu start1
- t2 = (double)cvGetTickCount();//kernel
- cv::ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, interpolation, bordertype[0], val);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::threshold(gmat1, gdst, thresh, maxVal, threshOp);
t2 = (double)cvGetTickCount() - t2;//kernel
-
+
cv::Mat cpu_dst;
- gdst.download(cpu_dst);
-
- t1 = (double)cvGetTickCount() - t1;//gpu end
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
- if (j == 0)
+ if(j == 0)
continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
}
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
#else
- for(int j = 0; j < 2; j ++)
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
{
Has_roi(j);
- gdst = dst;
- gdst_roi = gdst(Rect(dstx,dsty,dst_roicols,dst_roirows));
- gsrc_roi = src_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, interpolation, bordertype[0], val);
+ double maxVal = randomDouble(20.0, 127.0);
+ double thresh = randomDouble(0.0, maxVal);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ gmat1 = mat1_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::threshold(gmat1, gdst, thresh, maxVal, threshOp);
};
#endif
}
+///////////////////////////////////////////////////////////////////////////////////////////////////
+//meanShift
+PARAM_TEST_CASE(meanShiftTestBase, MatType, MatType, int, int, cv::TermCriteria)
+{
+ int type, typeCoor;
+ int sp, sr;
+ cv::TermCriteria crit;
+ //src mat
+ cv::Mat src;
+ cv::Mat dst;
+ cv::Mat dstCoor;
-/////////////////////////////////////////////////////////////////////////////////////////////////
-// resize
+ //set up roi
+ int roicols;
+ int roirows;
+ int srcx;
+ int srcy;
+ int dstx;
+ int dsty;
-PARAM_TEST_CASE(Resize, MatType, cv::Size, double, double, int)
-{
- int type;
- cv::Size dsize;
- double fx, fy;
- int interpolation;
-
- //src mat
- cv::Mat mat1;
- cv::Mat dst;
-
- // set up roi
- int src_roicols;
- int src_roirows;
- int dst_roicols;
- int dst_roirows;
- int src1x;
- int src1y;
- int dstx;
- int dsty;
-
-
- //src mat with roi
- cv::Mat mat1_roi;
- cv::Mat dst_roi;
- //std::vector<cv::ocl::Info> oclinfo;
- //ocl dst mat for testing
- cv::ocl::oclMat gdst_whole;
-
- //ocl mat with roi
- cv::ocl::oclMat gmat1;
- cv::ocl::oclMat gdst;
-
- virtual void SetUp()
- {
- type = GET_PARAM(0);
- dsize = GET_PARAM(1);
- fx = GET_PARAM(2);
- fy = GET_PARAM(3);
- interpolation = GET_PARAM(4);
-
- cv::RNG& rng = TS::ptr()->get_rng();
- cv::Size size(MWIDTH, MHEIGHT);
-
- if(dsize == cv::Size() && !(fx > 0 && fy > 0))
- {
- cout << "invalid dsize and fx fy" << endl;
- return;
- }
-
- if(dsize == cv::Size())
- {
- dsize.width = (int)(size.width * fx);
- dsize.height = (int)(size.height * fy);
- }
-
- mat1 = randomMat(rng, size, type, 5, 16, false);
- dst = randomMat(rng, dsize, type, 5, 16, false);
-
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
- ////if you want to use undefault device, set it here
- ////setDevice(oclinfo[0]);
- //cv::ocl::setBinpath(CLBINPATH);
- }
- void Has_roi(int b)
- {
- //cv::RNG& rng = TS::ptr()->get_rng();
- if(b)
- {
- //randomize ROI
- src_roicols = mat1.cols-1; //start
- src_roirows = mat1.rows-1;
- dst_roicols=dst.cols-1;
- dst_roirows=dst.rows-1;
- src1x = 1;
- src1y = 1;
- dstx = 1;
- dsty =1;
-
- }else
- {
- src_roicols = mat1.cols;
- src_roirows = mat1.rows;
- dst_roicols=dst.cols;
- dst_roirows=dst.rows;
- src1x = 0;
- src1y = 0;
- dstx = 0;
- dsty = 0;
-
- };
- mat1_roi = mat1(Rect(src1x,src1y,src_roicols,src_roirows));
- dst_roi = dst(Rect(dstx,dsty,dst_roicols,dst_roirows));
-
-
- }
+ //src mat with roi
+ cv::Mat src_roi;
+ cv::Mat dst_roi;
+ cv::Mat dstCoor_roi;
-};
+ //ocl dst mat
+ cv::ocl::oclMat gdst;
+ cv::ocl::oclMat gdstCoor;
-TEST_P(Resize, Mat)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- cv::resize(mat1_roi, dst_roi, dsize, fx, fy, interpolation);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows));
-
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
-#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,dst_roicols,dst_roirows));
- gmat1 = mat1_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation);
- };
-#endif
+ //std::vector<cv::ocl::Info> oclinfo;
+ //ocl mat with roi
+ cv::ocl::oclMat gsrc_roi;
+ cv::ocl::oclMat gdst_roi;
+ cv::ocl::oclMat gdstCoor_roi;
-}
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
+ typeCoor = GET_PARAM(1);
+ sp = GET_PARAM(2);
+ sr = GET_PARAM(3);
+ crit = GET_PARAM(4);
-/////////////////////////////////////////////////////////////////////////////////////////////////
-//threshold
+ cv::RNG &rng = TS::ptr()->get_rng();
-PARAM_TEST_CASE(Threshold, MatType, ThreshOp)
-{
- int type;
- int threshOp;
-
- //src mat
- cv::Mat mat1;
- cv::Mat dst;
-
- // set up roi
- int roicols;
- int roirows;
- int src1x;
- int src1y;
- int dstx;
- int dsty;
-
- //src mat with roi
- cv::Mat mat1_roi;
- cv::Mat dst_roi;
- //std::vector<cv::ocl::Info> oclinfo;
- //ocl dst mat for testing
- cv::ocl::oclMat gdst_whole;
-
- //ocl mat with roi
- cv::ocl::oclMat gmat1;
- cv::ocl::oclMat gdst;
-
- virtual void SetUp()
- {
- type = GET_PARAM(0);
- threshOp = GET_PARAM(1);
-
- cv::RNG& rng = TS::ptr()->get_rng();
- cv::Size size(MWIDTH, MHEIGHT);
-
- mat1 = randomMat(rng, size, type, 5, 16, false);
- dst = randomMat(rng, size, type, 5, 16, false);
-
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
- ////if you want to use undefault device, set it here
- ////setDevice(oclinfo[0]);
- //cv::ocl::setBinpath(CLBINPATH);
- }
- void Has_roi(int b)
- {
- //cv::RNG& rng = TS::ptr()->get_rng();
- if(b)
- {
- //randomize ROI
- roicols = mat1.cols-1; //start
- roirows = mat1.rows-1;
- src1x = 1;
- src1y = 1;
- dstx = 1;
- dsty =1;
-
- }else
- {
- roicols = mat1.cols;
- roirows = mat1.rows;
- src1x = 0;
- src1y = 0;
- dstx = 0;
- dsty = 0;
-
- };
- mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
- dst_roi = dst(Rect(dstx,dsty,roicols,roirows));
-
-
- }
-};
+ // MWIDTH=256, MHEIGHT=256. defined in utility.hpp
+ cv::Size size = cv::Size(MWIDTH, MHEIGHT);
-TEST_P(Threshold, Mat)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- double maxVal = randomDouble(20.0, 127.0);
- double thresh = randomDouble(0.0, maxVal);
- t0 = (double)cvGetTickCount();//cpu start
- cv::threshold(mat1_roi, dst_roi, thresh, maxVal, threshOp);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
-
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::threshold(gmat1, gdst, thresh, maxVal, threshOp);
- t2 = (double)cvGetTickCount() - t2;//kernel
-
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
-#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- double maxVal = randomDouble(20.0, 127.0);
- double thresh = randomDouble(0.0, maxVal);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- gmat1 = mat1_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::threshold(gmat1, gdst, thresh, maxVal, threshOp);
- };
-#endif
+ src = randomMat(rng, size, type, 5, 16, false);
+ dst = randomMat(rng, size, type, 5, 16, false);
+ dstCoor = randomMat(rng, size, typeCoor, 5, 16, false);
-}
-///////////////////////////////////////////////////////////////////////////////////////////////////
-//meanShift
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath(CLBINPATH);
+ }
-PARAM_TEST_CASE(meanShiftTestBase, MatType, MatType, int, int, cv::TermCriteria)
-{
- int type, typeCoor;
- int sp, sr;
- cv::TermCriteria crit;
- //src mat
- cv::Mat src;
- cv::Mat dst;
- cv::Mat dstCoor;
-
- //set up roi
- int roicols;
- int roirows;
- int srcx;
- int srcy;
- int dstx;
- int dsty;
-
- //src mat with roi
- cv::Mat src_roi;
- cv::Mat dst_roi;
- cv::Mat dstCoor_roi;
-
- //ocl dst mat
- cv::ocl::oclMat gdst;
- cv::ocl::oclMat gdstCoor;
-
- //std::vector<cv::ocl::Info> oclinfo;
- //ocl mat with roi
- cv::ocl::oclMat gsrc_roi;
- cv::ocl::oclMat gdst_roi;
- cv::ocl::oclMat gdstCoor_roi;
-
- virtual void SetUp()
- {
- type = GET_PARAM(0);
- typeCoor = GET_PARAM(1);
- sp = GET_PARAM(2);
- sr = GET_PARAM(3);
- crit = GET_PARAM(4);
-
- cv::RNG &rng = TS::ptr()->get_rng();
-
- // MWIDTH=256, MHEIGHT=256. defined in utility.hpp
- cv::Size size = cv::Size(MWIDTH, MHEIGHT);
-
- src = randomMat(rng, size, type, 5, 16, false);
- dst = randomMat(rng, size, type, 5, 16, false);
- dstCoor = randomMat(rng, size, typeCoor, 5, 16, false);
-
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
- ////if you want to use undefault device, set it here
- ////setDevice(oclinfo[0]);
- //cv::ocl::setBinpath(CLBINPATH);
- }
-
- void Has_roi(int b)
- {
- if(b)
- {
- //randomize ROI
- roicols = src.cols - 1;
- roirows = src.rows - 1;
- srcx = 1;
- srcy = 1;
- dstx = 1;
- dsty = 1;
- }else
- {
- roicols = src.cols;
- roirows = src.rows;
- srcx = 0;
- srcy = 0;
- dstx = 0;
- dsty = 0;
- };
-
- src_roi = src(Rect(srcx, srcy, roicols, roirows));
- dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
- dstCoor_roi = dstCoor(Rect(dstx, dsty, roicols, roirows));
-
- gdst = dst;
- gdstCoor = dstCoor;
- }
+ void Has_roi(int b)
+ {
+ if(b)
+ {
+ //randomize ROI
+ roicols = src.cols - 1;
+ roirows = src.rows - 1;
+ srcx = 1;
+ srcy = 1;
+ dstx = 1;
+ dsty = 1;
+ }
+ else
+ {
+ roicols = src.cols;
+ roirows = src.rows;
+ srcx = 0;
+ srcy = 0;
+ dstx = 0;
+ dsty = 0;
+ };
+
+ src_roi = src(Rect(srcx, srcy, roicols, roirows));
+ dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+ dstCoor_roi = dstCoor(Rect(dstx, dsty, roicols, roirows));
+
+ gdst = dst;
+ gdstCoor = dstCoor;
+ }
};
/////////////////////////meanShiftFiltering/////////////////////////////
TEST_P(meanShiftFiltering, Mat)
{
-#ifndef PRINT_KERNEL_RUN_TIME
- double t1=0;
- double t2=0;
- for(int k=0;k<2;k++)
- {
- double totalgputick=0;
- double totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
+#ifndef PRINT_KERNEL_RUN_TIME
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = 0; k < 2; k++)
+ {
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
- t1 = (double)cvGetTickCount();//gpu start1
+ t1 = (double)cvGetTickCount();//gpu start1
- gsrc_roi = src_roi;
- gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi
+ gsrc_roi = src_roi;
+ gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit);
- t2 = (double)cvGetTickCount() - t2;//kernel
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit);
+ t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_gdst;
- gdst.download(cpu_gdst);//download
+ cv::Mat cpu_gdst;
+ gdst.download(cpu_gdst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
+ if(j == 0)
+ continue;
- totalgputick=t1+totalgputick;
- totalgputick_kernel=t2+totalgputick_kernel;
+ totalgputick = t1 + totalgputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
- gsrc_roi = src_roi;
- gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi
+ gsrc_roi = src_roi;
+ gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit);
- };
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit);
+ };
#endif
}
TEST_P(meanShiftProc, Mat)
{
-#ifndef PRINT_KERNEL_RUN_TIME
- double t1=0;
- double t2=0;
- for(int k=0;k<2;k++)
- {
- double totalgputick=0;
- double totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
+#ifndef PRINT_KERNEL_RUN_TIME
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = 0; k < 2; k++)
+ {
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
- t1 = (double)cvGetTickCount();//gpu start1
+ t1 = (double)cvGetTickCount();//gpu start1
- gsrc_roi = src_roi;
- gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi
- gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows));
+ gsrc_roi = src_roi;
+ gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi
+ gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows));
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit);
- t2 = (double)cvGetTickCount() - t2;//kernel
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit);
+ t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_gdstCoor;
- gdstCoor.download(cpu_gdstCoor);//download
+ cv::Mat cpu_gdstCoor;
+ gdstCoor.download(cpu_gdstCoor);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
+ if(j == 0)
+ continue;
- totalgputick=t1+totalgputick;
- totalgputick_kernel=t2+totalgputick_kernel;
+ totalgputick = t1 + totalgputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
- gsrc_roi = src_roi;
- gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi
- gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows));
+ gsrc_roi = src_roi;
+ gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi
+ gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows));
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit);
- };
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit);
+ };
#endif
}
///////////////////////////////////////////////////////////////////////////////////////////
//hist
-void calcHistGold(const cv::Mat& src, cv::Mat& hist)
+void calcHistGold(const cv::Mat &src, cv::Mat &hist)
{
hist.create(1, 256, CV_32SC1);
hist.setTo(cv::Scalar::all(0));
- int* hist_row = hist.ptr<int>();
+ int *hist_row = hist.ptr<int>();
for (int y = 0; y < src.rows; ++y)
{
- const uchar* src_row = src.ptr(y);
+ const uchar *src_row = src.ptr(y);
for (int x = 0; x < src.cols; ++x)
++hist_row[src_row[x]];
cv::Mat src_roi;
//ocl dst mat, dst_hist and gdst_hist don't have roi
cv::ocl::oclMat gdst_hist;
-
+
//ocl mat with roi
cv::ocl::oclMat gsrc_roi;
-// std::vector<cv::ocl::Info> oclinfo;
+ // std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
type_src = GET_PARAM(0);
-
+
cv::RNG &rng = TS::ptr()->get_rng();
cv::Size size = cv::Size(MWIDTH, MHEIGHT);
src = randomMat(rng, size, type_src, 0, 256, false);
-// int devnums = getDevice(oclinfo);
-// CV_Assert(devnums > 0);
+ // int devnums = getDevice(oclinfo);
+ // CV_Assert(devnums > 0);
//if you want to use undefault device, set it here
//setDevice(oclinfo[0]);
}
if(b)
{
//randomize ROI
- roicols = src.cols-1;
- roirows = src.rows-1;
+ roicols = src.cols - 1;
+ roirows = src.rows - 1;
srcx = 1;
srcy = 1;
- }else
+ }
+ else
{
roicols = src.cols;
roirows = src.rows;
TEST_P(calcHist, Mat)
{
-#ifndef PRINT_KERNEL_RUN_TIME
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=0;k<2;k++)
- {
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- calcHistGold(src_roi, dst_hist);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
-
- gsrc_roi = src_roi;
-
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::calcHist(gsrc_roi, gdst_hist);
- t2 = (double)cvGetTickCount() - t2;//kernel
-
- cv::Mat cpu_hist;
- gdst_hist.download(cpu_hist);//download
-
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalcputick=t0+totalcputick;
- totalgputick=t1+totalgputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+#ifndef PRINT_KERNEL_RUN_TIME
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = 0; k < 2; k++)
+ {
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ calcHistGold(src_roi, dst_hist);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+
+ gsrc_roi = src_roi;
+
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::calcHist(gsrc_roi, gdst_hist);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+
+ cv::Mat cpu_hist;
+ gdst_hist.download(cpu_hist);//download
+
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+ if(j == 0)
+ continue;
+
+ totalcputick = t0 + totalcputick;
+ totalgputick = t1 + totalgputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = 0; j < 2; j ++)
- {
- Has_roi(j);
-
- gsrc_roi = src_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::calcHist(gsrc_roi, gdst_hist);
- };
+ for(int j = 0; j < 2; j ++)
+ {
+ Has_roi(j);
+
+ gsrc_roi = src_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::calcHist(gsrc_roi, gdst_hist);
+ };
#endif
}
//************test*******************
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine(
- ONE_TYPE(CV_8UC1),
- NULL_TYPE,
- ONE_TYPE(CV_8UC1),
- NULL_TYPE,
- NULL_TYPE,
- Values(false))); // Values(false) is the reserved parameter
-
-//INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine(
-// ONE_TYPE(CV_8UC1),
-// NULL_TYPE,
-// ONE_TYPE(CV_8UC1),
-// NULL_TYPE,
-// NULL_TYPE,
-// Values(false))); // Values(false) is the reserved parameter
-//
-//
+ ONE_TYPE(CV_8UC1),
+ NULL_TYPE,
+ ONE_TYPE(CV_8UC1),
+ NULL_TYPE,
+ NULL_TYPE,
+ Values(false))); // Values(false) is the reserved parameter
+
+INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine(
+ Values(CV_8UC1, CV_8UC3),
+ NULL_TYPE,
+ Values(CV_8UC1, CV_8UC3),
+ NULL_TYPE,
+ NULL_TYPE,
+ Values(false))); // Values(false) is the reserved parameter
+
+
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine(
- Values(CV_8UC1, CV_8UC4/*, CV_32SC1*/),
- NULL_TYPE,
- Values(CV_8UC1,CV_8UC4/*,CV_32SC1*/),
- NULL_TYPE,
- NULL_TYPE,
- Values(false))); // Values(false) is the reserved parameter
+ Values(CV_8UC1, CV_8UC4/*, CV_32SC1*/),
+ NULL_TYPE,
+ Values(CV_8UC1, CV_8UC4/*,CV_32SC1*/),
+ NULL_TYPE,
+ NULL_TYPE,
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerMinEigenVal, Combine(
- Values(CV_8UC1,CV_32FC1),
- NULL_TYPE,
- ONE_TYPE(CV_32FC1),
- NULL_TYPE,
- NULL_TYPE,
- Values(false))); // Values(false) is the reserved parameter
+ Values(CV_8UC1, CV_32FC1),
+ NULL_TYPE,
+ ONE_TYPE(CV_32FC1),
+ NULL_TYPE,
+ NULL_TYPE,
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerHarris, Combine(
- Values(CV_8UC1,CV_32FC1),
- NULL_TYPE,
- ONE_TYPE(CV_32FC1),
- NULL_TYPE,
- NULL_TYPE,
- Values(false))); // Values(false) is the reserved parameter
+ Values(CV_8UC1, CV_32FC1),
+ NULL_TYPE,
+ ONE_TYPE(CV_32FC1),
+ NULL_TYPE,
+ NULL_TYPE,
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, integral, Combine(
- ONE_TYPE(CV_8UC1),
- NULL_TYPE,
- ONE_TYPE(CV_32SC1),
- ONE_TYPE(CV_32FC1),
- NULL_TYPE,
- Values(false))); // Values(false) is the reserved parameter
+ ONE_TYPE(CV_8UC1),
+ NULL_TYPE,
+ ONE_TYPE(CV_32SC1),
+ ONE_TYPE(CV_32FC1),
+ NULL_TYPE,
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Imgproc, WarpAffine, Combine(
- Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
- Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR,
- (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP),
- (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP))));
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+ Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR,
+ (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP),
+ (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP))));
INSTANTIATE_TEST_CASE_P(Imgproc, WarpPerspective, Combine
- (Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
- Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR,
- (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP),
- (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP))));
+ (Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+ Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR,
+ (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP),
+ (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP))));
INSTANTIATE_TEST_CASE_P(Imgproc, Resize, Combine(
- Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(cv::Size()),
- Values(0.5/*, 1.5, 2*/), Values(0.5/*, 1.5, 2*/), Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR)));
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(cv::Size()),
+ Values(0.5/*, 1.5, 2*/), Values(0.5/*, 1.5, 2*/), Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR)));
INSTANTIATE_TEST_CASE_P(Imgproc, Threshold, Combine(
- Values(CV_8UC1, CV_32FC1), Values(ThreshOp(cv::THRESH_BINARY),
- ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC),
- ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV))));
+ Values(CV_8UC1, CV_32FC1), Values(ThreshOp(cv::THRESH_BINARY),
+ ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC),
+ ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV))));
INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftFiltering, Combine(
- ONE_TYPE(CV_8UC4),
- ONE_TYPE(CV_16SC2),//it is no use in meanShiftFiltering
- Values(5),
- Values(6),
- Values(cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1))
- ));
+ ONE_TYPE(CV_8UC4),
+ ONE_TYPE(CV_16SC2),//it is no use in meanShiftFiltering
+ Values(5),
+ Values(6),
+ Values(cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1))
+ ));
INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftProc, Combine(
- ONE_TYPE(CV_8UC4),
- ONE_TYPE(CV_16SC2),
- Values(5),
- Values(6),
- Values(cv::TermCriteria(cv::TermCriteria::COUNT+cv::TermCriteria::EPS, 5, 1))
- ));
+ ONE_TYPE(CV_8UC4),
+ ONE_TYPE(CV_16SC2),
+ Values(5),
+ Values(6),
+ Values(cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1))
+ ));
INSTANTIATE_TEST_CASE_P(Imgproc, Remap, Combine(
- Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
- Values(CV_32FC1, CV_16SC2, CV_32FC2),Values(-1,CV_32FC1),
- Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR),
- Values((int)cv::BORDER_CONSTANT)));
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+ Values(CV_32FC1, CV_16SC2, CV_32FC2), Values(-1, CV_32FC1),
+ Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR),
+ Values((int)cv::BORDER_CONSTANT)));
INSTANTIATE_TEST_CASE_P(histTestBase, calcHist, Combine(
- ONE_TYPE(CV_8UC1),
- ONE_TYPE(CV_32SC1) //no use
-));
+ ONE_TYPE(CV_8UC1),
+ ONE_TYPE(CV_32SC1) //no use
+ ));
#endif // HAVE_OPENCL
IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size);
-const char* TEMPLATE_METHOD_NAMES[6] = {"TM_SQDIFF", "TM_SQDIFF_NORMED", "TM_CCORR", "TM_CCORR_NORMED", "TM_CCOEFF", "TM_CCOEFF_NORMED"};
+const char *TEMPLATE_METHOD_NAMES[6] = {"TM_SQDIFF", "TM_SQDIFF_NORMED", "TM_CCORR", "TM_CCORR_NORMED", "TM_CCOEFF", "TM_CCOEFF_NORMED"};
PARAM_TEST_CASE(MatchTemplate, cv::Size, TemplateSize, Channels, TemplateMethod)
{
- cv::Size size;
- cv::Size templ_size;
- int cn;
- int method;
- //vector<cv::ocl::Info> oclinfo;
-
- virtual void SetUp()
- {
- size = GET_PARAM(0);
- templ_size = GET_PARAM(1);
- cn = GET_PARAM(2);
- method = GET_PARAM(3);
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
- }
+ cv::Size size;
+ cv::Size templ_size;
+ int cn;
+ int method;
+ //vector<cv::ocl::Info> oclinfo;
+
+ virtual void SetUp()
+ {
+ size = GET_PARAM(0);
+ templ_size = GET_PARAM(1);
+ cn = GET_PARAM(2);
+ method = GET_PARAM(3);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ }
};
struct MatchTemplate8U : MatchTemplate {};
TEST_P(MatchTemplate8U, Performance)
{
- std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
- std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl;
- std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl;
- std::cout << "Channels: " << cn << std::endl;
+ std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
+ std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl;
+ std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl;
+ std::cout << "Channels: " << cn << std::endl;
- cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn));
- cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn));
- cv::Mat dst_gold;
- cv::ocl::oclMat dst;
+ cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn));
+ cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn));
+ cv::Mat dst_gold;
+ cv::ocl::oclMat dst;
-
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t1=0;
- double t2=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
- t1 = (double)cvGetTickCount();//gpu start1
+ double t1 = 0;
+ double t2 = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+
+ t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);//upload
- cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload
+ cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
- t2 = (double)cvGetTickCount() - t2;//kernel
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
+ t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- dst.download (cpu_dst);//download
+ cv::Mat cpu_dst;
+ dst.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
+ if(j == 0)
+ continue;
- totalgputick=t1+totalgputick;
- totalgputick_kernel=t2+totalgputick_kernel;
+ totalgputick = t1 + totalgputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
- }
+ }
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
struct MatchTemplate32F : MatchTemplate {};
TEST_P(MatchTemplate32F, Performance)
{
- std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
- std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl;
- std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl;
- std::cout << "Channels: " << cn << std::endl;
- cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn));
- cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn));
+ std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
+ std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl;
+ std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl;
+ std::cout << "Channels: " << cn << std::endl;
+ cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn));
+ cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn));
- cv::Mat dst_gold;
- cv::ocl::oclMat dst;
+ cv::Mat dst_gold;
+ cv::ocl::oclMat dst;
- double totalgputick=0;
- double totalgputick_kernel=0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
- double t1=0;
- double t2=0;
- for(int j = 0; j < LOOP_TIMES; j ++)
- {
+ double t1 = 0;
+ double t2 = 0;
+ for(int j = 0; j < LOOP_TIMES; j ++)
+ {
- t1 = (double)cvGetTickCount();//gpu start1
+ t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);//upload
- cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload
+ cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload
+
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
+ t2 = (double)cvGetTickCount() - t2;//kernel
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
- t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ dst.download (cpu_dst);//download
- cv::Mat cpu_dst;
- dst.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
- t1 = (double)cvGetTickCount() - t1;//gpu end1
+ totalgputick = t1 + totalgputick;
- totalgputick=t1+totalgputick;
-
- totalgputick_kernel=t2+totalgputick_kernel;
+ totalgputick_kernel = t2 + totalgputick_kernel;
- }
+ }
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,
- testing::Combine(
- testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT),cv::Size(1800, 1500)),
- testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
- testing::Values(Channels(1), Channels(4)/*, Channels(3)*/),
- ALL_TEMPLATE_METHODS
- )
-);
+INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,
+ testing::Combine(
+ testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT), cv::Size(1800, 1500)),
+ testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
+ testing::Values(Channels(1), Channels(4)/*, Channels(3)*/),
+ ALL_TEMPLATE_METHODS
+ )
+ );
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(
- testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT),cv::Size(1800, 1500)),
- testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
- testing::Values(Channels(1), Channels(4) /*, Channels(3)*/),
- testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
+ testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT), cv::Size(1800, 1500)),
+ testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
+ testing::Values(Channels(1), Channels(4) /*, Channels(3)*/),
+ testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
#endif //HAVE_OPENCL
\ No newline at end of file
////////////////////////////////converto/////////////////////////////////////////////////
PARAM_TEST_CASE(ConvertToTestBase, MatType, MatType)
{
- int type;
- int dst_type;
-
- //src mat
- cv::Mat mat;
- cv::Mat dst;
-
- // set up roi
- int roicols;
- int roirows;
- int srcx;
- int srcy;
- int dstx;
- int dsty;
-
- //src mat with roi
- cv::Mat mat_roi;
- cv::Mat dst_roi;
- //std::vector<cv::ocl::Info> oclinfo;
- //ocl dst mat for testing
- cv::ocl::oclMat gdst_whole;
-
- //ocl mat with roi
- cv::ocl::oclMat gmat;
- cv::ocl::oclMat gdst;
-
- virtual void SetUp()
- {
- type = GET_PARAM(0);
- dst_type = GET_PARAM(1);
-
- cv::RNG& rng = TS::ptr()->get_rng();
- cv::Size size(MWIDTH, MHEIGHT);
-
- mat = randomMat(rng, size, type, 5, 16, false);
- dst = randomMat(rng, size, type, 5, 16, false);
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
- ////if you want to use undefault device, set it here
- ////setDevice(oclinfo[0]);
- //setBinpath(CLBINPATH);
- }
-
- void Has_roi(int b)
- {
- //cv::RNG& rng = TS::ptr()->get_rng();
- if(b)
- {
- //randomize ROI
- roicols = mat.cols-1; //start
- roirows = mat.rows-1;
- srcx = 1;
- srcy = 1;
- dstx = 1;
- dsty =1;
- }else
- {
- roicols = mat.cols;
- roirows = mat.rows;
- srcx = 0;
- srcy = 0;
- dstx = 0;
- dsty = 0;
- };
-
- mat_roi = mat(Rect(srcx,srcy,roicols,roirows));
- dst_roi = dst(Rect(dstx,dsty,roicols,roirows));
-
- //gdst_whole = dst;
- //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- //gmat = mat_roi;
- }
+ int type;
+ int dst_type;
+
+ //src mat
+ cv::Mat mat;
+ cv::Mat dst;
+
+ // set up roi
+ int roicols;
+ int roirows;
+ int srcx;
+ int srcy;
+ int dstx;
+ int dsty;
+
+ //src mat with roi
+ cv::Mat mat_roi;
+ cv::Mat dst_roi;
+ //std::vector<cv::ocl::Info> oclinfo;
+ //ocl dst mat for testing
+ cv::ocl::oclMat gdst_whole;
+
+ //ocl mat with roi
+ cv::ocl::oclMat gmat;
+ cv::ocl::oclMat gdst;
+
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
+ dst_type = GET_PARAM(1);
+
+ cv::RNG &rng = TS::ptr()->get_rng();
+ cv::Size size(MWIDTH, MHEIGHT);
+
+ mat = randomMat(rng, size, type, 5, 16, false);
+ dst = randomMat(rng, size, type, 5, 16, false);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //setBinpath(CLBINPATH);
+ }
+
+ void Has_roi(int b)
+ {
+ //cv::RNG& rng = TS::ptr()->get_rng();
+ if(b)
+ {
+ //randomize ROI
+ roicols = mat.cols - 1; //start
+ roirows = mat.rows - 1;
+ srcx = 1;
+ srcy = 1;
+ dstx = 1;
+ dsty = 1;
+ }
+ else
+ {
+ roicols = mat.cols;
+ roirows = mat.rows;
+ srcx = 0;
+ srcy = 0;
+ dstx = 0;
+ dsty = 0;
+ };
+
+ mat_roi = mat(Rect(srcx, srcy, roicols, roirows));
+ dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+
+ //gdst_whole = dst;
+ //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+ //gmat = mat_roi;
+ }
};
-struct ConvertTo :ConvertToTestBase {};
-
-TEST_P(ConvertTo, Accuracy)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- mat_roi.convertTo(dst_roi, dst_type);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat = mat_roi;
- t2=(double)cvGetTickCount();//kernel
- gmat.convertTo(gdst, dst_type);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+struct ConvertTo : ConvertToTestBase {};
+
+TEST_P(ConvertTo, Accuracy)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ mat_roi.convertTo(dst_roi, dst_type);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat = mat_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ gmat.convertTo(gdst, dst_type);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat = mat_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- gmat.convertTo(gdst, dst_type);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat = mat_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ gmat.convertTo(gdst, dst_type);
+ };
#endif
}
PARAM_TEST_CASE(CopyToTestBase, MatType, bool)
{
- int type;
-
- cv::Mat mat;
- cv::Mat mask;
- cv::Mat dst;
-
- // set up roi
- int roicols;
- int roirows;
- int srcx;
- int srcy;
- int dstx;
- int dsty;
- int maskx;
- int masky;
-
- //src mat with roi
- cv::Mat mat_roi;
- cv::Mat mask_roi;
- cv::Mat dst_roi;
- //std::vector<cv::ocl::Info> oclinfo;
- //ocl dst mat for testing
- cv::ocl::oclMat gdst_whole;
-
- //ocl mat with roi
- cv::ocl::oclMat gmat;
- cv::ocl::oclMat gdst;
- cv::ocl::oclMat gmask;
-
- virtual void SetUp()
- {
- type = GET_PARAM(0);
-
- cv::RNG& rng = TS::ptr()->get_rng();
- cv::Size size(MWIDTH, MHEIGHT);
-
- mat = randomMat(rng, size, type, 5, 16, false);
- dst = randomMat(rng, size, type, 5, 16, false);
- mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
-
- cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
- ////if you want to use undefault device, set it here
- ////setDevice(oclinfo[0]);
- //setBinpath(CLBINPATH);
- }
-
- void Has_roi(int b)
- {
- //cv::RNG& rng = TS::ptr()->get_rng();
- if(b)
- {
- //randomize ROI
- roicols = mat.cols-1; //start
- roirows = mat.rows-1;
- srcx = 1;
- srcy = 1;
- dstx = 1;
- dsty =1;
- maskx = 1;
- masky = 1;
- }else
- {
- roicols = mat.cols;
- roirows = mat.rows;
- srcx = 0;
- srcy = 0;
- dstx = 0;
- dsty = 0;
- maskx = 0;
- masky = 0;
- };
-
- mat_roi = mat(Rect(srcx,srcy,roicols,roirows));
- mask_roi = mask(Rect(maskx,masky,roicols,roirows));
- dst_roi = dst(Rect(dstx,dsty,roicols,roirows));
-
- //gdst_whole = dst;
- //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- //gmat = mat_roi;
- //gmask = mask_roi;
- }
+ int type;
+
+ cv::Mat mat;
+ cv::Mat mask;
+ cv::Mat dst;
+
+ // set up roi
+ int roicols;
+ int roirows;
+ int srcx;
+ int srcy;
+ int dstx;
+ int dsty;
+ int maskx;
+ int masky;
+
+ //src mat with roi
+ cv::Mat mat_roi;
+ cv::Mat mask_roi;
+ cv::Mat dst_roi;
+ //std::vector<cv::ocl::Info> oclinfo;
+ //ocl dst mat for testing
+ cv::ocl::oclMat gdst_whole;
+
+ //ocl mat with roi
+ cv::ocl::oclMat gmat;
+ cv::ocl::oclMat gdst;
+ cv::ocl::oclMat gmask;
+
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
+
+ cv::RNG &rng = TS::ptr()->get_rng();
+ cv::Size size(MWIDTH, MHEIGHT);
+
+ mat = randomMat(rng, size, type, 5, 16, false);
+ dst = randomMat(rng, size, type, 5, 16, false);
+ mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
+
+ cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //setBinpath(CLBINPATH);
+ }
+
+ void Has_roi(int b)
+ {
+ //cv::RNG& rng = TS::ptr()->get_rng();
+ if(b)
+ {
+ //randomize ROI
+ roicols = mat.cols - 1; //start
+ roirows = mat.rows - 1;
+ srcx = 1;
+ srcy = 1;
+ dstx = 1;
+ dsty = 1;
+ maskx = 1;
+ masky = 1;
+ }
+ else
+ {
+ roicols = mat.cols;
+ roirows = mat.rows;
+ srcx = 0;
+ srcy = 0;
+ dstx = 0;
+ dsty = 0;
+ maskx = 0;
+ masky = 0;
+ };
+
+ mat_roi = mat(Rect(srcx, srcy, roicols, roirows));
+ mask_roi = mask(Rect(maskx, masky, roicols, roirows));
+ dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+
+ //gdst_whole = dst;
+ //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+
+ //gmat = mat_roi;
+ //gmask = mask_roi;
+ }
};
-struct CopyTo :CopyToTestBase {};
-
-TEST_P(CopyTo, Without_mask)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- mat_roi.copyTo(dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat = mat_roi;
- t2=(double)cvGetTickCount();//kernel
- gmat.copyTo(gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
-#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
+struct CopyTo : CopyToTestBase {};
- gmat = mat_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- gmat.copyTo(gdst);
- };
+TEST_P(CopyTo, Without_mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ mat_roi.copyTo(dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat = mat_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ gmat.copyTo(gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
+#else
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat = mat_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ gmat.copyTo(gdst);
+ };
#endif
}
-TEST_P(CopyTo, With_mask)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- mat_roi.copyTo(dst_roi,mask_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat = mat_roi;
- gmask = mask_roi;
- t2=(double)cvGetTickCount();//kernel
- gmat.copyTo(gdst, gmask);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(CopyTo, With_mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ mat_roi.copyTo(dst_roi, mask_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat = mat_roi;
+ gmask = mask_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ gmat.copyTo(gdst, gmask);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
-
- gmat = mat_roi;
- gmask = mask_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- gmat.copyTo(gdst, gmask);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+
+ gmat = mat_roi;
+ gmask = mask_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ gmat.copyTo(gdst, gmask);
+ };
#endif
}
PARAM_TEST_CASE(SetToTestBase, MatType, bool)
{
- int type;
- cv::Scalar val;
-
- cv::Mat mat;
- cv::Mat mask;
-
- // set up roi
- int roicols;
- int roirows;
- int srcx;
- int srcy;
- int maskx;
- int masky;
-
- //src mat with roi
- cv::Mat mat_roi;
- cv::Mat mask_roi;
- //std::vector<cv::ocl::Info> oclinfo;
- //ocl dst mat for testing
- cv::ocl::oclMat gmat_whole;
-
- //ocl mat with roi
- cv::ocl::oclMat gmat;
- cv::ocl::oclMat gmask;
-
- virtual void SetUp()
- {
- type = GET_PARAM(0);
-
- cv::RNG& rng = TS::ptr()->get_rng();
- cv::Size size(MWIDTH, MHEIGHT);
-
- mat = randomMat(rng, size, type, 5, 16, false);
- mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
-
- cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
- val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
- ////if you want to use undefault device, set it here
- ////setDevice(oclinfo[0]);
- //setBinpath(CLBINPATH);
- }
-
- void Has_roi(int b)
- {
- //cv::RNG& rng = TS::ptr()->get_rng();
- if(b)
- {
- //randomize ROI
- roicols = mat.cols-1; //start
- roirows = mat.rows-1;
- srcx = 1;
- srcy = 1;
- maskx = 1;
- masky = 1;
- }else
- {
- roicols = mat.cols;
- roirows = mat.rows;
- srcx = 0;
- srcy = 0;
- maskx = 0;
- masky = 0;
- };
-
- mat_roi = mat(Rect(srcx,srcy,roicols,roirows));
- mask_roi = mask(Rect(maskx,masky,roicols,roirows));
-
- //gmat_whole = mat;
- //gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows));
-
- //gmask = mask_roi;
- }
+ int type;
+ cv::Scalar val;
+
+ cv::Mat mat;
+ cv::Mat mask;
+
+ // set up roi
+ int roicols;
+ int roirows;
+ int srcx;
+ int srcy;
+ int maskx;
+ int masky;
+
+ //src mat with roi
+ cv::Mat mat_roi;
+ cv::Mat mask_roi;
+ //std::vector<cv::ocl::Info> oclinfo;
+ //ocl dst mat for testing
+ cv::ocl::oclMat gmat_whole;
+
+ //ocl mat with roi
+ cv::ocl::oclMat gmat;
+ cv::ocl::oclMat gmask;
+
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
+
+ cv::RNG &rng = TS::ptr()->get_rng();
+ cv::Size size(MWIDTH, MHEIGHT);
+
+ mat = randomMat(rng, size, type, 5, 16, false);
+ mask = randomMat(rng, size, CV_8UC1, 0, 2, false);
+
+ cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
+ val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0));
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //setBinpath(CLBINPATH);
+ }
+
+ void Has_roi(int b)
+ {
+ //cv::RNG& rng = TS::ptr()->get_rng();
+ if(b)
+ {
+ //randomize ROI
+ roicols = mat.cols - 1; //start
+ roirows = mat.rows - 1;
+ srcx = 1;
+ srcy = 1;
+ maskx = 1;
+ masky = 1;
+ }
+ else
+ {
+ roicols = mat.cols;
+ roirows = mat.rows;
+ srcx = 0;
+ srcy = 0;
+ maskx = 0;
+ masky = 0;
+ };
+
+ mat_roi = mat(Rect(srcx, srcy, roicols, roirows));
+ mask_roi = mask(Rect(maskx, masky, roicols, roirows));
+
+ //gmat_whole = mat;
+ //gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows));
+
+ //gmask = mask_roi;
+ }
};
-struct SetTo :SetToTestBase {};
-
-TEST_P(SetTo, Without_mask)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- mat_roi.setTo(val);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gmat_whole = mat;
- gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows));
- t2=(double)cvGetTickCount();//kernel
- gmat.setTo(val);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gmat_whole.download(cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+struct SetTo : SetToTestBase {};
+
+TEST_P(SetTo, Without_mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ mat_roi.setTo(val);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gmat_whole = mat;
+ gmat = gmat_whole(Rect(srcx, srcy, roicols, roirows));
+ t2 = (double)cvGetTickCount(); //kernel
+ gmat.setTo(val);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gmat_whole.download(cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gmat_whole = mat;
- gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows));
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- gmat.setTo(val);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gmat_whole = mat;
+ gmat = gmat_whole(Rect(srcx, srcy, roicols, roirows));
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ gmat.setTo(val);
+ };
#endif
}
-TEST_P(SetTo, With_mask)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
-
- t0 = (double)cvGetTickCount();//cpu start
- mat_roi.setTo(val, mask_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gmat_whole = mat;
- gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows));
-
- gmask = mask_roi;
- t2=(double)cvGetTickCount();//kernel
- gmat.setTo(val, gmask);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gmat_whole.download(cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(SetTo, With_mask)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+
+ t0 = (double)cvGetTickCount();//cpu start
+ mat_roi.setTo(val, mask_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gmat_whole = mat;
+ gmat = gmat_whole(Rect(srcx, srcy, roicols, roirows));
+
+ gmask = mask_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ gmat.setTo(val, gmask);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gmat_whole.download(cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gmat_whole = mat;
- gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows));
-
- gmask = mask_roi;
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- gmat.setTo(val, gmask);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gmat_whole = mat;
+ gmat = gmat_whole(Rect(srcx, srcy, roicols, roirows));
+
+ gmask = mask_roi;
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ gmat.setTo(val, gmask);
+ };
#endif
}
-
-//**********test************
+PARAM_TEST_CASE(DataTransfer, MatType, bool)
+{
+ int type;
+ cv::Mat mat;
+ cv::ocl::oclMat gmat_whole;
+
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
+ cv::RNG &rng = TS::ptr()->get_rng();
+ cv::Size size(MWIDTH, MHEIGHT);
+ mat = randomMat(rng, size, type, 5, 16, false);
+ }
+};
+TEST_P(DataTransfer, perf)
+{
+ double totaluploadtick = 0;
+ double totaldownloadtick = 0;
+ double totaltick = 0;
+ double t0 = 0;
+ double t1 = 0;
+ cv::Mat cpu_dst;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ t0 = (double)cvGetTickCount();
+ gmat_whole.upload(mat);//upload
+ t0 = (double)cvGetTickCount() - t0;
+
+ t1 = (double)cvGetTickCount();
+ gmat_whole.download(cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;
+
+ if(j == 0)
+ continue;
+ totaluploadtick = t0 + totaluploadtick;
+ totaldownloadtick = t1 + totaldownloadtick;
+ }
+ EXPECT_MAT_SIMILAR(mat, cpu_dst, 0.0);
+ totaltick = totaluploadtick + totaldownloadtick;
+ cout << "average upload time is " << totaluploadtick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average download time is " << totaldownloadtick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average data transfer time is " << totaltick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+}
+//**********test************
INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine(
- Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
- Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4)));
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4)));
INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine(
- Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
- Values(false))); // Values(false) is the reserved parameter
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine(
- Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
- Values(false))); // Values(false) is the reserved parameter
+ Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
+ Values(false))); // Values(false) is the reserved parameter
+INSTANTIATE_TEST_CASE_P(MatrixOperation, DataTransfer, Combine(
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
+ Values(false))); // Values(false) is the reserved parameter
#endif
PARAM_TEST_CASE(PyrDown, MatType, int)
{
- int type;
- int channels;
- //src mat
- cv::Mat mat1;
- cv::Mat dst;
-
- //std::vector<cv::ocl::Info> oclinfo;
- //ocl dst mat for testing
-
- cv::ocl::oclMat gmat1;
- cv::ocl::oclMat gdst;
-
-
- virtual void SetUp()
- {
- type = GET_PARAM(0);
- channels = GET_PARAM(1);
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
- }
-
-
+ int type;
+ int channels;
+ //src mat
+ cv::Mat mat1;
+ cv::Mat dst;
+
+ //std::vector<cv::ocl::Info> oclinfo;
+ //ocl dst mat for testing
+
+ cv::ocl::oclMat gmat1;
+ cv::ocl::oclMat gdst;
+
+
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
+ channels = GET_PARAM(1);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ }
+
+
};
#define VARNAME(A) string(#A);
////////////////////////////////PyrDown/////////////////////////////////////////////////
TEST_P(PyrDown, Mat)
{
- cv::Size size(MWIDTH, MHEIGHT);
- cv::RNG &rng = TS::ptr()->get_rng();
- mat1 = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
-
-
- cv::ocl::oclMat gdst;
- double totalgputick = 0;
- double totalgputick_kernel = 0;
-
- double t1 = 0;
- double t2 = 0;
-
- for (int j = 0; j < LOOP_TIMES + 1; j ++)
- {
-
- t1 = (double)cvGetTickCount();//gpu start1
-
- cv::ocl::oclMat gmat1(mat1);
-
- t2 = (double)cvGetTickCount(); //kernel
- cv::ocl::pyrDown(gmat1, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
-
- cv::Mat cpu_dst;
- gdst.download(cpu_dst);
-
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if (j == 0)
- {
- continue;
- }
-
- totalgputick = t1 + totalgputick;
-
- totalgputick_kernel = t2 + totalgputick_kernel;
-
- }
-
- cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
-
+ cv::Size size(MWIDTH, MHEIGHT);
+ cv::RNG &rng = TS::ptr()->get_rng();
+ mat1 = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
+
+
+ cv::ocl::oclMat gdst;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+
+ double t1 = 0;
+ double t2 = 0;
+
+ for (int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+
+ t1 = (double)cvGetTickCount();//gpu start1
+
+ cv::ocl::oclMat gmat1(mat1);
+
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::pyrDown(gmat1, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+
+ cv::Mat cpu_dst;
+ gdst.download(cpu_dst);
+
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+ if (j == 0)
+ {
+ continue;
+ }
+
+ totalgputick = t1 + totalgputick;
+
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+
}
//********test****************
PARAM_TEST_CASE(PyrUp, MatType, int)
{
- int type;
- int channels;
- //std::vector<cv::ocl::Info> oclinfo;
-
- virtual void SetUp()
- {
- type = GET_PARAM(0);
- channels = GET_PARAM(1);
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
- }
+ int type;
+ int channels;
+ //std::vector<cv::ocl::Info> oclinfo;
+
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
+ channels = GET_PARAM(1);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ }
};
TEST_P(PyrUp, Performance)
{
- cv::Size size(MWIDTH, MHEIGHT);
- cv::Mat src = randomMat(size, CV_MAKETYPE(type, channels));
- cv::Mat dst_gold;
- cv::ocl::oclMat dst;
-
-
- double totalgputick = 0;
- double totalgputick_kernel = 0;
-
- double t1 = 0;
- double t2 = 0;
-
- for (int j = 0; j < LOOP_TIMES + 1; j ++)
- {
- t1 = (double)cvGetTickCount();//gpu start1
-
- cv::ocl::oclMat srcMat = cv::ocl::oclMat(src);//upload
-
- t2 = (double)cvGetTickCount(); //kernel
- cv::ocl::pyrUp(srcMat, dst);
- t2 = (double)cvGetTickCount() - t2;//kernel
-
- cv::Mat cpu_dst;
- dst.download(cpu_dst); //download
-
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if (j == 0)
- {
- continue;
- }
-
- totalgputick = t1 + totalgputick;
-
- totalgputick_kernel = t2 + totalgputick_kernel;
-
- }
-
-
- cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
-
-
+ cv::Size size(MWIDTH, MHEIGHT);
+ cv::Mat src = randomMat(size, CV_MAKETYPE(type, channels));
+ cv::Mat dst_gold;
+ cv::ocl::oclMat dst;
+
+
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+
+ double t1 = 0;
+ double t2 = 0;
+
+ for (int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ t1 = (double)cvGetTickCount();//gpu start1
+
+ cv::ocl::oclMat srcMat = cv::ocl::oclMat(src);//upload
+
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::pyrUp(srcMat, dst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+
+ cv::Mat cpu_dst;
+ dst.download(cpu_dst); //download
+
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+ if (j == 0)
+ {
+ continue;
+ }
+
+ totalgputick = t1 + totalgputick;
+
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+
+
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+
+
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, Combine(
using namespace cv::ocl;
PARAM_TEST_CASE(MergeTestBase, MatType, int)
{
- int type;
- int channels;
-
- //src mat
- cv::Mat mat1;
- cv::Mat mat2;
- cv::Mat mat3;
- cv::Mat mat4;
-
- //dst mat
- cv::Mat dst;
-
- // set up roi
- int roicols;
- int roirows;
- int src1x;
- int src1y;
- int src2x;
- int src2y;
- int src3x;
- int src3y;
- int src4x;
- int src4y;
- int dstx;
- int dsty;
-
- //src mat with roi
- cv::Mat mat1_roi;
- cv::Mat mat2_roi;
- cv::Mat mat3_roi;
- cv::Mat mat4_roi;
-
- //dst mat with roi
- cv::Mat dst_roi;
- //std::vector<cv::ocl::Info> oclinfo;
- //ocl dst mat for testing
- cv::ocl::oclMat gdst_whole;
-
- //ocl mat with roi
- cv::ocl::oclMat gmat1;
- cv::ocl::oclMat gmat2;
- cv::ocl::oclMat gmat3;
- cv::ocl::oclMat gmat4;
- cv::ocl::oclMat gdst;
-
- virtual void SetUp()
- {
- type = GET_PARAM(0);
- channels = GET_PARAM(1);
-
- cv::RNG& rng = TS::ptr()->get_rng();
- cv::Size size(MWIDTH, MHEIGHT);
-
- mat1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
- mat2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
- mat3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
- mat4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
- dst = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
- ////if you want to use undefault device, set it here
- ////setDevice(oclinfo[0]);
- //setBinpath(CLBINPATH);
- }
- void Has_roi(int b)
- {
- //cv::RNG& rng = TS::ptr()->get_rng();
- if(b)
- {
- //randomize ROI
- roicols = mat1.cols-1; //start
- roirows = mat1.rows-1;
- src1x = 1;
- src1y = 1;
- src2x = 1;
- src2y = 1;
- src3x = 1;
- src3y = 1;
- src4x = 1;
- src4y = 1;
- dstx = 1;
- dsty =1;
-
- }else
- {
- roicols = mat1.cols;
- roirows = mat1.rows;
- src1x = 0;
- src1y = 0;
- src2x = 0;
- src2y = 0;
- src3x = 0;
- src3y = 0;
- src4x = 0;
- src4y = 0;
- dstx = 0;
- dsty = 0;
- };
-
- mat1_roi = mat1(Rect(src1x,src1y,roicols,roirows));
- mat2_roi = mat2(Rect(src2x,src2y,roicols,roirows));
- mat3_roi = mat3(Rect(src3x,src3y,roicols,roirows));
- mat4_roi = mat4(Rect(src4x,src4y,roicols,roirows));
-
-
- dst_roi = dst(Rect(dstx,dsty,roicols,roirows));
- }
+ int type;
+ int channels;
+
+ //src mat
+ cv::Mat mat1;
+ cv::Mat mat2;
+ cv::Mat mat3;
+ cv::Mat mat4;
+
+ //dst mat
+ cv::Mat dst;
+
+ // set up roi
+ int roicols;
+ int roirows;
+ int src1x;
+ int src1y;
+ int src2x;
+ int src2y;
+ int src3x;
+ int src3y;
+ int src4x;
+ int src4y;
+ int dstx;
+ int dsty;
+
+ //src mat with roi
+ cv::Mat mat1_roi;
+ cv::Mat mat2_roi;
+ cv::Mat mat3_roi;
+ cv::Mat mat4_roi;
+
+ //dst mat with roi
+ cv::Mat dst_roi;
+ //std::vector<cv::ocl::Info> oclinfo;
+ //ocl dst mat for testing
+ cv::ocl::oclMat gdst_whole;
+
+ //ocl mat with roi
+ cv::ocl::oclMat gmat1;
+ cv::ocl::oclMat gmat2;
+ cv::ocl::oclMat gmat3;
+ cv::ocl::oclMat gmat4;
+ cv::ocl::oclMat gdst;
+
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
+ channels = GET_PARAM(1);
+
+ cv::RNG &rng = TS::ptr()->get_rng();
+ cv::Size size(MWIDTH, MHEIGHT);
+
+ mat1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+ mat2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+ mat3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+ mat4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+ dst = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //setBinpath(CLBINPATH);
+ }
+ void Has_roi(int b)
+ {
+ //cv::RNG& rng = TS::ptr()->get_rng();
+ if(b)
+ {
+ //randomize ROI
+ roicols = mat1.cols - 1; //start
+ roirows = mat1.rows - 1;
+ src1x = 1;
+ src1y = 1;
+ src2x = 1;
+ src2y = 1;
+ src3x = 1;
+ src3y = 1;
+ src4x = 1;
+ src4y = 1;
+ dstx = 1;
+ dsty = 1;
+
+ }
+ else
+ {
+ roicols = mat1.cols;
+ roirows = mat1.rows;
+ src1x = 0;
+ src1y = 0;
+ src2x = 0;
+ src2y = 0;
+ src3x = 0;
+ src3y = 0;
+ src4x = 0;
+ src4y = 0;
+ dstx = 0;
+ dsty = 0;
+ };
+
+ mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
+ mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
+ mat3_roi = mat3(Rect(src3x, src3y, roicols, roirows));
+ mat4_roi = mat4(Rect(src4x, src4y, roicols, roirows));
+
+
+ dst_roi = dst(Rect(dstx, dsty, roicols, roirows));
+ }
};
struct Merge : MergeTestBase {};
-TEST_P(Merge, Accuracy)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
- std::vector<cv::Mat> dev_src;
- dev_src.push_back(mat1_roi);
- dev_src.push_back(mat2_roi);
- dev_src.push_back(mat3_roi);
- dev_src.push_back(mat4_roi);
- t0 = (double)cvGetTickCount();//cpu start
- cv::merge(dev_src, dst_roi);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1 ]
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- gmat3 = mat3_roi;
- gmat4 = mat4_roi;
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- std::vector<cv::ocl::oclMat> dev_gsrc;
- dev_gsrc.push_back(gmat1);
- dev_gsrc.push_back(gmat2);
- dev_gsrc.push_back(gmat3);
- dev_gsrc.push_back(gmat4);
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::merge(dev_gsrc, gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst;
- gdst_whole.download (cpu_dst);//download
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+TEST_P(Merge, Accuracy)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+ std::vector<cv::Mat> dev_src;
+ dev_src.push_back(mat1_roi);
+ dev_src.push_back(mat2_roi);
+ dev_src.push_back(mat3_roi);
+ dev_src.push_back(mat4_roi);
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::merge(dev_src, dst_roi);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1 ]
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ gmat3 = mat3_roi;
+ gmat4 = mat4_roi;
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ std::vector<cv::ocl::oclMat> dev_gsrc;
+ dev_gsrc.push_back(gmat1);
+ dev_gsrc.push_back(gmat2);
+ dev_gsrc.push_back(gmat3);
+ dev_gsrc.push_back(gmat4);
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::merge(dev_gsrc, gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst;
+ gdst_whole.download (cpu_dst);//download
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+
+ if(j == 0)
+ continue;
+
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- gmat1 = mat1_roi;
- gmat2 = mat2_roi;
- gmat3 = mat3_roi;
- gmat4 = mat4_roi;
- gdst_whole = dst;
- gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows));
- std::vector<cv::ocl::oclMat> dev_gsrc;
- dev_gsrc.push_back(gmat1);
- dev_gsrc.push_back(gmat2);
- dev_gsrc.push_back(gmat3);
- dev_gsrc.push_back(gmat4);
-
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::merge(dev_gsrc, gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ gmat1 = mat1_roi;
+ gmat2 = mat2_roi;
+ gmat3 = mat3_roi;
+ gmat4 = mat4_roi;
+ gdst_whole = dst;
+ gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows));
+ std::vector<cv::ocl::oclMat> dev_gsrc;
+ dev_gsrc.push_back(gmat1);
+ dev_gsrc.push_back(gmat2);
+ dev_gsrc.push_back(gmat3);
+ dev_gsrc.push_back(gmat4);
+
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::merge(dev_gsrc, gdst);
+ };
#endif
}
PARAM_TEST_CASE(SplitTestBase, MatType, int)
{
- int type;
- int channels;
-
- //src mat
- cv::Mat mat;
-
- //dstmat
- cv::Mat dst1;
- cv::Mat dst2;
- cv::Mat dst3;
- cv::Mat dst4;
-
- // set up roi
- int roicols;
- int roirows;
- int srcx;
- int srcy;
- int dst1x;
- int dst1y;
- int dst2x;
- int dst2y;
- int dst3x;
- int dst3y;
- int dst4x;
- int dst4y;
-
- //src mat with roi
- cv::Mat mat_roi;
-
- //dst mat with roi
- cv::Mat dst1_roi;
- cv::Mat dst2_roi;
- cv::Mat dst3_roi;
- cv::Mat dst4_roi;
- //std::vector<cv::ocl::Info> oclinfo;
- //ocl dst mat for testing
- cv::ocl::oclMat gdst1_whole;
- cv::ocl::oclMat gdst2_whole;
- cv::ocl::oclMat gdst3_whole;
- cv::ocl::oclMat gdst4_whole;
-
- //ocl mat with roi
- cv::ocl::oclMat gmat;
- cv::ocl::oclMat gdst1;
- cv::ocl::oclMat gdst2;
- cv::ocl::oclMat gdst3;
- cv::ocl::oclMat gdst4;
-
- virtual void SetUp()
- {
- type = GET_PARAM(0);
- channels = GET_PARAM(1);
-
- cv::RNG& rng = TS::ptr()->get_rng();
- cv::Size size(MWIDTH, MHEIGHT);
-
- mat = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
- dst1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
- dst2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
- dst3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
- dst4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
- ////if you want to use undefault device, set it here
- ////setDevice(oclinfo[0]);
- //setBinpath(CLBINPATH);
- }
-
- void Has_roi(int b)
- {
- //cv::RNG& rng = TS::ptr()->get_rng();
- if(b)
- {
- //randomize ROI
- roicols = mat.cols-1; //start
- roirows = mat.rows-1;
- srcx = 1;
- srcx = 1;
- dst1x = 1;
- dst1y =1;
- dst2x = 1;
- dst2y =1;
- dst3x = 1;
- dst3y =1;
- dst4x = 1;
- dst4y =1;
- }else
- {
- roicols = mat.cols;
- roirows = mat.rows;
- srcx = 0;
- srcy = 0;
- dst1x = 0;
- dst1y = 0;
- dst2x = 0;
- dst2y =0;
- dst3x = 0;
- dst3y =0;
- dst4x = 0;
- dst4y =0;
- };
-
- mat_roi = mat(Rect(srcx,srcy,roicols,roirows));
-
- dst1_roi = dst1(Rect(dst1x,dst1y,roicols,roirows));
- dst2_roi = dst2(Rect(dst2x,dst2y,roicols,roirows));
- dst3_roi = dst3(Rect(dst3x,dst3y,roicols,roirows));
- dst4_roi = dst4(Rect(dst4x,dst4y,roicols,roirows));
- }
+ int type;
+ int channels;
+
+ //src mat
+ cv::Mat mat;
+
+ //dstmat
+ cv::Mat dst1;
+ cv::Mat dst2;
+ cv::Mat dst3;
+ cv::Mat dst4;
+
+ // set up roi
+ int roicols;
+ int roirows;
+ int srcx;
+ int srcy;
+ int dst1x;
+ int dst1y;
+ int dst2x;
+ int dst2y;
+ int dst3x;
+ int dst3y;
+ int dst4x;
+ int dst4y;
+
+ //src mat with roi
+ cv::Mat mat_roi;
+
+ //dst mat with roi
+ cv::Mat dst1_roi;
+ cv::Mat dst2_roi;
+ cv::Mat dst3_roi;
+ cv::Mat dst4_roi;
+ //std::vector<cv::ocl::Info> oclinfo;
+ //ocl dst mat for testing
+ cv::ocl::oclMat gdst1_whole;
+ cv::ocl::oclMat gdst2_whole;
+ cv::ocl::oclMat gdst3_whole;
+ cv::ocl::oclMat gdst4_whole;
+
+ //ocl mat with roi
+ cv::ocl::oclMat gmat;
+ cv::ocl::oclMat gdst1;
+ cv::ocl::oclMat gdst2;
+ cv::ocl::oclMat gdst3;
+ cv::ocl::oclMat gdst4;
+
+ virtual void SetUp()
+ {
+ type = GET_PARAM(0);
+ channels = GET_PARAM(1);
+
+ cv::RNG &rng = TS::ptr()->get_rng();
+ cv::Size size(MWIDTH, MHEIGHT);
+
+ mat = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false);
+ dst1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+ dst2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+ dst3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+ dst4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //setBinpath(CLBINPATH);
+ }
+
+ void Has_roi(int b)
+ {
+ //cv::RNG& rng = TS::ptr()->get_rng();
+ if(b)
+ {
+ //randomize ROI
+ roicols = mat.cols - 1; //start
+ roirows = mat.rows - 1;
+ srcx = 1;
+ srcx = 1;
+ dst1x = 1;
+ dst1y = 1;
+ dst2x = 1;
+ dst2y = 1;
+ dst3x = 1;
+ dst3y = 1;
+ dst4x = 1;
+ dst4y = 1;
+ }
+ else
+ {
+ roicols = mat.cols;
+ roirows = mat.rows;
+ srcx = 0;
+ srcy = 0;
+ dst1x = 0;
+ dst1y = 0;
+ dst2x = 0;
+ dst2y = 0;
+ dst3x = 0;
+ dst3y = 0;
+ dst4x = 0;
+ dst4y = 0;
+ };
+
+ mat_roi = mat(Rect(srcx, srcy, roicols, roirows));
+
+ dst1_roi = dst1(Rect(dst1x, dst1y, roicols, roirows));
+ dst2_roi = dst2(Rect(dst2x, dst2y, roicols, roirows));
+ dst3_roi = dst3(Rect(dst3x, dst3y, roicols, roirows));
+ dst4_roi = dst4(Rect(dst4x, dst4y, roicols, roirows));
+ }
};
-struct Split :SplitTestBase {};
-
-TEST_P(Split, Accuracy)
-{
-#ifndef PRINT_KERNEL_RUN_TIME
- double totalcputick=0;
- double totalgputick=0;
- double totalgputick_kernel=0;
- double t0=0;
- double t1=0;
- double t2=0;
- for(int k=LOOPROISTART;k<LOOPROIEND;k++){
- totalcputick=0;
- totalgputick=0;
- totalgputick_kernel=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- Has_roi(k);
- cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi};
- cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4};
- t0 = (double)cvGetTickCount();//cpu start
- cv::split(mat_roi, dev_dst);
- t0 = (double)cvGetTickCount() - t0;//cpu end
-
- t1 = (double)cvGetTickCount();//gpu start1
- gdst1_whole = dst1;
- gdst1 = gdst1_whole(Rect(dst1x,dst1y,roicols,roirows));
-
- gdst2_whole = dst2;
- gdst2 = gdst2_whole(Rect(dst2x,dst2y,roicols,roirows));
-
- gdst3_whole = dst3;
- gdst3 = gdst3_whole(Rect(dst3x,dst3y,roicols,roirows));
-
- gdst4_whole = dst4;
- gdst4 = gdst4_whole(Rect(dst4x,dst4y,roicols,roirows));
-
- gmat = mat_roi;
- t2=(double)cvGetTickCount();//kernel
- cv::ocl::split(gmat, dev_gdst);
- t2 = (double)cvGetTickCount() - t2;//kernel
- cv::Mat cpu_dst1;
- cv::Mat cpu_dst2;
- cv::Mat cpu_dst3;
- cv::Mat cpu_dst4;
- gdst1_whole.download(cpu_dst1);
- gdst2_whole.download(cpu_dst2);
- gdst3_whole.download(cpu_dst3);
- gdst4_whole.download(cpu_dst4);
- t1 = (double)cvGetTickCount() - t1;//gpu end1
- if(j == 0)
- continue;
- totalgputick=t1+totalgputick;
- totalcputick=t0+totalcputick;
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
- if(k==0){cout<<"no roi\n";}else{cout<<"with roi\n";};
- cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- }
+struct Split : SplitTestBase {};
+
+TEST_P(Split, Accuracy)
+{
+#ifndef PRINT_KERNEL_RUN_TIME
+ double totalcputick = 0;
+ double totalgputick = 0;
+ double totalgputick_kernel = 0;
+ double t0 = 0;
+ double t1 = 0;
+ double t2 = 0;
+ for(int k = LOOPROISTART; k < LOOPROIEND; k++)
+ {
+ totalcputick = 0;
+ totalgputick = 0;
+ totalgputick_kernel = 0;
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)
+ {
+ Has_roi(k);
+ cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi};
+ cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4};
+ t0 = (double)cvGetTickCount();//cpu start
+ cv::split(mat_roi, dev_dst);
+ t0 = (double)cvGetTickCount() - t0;//cpu end
+
+ t1 = (double)cvGetTickCount();//gpu start1
+ gdst1_whole = dst1;
+ gdst1 = gdst1_whole(Rect(dst1x, dst1y, roicols, roirows));
+
+ gdst2_whole = dst2;
+ gdst2 = gdst2_whole(Rect(dst2x, dst2y, roicols, roirows));
+
+ gdst3_whole = dst3;
+ gdst3 = gdst3_whole(Rect(dst3x, dst3y, roicols, roirows));
+
+ gdst4_whole = dst4;
+ gdst4 = gdst4_whole(Rect(dst4x, dst4y, roicols, roirows));
+
+ gmat = mat_roi;
+ t2 = (double)cvGetTickCount(); //kernel
+ cv::ocl::split(gmat, dev_gdst);
+ t2 = (double)cvGetTickCount() - t2;//kernel
+ cv::Mat cpu_dst1;
+ cv::Mat cpu_dst2;
+ cv::Mat cpu_dst3;
+ cv::Mat cpu_dst4;
+ gdst1_whole.download(cpu_dst1);
+ gdst2_whole.download(cpu_dst2);
+ gdst3_whole.download(cpu_dst3);
+ gdst4_whole.download(cpu_dst4);
+ t1 = (double)cvGetTickCount() - t1;//gpu end1
+ if(j == 0)
+ continue;
+ totalgputick = t1 + totalgputick;
+ totalcputick = t0 + totalcputick;
+ totalgputick_kernel = t2 + totalgputick_kernel;
+
+ }
+ if(k == 0)
+ {
+ cout << "no roi\n";
+ }
+ else
+ {
+ cout << "with roi\n";
+ };
+ cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
+ }
#else
- for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
- {
- Has_roi(j);
- cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi};
- cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4};
- gdst1_whole = dst1;
- gdst1 = gdst1_whole(Rect(dst1x,dst1y,roicols,roirows));
-
- gdst2_whole = dst2;
- gdst2 = gdst2_whole(Rect(dst2x,dst2y,roicols,roirows));
-
- gdst3_whole = dst3;
- gdst3 = gdst3_whole(Rect(dst3x,dst3y,roicols,roirows));
-
- gdst4_whole = dst4;
- gdst4 = gdst4_whole(Rect(dst4x,dst4y,roicols,roirows));
- gmat = mat_roi;
- if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";};
- cv::ocl::split(gmat, dev_gdst);
- };
+ for(int j = LOOPROISTART; j < LOOPROIEND; j ++)
+ {
+ Has_roi(j);
+ cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi};
+ cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4};
+ gdst1_whole = dst1;
+ gdst1 = gdst1_whole(Rect(dst1x, dst1y, roicols, roirows));
+
+ gdst2_whole = dst2;
+ gdst2 = gdst2_whole(Rect(dst2x, dst2y, roicols, roirows));
+
+ gdst3_whole = dst3;
+ gdst3 = gdst3_whole(Rect(dst3x, dst3y, roicols, roirows));
+
+ gdst4_whole = dst4;
+ gdst4 = gdst4_whole(Rect(dst4x, dst4y, roicols, roirows));
+ gmat = mat_roi;
+ if(j == 0)
+ {
+ cout << "no roi:";
+ }
+ else
+ {
+ cout << "\nwith roi:";
+ };
+ cv::ocl::split(gmat, dev_gdst);
+ };
#endif
}
//*************test*****************
INSTANTIATE_TEST_CASE_P(SplitMerge, Merge, Combine(
- Values(CV_8UC4, CV_32FC4), Values(1, 4)));
+ Values(CV_8UC4, CV_32FC4), Values(1, 4)));
INSTANTIATE_TEST_CASE_P(SplitMerge, Split , Combine(
- Values(CV_8U, CV_32S, CV_32F), Values(1, 4)));
+ Values(CV_8U, CV_32S, CV_32F), Values(1, 4)));
#endif // HAVE_OPENCL
#include "precomp.hpp"\r
#include <iomanip>\r
\r
-#ifdef HAVE_OPENCL
-
-using namespace cv;
-using namespace cv::ocl;
-using namespace cvtest;
-using namespace testing;
+#ifdef HAVE_OPENCL\r
+\r
+using namespace cv;\r
+using namespace cv::ocl;\r
+using namespace cvtest;\r
+using namespace testing;\r
using namespace std;\r
\r
#define FILTER_IMAGE "../../../samples/gpu/road.png"\r
-
-TEST(SURF, Performance)
-{
- cv::Mat img = readImage(FILTER_IMAGE,cv::IMREAD_GRAYSCALE);
- ASSERT_FALSE(img.empty());
-
+\r
+TEST(SURF, Performance)\r
+{\r
+ cv::Mat img = readImage(FILTER_IMAGE, cv::IMREAD_GRAYSCALE);\r
+ ASSERT_FALSE(img.empty());\r
+\r
ocl::SURF_OCL d_surf;\r
ocl::oclMat d_keypoints;\r
ocl::oclMat d_descriptors;\r
-
- double totalgputick=0;
- double totalgputick_kernel=0;
-
- double t1=0;
- double t2=0;
- for(int j = 0; j < LOOP_TIMES+1; j ++)
- {
- t1 = (double)cvGetTickCount();//gpu start1
-
- ocl::oclMat d_src(img);//upload
-
- t2=(double)cvGetTickCount();//kernel
- d_surf(d_src, ocl::oclMat(), d_keypoints, d_descriptors);
- t2 = (double)cvGetTickCount() - t2;//kernel
-
- cv::Mat cpu_kp, cpu_dp;
- d_keypoints.download (cpu_kp);//download
- d_descriptors.download (cpu_dp);//download
-
- t1 = (double)cvGetTickCount() - t1;//gpu end1
-
- if(j == 0)
- continue;
-
- totalgputick=t1+totalgputick;
-
- totalgputick_kernel=t2+totalgputick_kernel;
-
- }
-
- cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
- cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
-
-
-}
+\r
+ double totalgputick = 0;\r
+ double totalgputick_kernel = 0;\r
+\r
+ double t1 = 0;\r
+ double t2 = 0;\r
+ for(int j = 0; j < LOOP_TIMES + 1; j ++)\r
+ {\r
+ t1 = (double)cvGetTickCount();//gpu start1\r
+\r
+ ocl::oclMat d_src(img);//upload\r
+\r
+ t2 = (double)cvGetTickCount(); //kernel\r
+ d_surf(d_src, ocl::oclMat(), d_keypoints, d_descriptors);\r
+ t2 = (double)cvGetTickCount() - t2;//kernel\r
+\r
+ cv::Mat cpu_kp, cpu_dp;\r
+ d_keypoints.download (cpu_kp);//download\r
+ d_descriptors.download (cpu_dp);//download\r
+\r
+ t1 = (double)cvGetTickCount() - t1;//gpu end1\r
+\r
+ if(j == 0)\r
+ continue;\r
+\r
+ totalgputick = t1 + totalgputick;\r
+\r
+ totalgputick_kernel = t2 + totalgputick_kernel;\r
+\r
+ }\r
+\r
+ cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;\r
+ cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;\r
+\r
+\r
+}\r
#endif //Have opencl
\ No newline at end of file
#include "precomp.hpp"
-
\ No newline at end of file
int randomInt(int minVal, int maxVal)
{
- RNG& rng = TS::ptr()->get_rng();
+ RNG &rng = TS::ptr()->get_rng();
return rng.uniform(minVal, maxVal);
}
double randomDouble(double minVal, double maxVal)
{
- RNG& rng = TS::ptr()->get_rng();
+ RNG &rng = TS::ptr()->get_rng();
return rng.uniform(minVal, maxVal);
}
vector<DeviceInfo> devices(FeatureSet feature)
{
const vector<DeviceInfo>& d = devices();
-
+
vector<DeviceInfo> devs_filtered;
if (TargetArchs::builtWith(feature))
return v;
}
-const vector<MatType>& all_types()
+const vector<MatType> &all_types()
{
static vector<MatType> v = types(CV_8U, CV_64F, 1, 4);
return v;
}
-Mat readImage(const string& fileName, int flags)
+Mat readImage(const string &fileName, int flags)
{
return imread(string(cvtest::TS::ptr()->get_data_path()) + fileName, flags);
}
-Mat readImageType(const string& fname, int type)
+Mat readImageType(const string &fname, int type)
{
Mat src = readImage(fname, CV_MAT_CN(type) == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR);
if (CV_MAT_CN(type) == 4)
return src;
}
-double checkNorm(const Mat& m)
+double checkNorm(const Mat &m)
{
return norm(m, NORM_INF);
}
-double checkNorm(const Mat& m1, const Mat& m2)
+double checkNorm(const Mat &m1, const Mat &m2)
{
return norm(m1, m2, NORM_INF);
}
-double checkSimilarity(const Mat& m1, const Mat& m2)
+double checkSimilarity(const Mat &m1, const Mat &m2)
{
Mat diff;
matchTemplate(m1, m2, diff, CV_TM_CCORR_NORMED);
}
*/
-void PrintTo(const Inverse& inverse, std::ostream* os)
+void PrintTo(const Inverse &inverse, std::ostream *os)
{
if (inverse)
(*os) << "inverse";
double randomDouble(double minVal, double maxVal);
//std::string generateVarList(int first,...);
-std::string generateVarList(int& p1,int& p2);
+std::string generateVarList(int &p1, int &p2);
cv::Size randomSize(int minVal, int maxVal);
cv::Scalar randomScalar(double minVal, double maxVal);
cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal = 255.0);
//std::vector<cv::ocl::DeviceInfo> devices(cv::gpu::FeatureSet feature);
//! read image from testdata folder.
-cv::Mat readImage(const std::string& fileName, int flags = cv::IMREAD_COLOR);
-cv::Mat readImageType(const std::string& fname, int type);
+cv::Mat readImage(const std::string &fileName, int flags = cv::IMREAD_COLOR);
+cv::Mat readImageType(const std::string &fname, int type);
-double checkNorm(const cv::Mat& m);
-double checkNorm(const cv::Mat& m1, const cv::Mat& m2);
-double checkSimilarity(const cv::Mat& m1, const cv::Mat& m2);
+double checkNorm(const cv::Mat &m);
+double checkNorm(const cv::Mat &m1, const cv::Mat &m2);
+double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2);
#define EXPECT_MAT_NORM(mat, eps) \
{ \
EXPECT_LE(checkSimilarity(cv::Mat(mat1), cv::Mat(mat2)), eps); \
}
-namespace cv
-{
- namespace ocl
+namespace cv
+{
+ namespace ocl
{
// void PrintTo(const DeviceInfo& info, std::ostream* os);
}
std::vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end);
//! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4).
-const std::vector<MatType>& all_types();
+const std::vector<MatType> &all_types();
class Inverse
{
- public:
- inline Inverse(bool val = false) : val_(val) {}
+public:
+ inline Inverse(bool val = false) : val_(val) {}
- inline operator bool() const { return val_; }
+ inline operator bool() const
+ {
+ return val_;
+ }
- private:
- bool val_;
+private:
+ bool val_;
};
-void PrintTo(const Inverse& useRoi, std::ostream* os);
+void PrintTo(const Inverse &useRoi, std::ostream *os);
CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE)
CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX)
- enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1};
+enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1};
CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y)
CV_ENUM(ReduceOp, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN)
- CV_FLAGS(GemmFlags, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T);
+CV_FLAGS(GemmFlags, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T);
CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT)
template<typename T>
void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString, void *_scalar)
{
- if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F)
+ if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
}
CV_Assert(src1.depth() != CV_8S);
Context *clCxt = src1.clCxt;
- int channels = dst.channels();
+ int channels = dst.oclchannels();
int depth = dst.depth();
int vector_lengths[4][7] = {{4, 0, 4, 4, 1, 1, 1},
{4, 0, 4, 4, 1, 1, 1}
};
- size_t vector_length = vector_lengths[channels-1][depth];
+ size_t vector_length = vector_lengths[channels - 1][depth];
int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
int cols = divUp(dst.cols * channels + offset_cols, vector_length);
size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(dst.rows, localThreads[1]) * localThreads[1],
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(dst.rows, localThreads[1]) *localThreads[1],
1
};
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
-
+ T scalar;
if(_scalar != NULL)
{
double scalar1 = *((double *)_scalar);
- T scalar = (T)scalar1;
+ scalar = (T)scalar1;
args.push_back( make_pair( sizeof(T), (void *)&scalar ));
}
}
void arithmetic_run(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString)
{
- if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F)
+ if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
}
CV_Assert(mask.type() == CV_8U);
Context *clCxt = src1.clCxt;
- int channels = dst.channels();
+ int channels = dst.oclchannels();
int depth = dst.depth();
int vector_lengths[4][7] = {{4, 4, 2, 2, 1, 1, 1},
{1, 1, 1, 1, 1, 1, 1}
};
- size_t vector_length = vector_lengths[channels-1][depth];
+ size_t vector_length = vector_lengths[channels - 1][depth];
int offset_cols = ((dst.offset % dst.step) / dst.elemSize()) & (vector_length - 1);
int cols = divUp(dst.cols + offset_cols, vector_length);
size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(dst.rows, localThreads[1]) * localThreads[1],
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(dst.rows, localThreads[1]) *localThreads[1],
1
};
void cv::ocl::multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, double scalar)
{
- static MulDivFunc tab[] =
- {
- arithmetic_run<float>, 0, arithmetic_run<float>, arithmetic_run<float>,
- arithmetic_run<float>, arithmetic_run<float>, arithmetic_run<double>,
- };
-
- tab[src1.depth()](src1, src2, dst, "arithm_mul", &arithm_mul, (void *)(&scalar));
+ if((src1.clCxt -> impl -> double_support != 0) && (src1.depth() == CV_64F))
+ arithmetic_run<double>(src1, src2, dst, "arithm_mul", &arithm_mul, (void *)(&scalar));
+ else
+ arithmetic_run<float>(src1, src2, dst, "arithm_mul", &arithm_mul, (void *)(&scalar));
}
void cv::ocl::divide(const oclMat &src1, const oclMat &src2, oclMat &dst, double scalar)
{
- if(src1.clCxt -> impl -> double_support !=0)
+ if(src1.clCxt -> impl -> double_support != 0)
arithmetic_run<double>(src1, src2, dst, "arithm_div", &arithm_div, (void *)(&scalar));
else
arithmetic_run<float>(src1, src2, dst, "arithm_div", &arithm_div, (void *)(&scalar));
}
- template <typename WT ,typename CL_WT>
+template <typename WT , typename CL_WT>
void arithmetic_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar)
{
- if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F)
+ if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
}
dst.create(src1.size(), src1.type());
CV_Assert(src1.cols == dst.cols && src1.rows == dst.rows &&
- src1.type() == dst.type());
+ src1.type() == dst.type());
//CV_Assert(src1.depth() != CV_8S);
CV_Assert(mask.type() == CV_8U && src1.rows == mask.rows && src1.cols == mask.cols);
Context *clCxt = src1.clCxt;
- int channels = dst.channels();
+ int channels = dst.oclchannels();
int depth = dst.depth();
WT s[4] = { saturate_cast<WT>(src2.val[0]), saturate_cast<WT>(src2.val[1]),
- saturate_cast<WT>(src2.val[2]), saturate_cast<WT>(src2.val[3])
- };
+ saturate_cast<WT>(src2.val[2]), saturate_cast<WT>(src2.val[3])
+ };
int vector_lengths[4][7] = {{4, 0, 2, 2, 1, 1, 1},
{2, 0, 1, 1, 1, 1, 1},
{1, 0, 1, 1, 1, 1, 1}
};
- size_t vector_length = vector_lengths[channels-1][depth];
+ size_t vector_length = vector_lengths[channels - 1][depth];
int offset_cols = ((dst.offset % dst.step) / dst.elemSize()) & (vector_length - 1);
int cols = divUp(dst.cols + offset_cols, vector_length);
size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(dst.rows, localThreads[1]) * localThreads[1],
- 1
- };
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(dst.rows, localThreads[1]) *localThreads[1],
+ 1
+ };
int dst_step1 = dst.cols * dst.elemSize();
vector<pair<size_t , const void *> > args;
void arithmetic_scalar_run(const oclMat &src, oclMat &dst, string kernelName, const char **kernelString, double scalar)
{
- if(src.clCxt -> impl -> double_support ==0 && src.type() == CV_64F)
+ if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
}
CV_Assert(src.depth() != CV_8S);
Context *clCxt = src.clCxt;
- int channels = dst.channels();
+ int channels = dst.oclchannels();
int depth = dst.depth();
int vector_lengths[4][7] = {{4, 0, 4, 4, 1, 1, 1},
{4, 0, 4, 4, 1, 1, 1}
};
- size_t vector_length = vector_lengths[channels-1][depth];
+ size_t vector_length = vector_lengths[channels - 1][depth];
int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
int cols = divUp(dst.cols * channels + offset_cols, vector_length);
size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(dst.rows, localThreads[1]) * localThreads[1],
- 1
- };
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(dst.rows, localThreads[1]) *localThreads[1],
+ 1
+ };
int dst_step1 = dst.cols * dst.elemSize();
vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
- if(src.clCxt -> impl -> double_support !=0)
+ if(src.clCxt -> impl -> double_support != 0)
args.push_back( make_pair( sizeof(cl_double), (void *)&scalar ));
else
{
}
void cv::ocl::divide(double scalar, const oclMat &src, oclMat &dst)
{
- if(src.clCxt -> impl -> double_support ==0)
+ if(src.clCxt -> impl -> double_support == 0)
{
- CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
}
void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString)
{
dst.create(src1.size(), CV_8UC1);
- CV_Assert(src1.channels() == 1);
+ CV_Assert(src1.oclchannels() == 1);
CV_Assert(src1.type() == src2.type());
Context *clCxt = src1.clCxt;
int depth = src1.depth();
int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
int cols = divUp(dst.cols + offset_cols, vector_length);
size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(dst.rows, localThreads[1]) * localThreads[1],
- 1
- };
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(dst.rows, localThreads[1]) *localThreads[1],
+ 1
+ };
int dst_step1 = dst.cols * dst.elemSize();
vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int cmpOp)
{
- if(src1.clCxt -> impl -> double_support ==0 && src1.type()==CV_64F)
+ if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
int cols = all_cols - invalid_cols , elemnum = cols * src.rows;;
int offset = src.offset / (vlen * src.elemSize1());
int repeat_s = src.offset / src.elemSize1() - offset * vlen;
- int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.channels();
+ int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.oclchannels();
char build_options[512];
CV_Assert(type == 0 || type == 1 || type == 2);
sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d -D FUNC_TYPE_%d", src.depth(), repeat_s, repeat_e, type);
args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst ));
size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1};
- if(src.channels() != 3)
+ if(src.oclchannels() != 3)
openCLExecuteKernel(src.clCxt, &arithm_sum, "arithm_op_sum", gt, lt, args, -1, -1, build_options);
else
openCLExecuteKernel(src.clCxt, &arithm_sum_3, "arithm_op_sum_3", gt, lt, args, -1, -1, build_options);
}
template <typename T>
-Scalar arithmetic_sum(const oclMat &src)
+Scalar arithmetic_sum(const oclMat &src, int type = 0)
{
size_t groupnum = src.clCxt->impl->maxComputeUnits;
CV_Assert(groupnum != 0);
- int vlen = src.channels() == 3 ? 12 : 8, dbsize = groupnum * vlen, status;
+ int vlen = src.oclchannels() == 3 ? 12 : 8, dbsize = groupnum * vlen, status;
Context *clCxt = src.clCxt;
T *p = new T[dbsize];
- cl_mem dstBuffer = openCLCreateBuffer(clCxt,CL_MEM_WRITE_ONLY,dbsize*sizeof(T));
+ cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(T));
Scalar s;
s.val[0] = 0.0;
s.val[1] = 0.0;
s.val[2] = 0.0;
s.val[3] = 0.0;
- arithmetic_sum_buffer_run(src, dstBuffer, vlen, groupnum);
+ arithmetic_sum_buffer_run(src, dstBuffer, vlen, groupnum, type);
memset(p, 0, dbsize * sizeof(T));
- openCLReadBuffer(clCxt,dstBuffer,(void *)p,dbsize*sizeof(T));
+ openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(T));
for(int i = 0; i < dbsize;)
{
- for(int j = 0; j < src.channels(); j++, i++)
+ for(int j = 0; j < src.oclchannels(); j++, i++)
s.val[j] += p[i];
}
delete[] p;
return s;
}
-typedef Scalar (*sumFunc)(const oclMat &src);
+typedef Scalar (*sumFunc)(const oclMat &src, int type);
Scalar cv::ocl::sum(const oclMat &src)
{
- if(src.clCxt->impl->double_support==0 && src.depth()==CV_64F)
+ if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"select device don't support double");
+ CV_Error(CV_GpuNotSupported, "select device don't support double");
}
static sumFunc functab[2] =
{
sumFunc func;
func = functab[src.clCxt->impl->double_support];
- return func(src);
+ return func(src, 0);
+}
+
+
+Scalar cv::ocl::sqrSum(const oclMat &src)
+{
+ if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
+ {
+ CV_Error(CV_GpuNotSupported, "select device don't support double");
+ }
+ static sumFunc functab[2] =
+ {
+ arithmetic_sum<float>,
+ arithmetic_sum<double>
+ };
+
+ sumFunc func;
+ func = functab[src.clCxt->impl->double_support];
+ return func(src, 2);
}
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////// meanStdDev //////////////////////////////////
{
CV_Assert(src.depth() <= CV_32S);
cv::Size sz(1, 1);
- int channels = src.channels();
+ int channels = src.oclchannels();
Mat m1(sz, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(0)),
m2(sz, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(0));
oclMat dst1(m1), dst2(m2);
int cols = all_cols - invalid_cols , elemnum = cols * src.rows;;
int offset = src.offset / (vlen * src.elemSize1());
int repeat_s = src.offset / src.elemSize1() - offset * vlen;
- int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.channels();
+ int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.oclchannels();
char build_options[50];
sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d", src.depth(), repeat_s, repeat_e);
args.push_back( make_pair( sizeof(cl_int) , (void *)&cols ));
vector<pair<size_t , const void *> > args;
size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1};
char build_options[50];
- if(src.channels() == 1)
+ if(src.oclchannels() == 1)
{
int cols = (src.cols - 1) / vlen + 1;
int invalid_cols = src.step / (vlen * src.elemSize1()) - cols;
int vlen = 8;
int dbsize = groupnum * 2 * vlen * sizeof(T) , status;
Context *clCxt = src.clCxt;
- cl_mem dstBuffer = openCLCreateBuffer(clCxt,CL_MEM_WRITE_ONLY,dbsize);
+ cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize);
*minVal = std::numeric_limits<double>::max() , *maxVal = -std::numeric_limits<double>::max();
if (mask.empty())
{
}
T *p = new T[groupnum * vlen * 2];
memset(p, 0, dbsize);
- openCLReadBuffer(clCxt,dstBuffer,(void *)p,dbsize);
+ openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize);
for(int i = 0; i < vlen * groupnum; i++)
{
*minVal = *minVal < p[i] ? *minVal : p[i];
typedef void (*minMaxFunc)(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask);
void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
{
- CV_Assert(src.channels() == 1);
- if(src.clCxt->impl->double_support==0 && src.depth()==CV_64F)
+ CV_Assert(src.oclchannels() == 1);
+ if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"select device don't support double");
+ CV_Error(CV_GpuNotSupported, "select device don't support double");
}
static minMaxFunc functab[8] =
{
bool isRelative = (normType & NORM_RELATIVE) != 0;
normType &= 7;
CV_Assert(src1.depth() <= CV_32S && src1.type() == src2.type() && ( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2));
- int channels = src1.channels(), i = 0, *p;
+ int channels = src1.oclchannels(), i = 0, *p;
double r = 0;
oclMat gm1(src1.size(), src1.type());
int min_int = (normType == NORM_INF ? CL_INT_MIN : 0);
//////////////////////////////////////////////////////////////////////////////
void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kernelName)
{
- if(src.clCxt -> impl -> double_support ==0 && src.type() == CV_64F)
+ if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
}
CV_Assert(src.type() == dst.type());
Context *clCxt = src.clCxt;
- int channels = dst.channels();
+ int channels = dst.oclchannels();
int depth = dst.depth();
int vector_lengths[4][7] = {{4, 4, 4, 4, 1, 1, 1},
{4, 4, 4, 4, 1, 1, 1}
};
- size_t vector_length = vector_lengths[channels-1][depth];
+ size_t vector_length = vector_lengths[channels - 1][depth];
int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1);
int cols = divUp(dst.cols * channels + offset_cols, vector_length);
int rows = divUp(dst.rows, 2);
size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(rows, localThreads[1]) * localThreads[1],
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(rows, localThreads[1]) *localThreads[1],
1
};
}
void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kernelName, bool isVertical)
{
- if(src.clCxt -> impl -> double_support ==0 && src.type() == CV_64F)
+ if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
}
CV_Assert(src.type() == dst.type());
Context *clCxt = src.clCxt;
- int channels = dst.channels();
+ int channels = dst.oclchannels();
int depth = dst.depth();
int vector_lengths[4][7] = {{1, 1, 1, 1, 1, 1, 1},
{1, 1, 1, 1, 1, 1, 1}
};
- size_t vector_length = vector_lengths[channels-1][depth];
+ size_t vector_length = vector_lengths[channels - 1][depth];
int offset_cols = ((dst.offset % dst.step) / dst.elemSize()) & (vector_length - 1);
int cols = divUp(dst.cols + offset_cols, vector_length);
cols = isVertical ? cols : divUp(cols, 2);
int rows = isVertical ? divUp(dst.rows, 2) : dst.rows;
size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(rows, localThreads[1]) * localThreads[1],
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(rows, localThreads[1]) *localThreads[1],
1
};
const char **kernelString = isVertical ? &arithm_flip_rc : &arithm_flip;
- openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, src.channels(), depth);
+ openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, src.oclchannels(), depth);
}
void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode)
{
void arithmetic_lut_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName)
{
Context *clCxt = src1.clCxt;
- int channels = src1.channels();
+ int channels = src1.oclchannels();
int rows = src1.rows;
int cols = src1.cols;
//int step = src1.step;
- int src_step = src1.step/ src1.elemSize();
- int dst_step = dst.step/ dst.elemSize();
+ int src_step = src1.step / src1.elemSize();
+ int dst_step = dst.step / dst.elemSize();
int whole_rows = src1.wholerows;
int whole_cols = src1.wholecols;
- int src_offset = src1.offset/ src1.elemSize();
- int dst_offset = dst.offset/ dst.elemSize();
- int lut_offset = src2.offset/ src2.elemSize();
+ int src_offset = src1.offset / src1.elemSize();
+ int dst_offset = dst.offset / dst.elemSize();
+ int lut_offset = src2.offset / src2.elemSize();
int left_col = 0, right_col = 0;
size_t localSize[] = {16, 16, 1};
//cl_kernel kernel = openCLGetKernelFromSource(clCxt,&arithm_LUT,kernelName);
- size_t globalSize[] = {(cols + localSize[0] - 1) / localSize[0]*localSize[0], (rows + localSize[1] - 1) / localSize[1]*localSize[1], 1};
+ size_t globalSize[] = {(cols + localSize[0] - 1) / localSize[0] *localSize[0], (rows + localSize[1] - 1) / localSize[1] *localSize[1], 1};
if(channels == 1 && cols > 6)
{
left_col = 4 - (dst_offset & 3);
CV_Assert(clCxt == dst.clCxt);
CV_Assert(src1.cols == dst.cols);
CV_Assert(src1.rows == dst.rows);
- CV_Assert(src1.channels() == dst.channels());
+ CV_Assert(src1.oclchannels() == dst.oclchannels());
// CV_Assert(src1.step == dst.step);
vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_int), (void *)&lut_offset ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src_step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step ));
- openCLExecuteKernel(clCxt, &arithm_LUT, kernelName, globalSize, localSize, args, src1.channels(), src1.depth());
+ openCLExecuteKernel(clCxt, &arithm_LUT, kernelName, globalSize, localSize, args, src1.oclchannels(), src1.depth());
}
if(channels == 1 && (left_col != 0 || right_col != 0))
{
args.push_back( make_pair( sizeof(cl_int), (void *)&lut_offset ));
args.push_back( make_pair( sizeof(cl_int), (void *)&src_step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step ));
- openCLExecuteKernel(clCxt, &arithm_LUT, "LUT2", globalSize, localSize, args, src1.channels(), src1.depth());
+ openCLExecuteKernel(clCxt, &arithm_LUT, "LUT2", globalSize, localSize, args, src1.oclchannels(), src1.depth());
}
}
{
int cn = src.channels();
CV_Assert(src.depth() == CV_8U);
- CV_Assert((lut.channels() == 1 || lut.channels() == cn) && lut.rows == 1 && lut.cols == 256);
+ CV_Assert((lut.oclchannels() == 1 || lut.oclchannels() == cn) && lut.rows == 1 && lut.cols == 256);
dst.create(src.size(), CV_MAKETYPE(lut.depth(), cn));
//oclMat _lut(lut);
string kernelName = "LUT";
CV_Assert( src.type() == CV_32F || src.type() == CV_64F);
Context *clCxt = src.clCxt;
- if(clCxt -> impl -> double_support ==0 && src.type() == CV_64F)
+ if(clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
}
- //int channels = dst.channels();
+ //int channels = dst.oclchannels();
int depth = dst.depth();
size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { divUp(dst.cols, localThreads[0]) * localThreads[0],
- divUp(dst.rows, localThreads[1]) * localThreads[1],
+ size_t globalThreads[3] = { divUp(dst.cols, localThreads[0]) *localThreads[0],
+ divUp(dst.rows, localThreads[1]) *localThreads[1],
1
};
//////////////////////////////////////////////////////////////////////////////
void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName)
{
- if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F)
+ if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
}
Context *clCxt = src1.clCxt;
- int channels = dst.channels();
+ int channels = dst.oclchannels();
int depth = dst.depth();
size_t vector_length = 1;
int rows = dst.rows;
size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(rows, localThreads[1]) * localThreads[1],
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(rows, localThreads[1]) *localThreads[1],
1
};
void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString)
{
- if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F)
+ if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
}
CV_Assert(src1.type() == src2.type() && src1.type() == dst.type());
Context *clCxt = src1.clCxt;
- int channels = dst.channels();
+ int channels = dst.oclchannels();
int depth = dst.depth();
size_t vector_length = 1;
int rows = dst.rows;
size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(rows, localThreads[1]) * localThreads[1],
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(rows, localThreads[1]) *localThreads[1],
1
};
void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, oclMat &dst_mag, oclMat &dst_cart,
string kernelName, bool angleInDegrees)
{
- if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F)
+ if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
}
Context *clCxt = src1.clCxt;
- int channels = src1.channels();
+ int channels = src1.oclchannels();
int depth = src1.depth();
int cols = src1.cols * channels;
int rows = src1.rows;
size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(rows, localThreads[1]) * localThreads[1],
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(rows, localThreads[1]) *localThreads[1],
1
};
void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees,
string kernelName)
{
- if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F)
+ if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
}
Context *clCxt = src2.clCxt;
- int channels = src2.channels();
+ int channels = src2.oclchannels();
int depth = src2.depth();
int cols = src2.cols * channels;
int rows = src2.rows;
size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(rows, localThreads[1]) * localThreads[1],
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(rows, localThreads[1]) *localThreads[1],
1
};
vector<pair<size_t , const void *> > args;
size_t gt[3] = {groupnum * 256, 1, 1}, lt[3] = {256, 1, 1};
char build_options[50];
- if(src.channels() == 1)
+ if(src.oclchannels() == 1)
{
int cols = (src.cols - 1) / vlen + 1;
int invalid_cols = src.step / (vlen * src.elemSize1()) - cols;
}
template<typename T>
void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
- Point *minLoc, Point *maxLoc, const oclMat &mask)
+ Point *minLoc, Point *maxLoc, const oclMat &mask)
{
- CV_Assert(src.channels() == 1);
- size_t groupnum = src.clCxt->impl->maxComputeUnits;
+ CV_Assert(src.oclchannels() == 1);
+ size_t groupnum = src.clCxt->impl->maxComputeUnits;
CV_Assert(groupnum != 0);
int minloc = -1 , maxloc = -1;
int vlen = 4, dbsize = groupnum * vlen * 4 * sizeof(T) , status;
Context *clCxt = src.clCxt;
- cl_mem dstBuffer = openCLCreateBuffer(clCxt,CL_MEM_WRITE_ONLY,dbsize);
+ cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize);
*minVal = std::numeric_limits<double>::max() , *maxVal = -std::numeric_limits<double>::max();
if (mask.empty())
{
}
T *p = new T[groupnum * vlen * 4];
memset(p, 0, dbsize);
- openCLReadBuffer(clCxt,dstBuffer,(void *)p,dbsize);
+ openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize);
for(int i = 0; i < vlen * groupnum; i++)
{
- *minVal = (*minVal < p[i] || p[i + 2 * vlen *groupnum] == -1) ? *minVal : p[i];
- minloc = (*minVal < p[i] || p[i + 2 * vlen *groupnum] == -1) ? minloc : p[i + 2 * vlen * groupnum];
+ *minVal = (*minVal < p[i] || p[i + 2 * vlen * groupnum] == -1) ? *minVal : p[i];
+ minloc = (*minVal < p[i] || p[i + 2 * vlen * groupnum] == -1) ? minloc : p[i + 2 * vlen * groupnum];
}
for(int i = vlen * groupnum; i < 2 * vlen * groupnum; i++)
{
- *maxVal = (*maxVal > p[i] || p[i + 2 * vlen *groupnum] == -1) ? *maxVal : p[i];
- maxloc = (*maxVal > p[i] || p[i + 2 * vlen *groupnum] == -1) ? maxloc : p[i + 2 * vlen * groupnum];
+ *maxVal = (*maxVal > p[i] || p[i + 2 * vlen * groupnum] == -1) ? *maxVal : p[i];
+ maxloc = (*maxVal > p[i] || p[i + 2 * vlen * groupnum] == -1) ? maxloc : p[i + 2 * vlen * groupnum];
}
int pre_rows = src.offset / src.step;
}
typedef void (*minMaxLocFunc)(const oclMat &src, double *minVal, double *maxVal,
- Point *minLoc, Point *maxLoc, const oclMat &mask);
+ Point *minLoc, Point *maxLoc, const oclMat &mask);
void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
Point *minLoc, Point *maxLoc, const oclMat &mask)
{
- if(src.clCxt->impl->double_support==0 && src.depth()==CV_64F)
+ if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"select device don't support double");
+ CV_Error(CV_GpuNotSupported, "select device don't support double");
}
static minMaxLocFunc functab[2] =
{
minMaxLocFunc func;
func = functab[src.clCxt->impl->double_support];
- func(src,minVal,maxVal,minLoc,maxLoc,mask);
+ func(src, minVal, maxVal, minLoc, maxLoc, mask);
}
//////////////////////////////////////////////////////////////////////////////
int cols = all_cols - invalid_cols , elemnum = cols * src.rows;;
int offset = src.offset / (vlen * src.elemSize1());
int repeat_s = src.offset / src.elemSize1() - offset * vlen;
- int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.channels();
+ int repeat_e = (offset + cols) * vlen - src.offset / src.elemSize1() - src.cols * src.oclchannels();
char build_options[50];
sprintf(build_options, "-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d", src.depth(), repeat_s, repeat_e);
int cv::ocl::countNonZero(const oclMat &src)
{
size_t groupnum = src.clCxt->impl->maxComputeUnits;
- if(src.clCxt->impl->double_support == 0 && src.depth()==CV_64F)
+ if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"select device don't support double");
+ CV_Error(CV_GpuNotSupported, "select device don't support double");
}
CV_Assert(groupnum != 0);
groupnum = groupnum * 2;
Context *clCxt = src.clCxt;
string kernelName = "arithm_op_nonzero";
int *p = new int[dbsize], nonzero = 0;
- cl_mem dstBuffer = openCLCreateBuffer(clCxt,CL_MEM_WRITE_ONLY,dbsize*sizeof(int));
+ cl_mem dstBuffer = openCLCreateBuffer(clCxt, CL_MEM_WRITE_ONLY, dbsize * sizeof(int));
arithmetic_countNonZero_run(src, dstBuffer, vlen, groupnum, kernelName);
memset(p, 0, dbsize * sizeof(int));
- openCLReadBuffer(clCxt,dstBuffer,(void *)p,dbsize*sizeof(int));
+ openCLReadBuffer(clCxt, dstBuffer, (void *)p, dbsize * sizeof(int));
for(int i = 0; i < dbsize; i++)
{
nonzero += p[i];
Context *clCxt = src1.clCxt;
- int channels = dst.channels();
+ int channels = dst.oclchannels();
int depth = dst.depth();
int vector_lengths[4][7] = {{4, 4, 4, 4, 1, 1, 1},
{4, 4, 4, 4, 1, 1, 1}
};
- size_t vector_length = vector_lengths[channels-1][depth];
+ size_t vector_length = vector_lengths[channels - 1][depth];
int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
int cols = divUp(dst.cols * channels + offset_cols, vector_length);
size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(dst.rows, localThreads[1]) * localThreads[1],
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(dst.rows, localThreads[1]) *localThreads[1],
1
};
CV_Assert(src1.type() == src2.type() && src1.type() == dst.type());
Context *clCxt = src1.clCxt;
- int channels = dst.channels();
+ int channels = dst.oclchannels();
int depth = dst.depth();
int vector_lengths[4][7] = {{4, 4, 4, 4, 1, 1, 1},
{4, 4, 4, 4, 1, 1, 1}
};
- size_t vector_length = vector_lengths[channels-1][depth];
+ size_t vector_length = vector_lengths[channels - 1][depth];
int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
int cols = divUp(dst.cols * channels + offset_cols, vector_length);
size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(dst.rows, localThreads[1]) * localThreads[1],
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(dst.rows, localThreads[1]) *localThreads[1],
1
};
CV_Assert(mask.type() == CV_8U);
Context *clCxt = src1.clCxt;
- int channels = dst.channels();
+ int channels = dst.oclchannels();
int depth = dst.depth();
int vector_lengths[4][7] = {{4, 4, 2, 2, 1, 1, 1},
{1, 1, 1, 1, 1, 1, 1}
};
- size_t vector_length = vector_lengths[channels-1][depth];
+ size_t vector_length = vector_lengths[channels - 1][depth];
int offset_cols = ((dst.offset % dst.step) / dst.elemSize()) & (vector_length - 1);
int cols = divUp(dst.cols + offset_cols, vector_length);
size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(dst.rows, localThreads[1]) * localThreads[1],
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(dst.rows, localThreads[1]) *localThreads[1],
1
};
}
-template <typename WT ,typename CL_WT>
+template <typename WT , typename CL_WT>
void bitwise_scalar_run(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask, string kernelName, const char **kernelString, int isMatSubScalar)
{
dst.create(src1.size(), src1.type());
CV_Assert(mask.type() == CV_8U && src1.rows == mask.rows && src1.cols == mask.cols);
Context *clCxt = src1.clCxt;
- int channels = dst.channels();
+ int channels = dst.oclchannels();
int depth = dst.depth();
WT s[4] = { saturate_cast<WT>(src2.val[0]), saturate_cast<WT>(src2.val[1]),
{1, 1, 1, 1, 1, 1, 1}
};
- size_t vector_length = vector_lengths[channels-1][depth];
+ size_t vector_length = vector_lengths[channels - 1][depth];
int offset_cols = ((dst.offset % dst.step) / dst.elemSize()) & (vector_length - 1);
int cols = divUp(dst.cols + offset_cols, vector_length);
size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(dst.rows, localThreads[1]) * localThreads[1],
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(dst.rows, localThreads[1]) *localThreads[1],
1
};
0
#else
- bitwise_scalar_run<unsigned char,cl_uchar4>,
- bitwise_scalar_run<char,cl_char4>,
- bitwise_scalar_run<unsigned short,cl_ushort4>,
- bitwise_scalar_run<short,cl_short4>,
- bitwise_scalar_run<int,cl_int4>,
- bitwise_scalar_run<float,cl_float4>,
- bitwise_scalar_run<double,cl_double4>,
+ bitwise_scalar_run<unsigned char, cl_uchar4>,
+ bitwise_scalar_run<char, cl_char4>,
+ bitwise_scalar_run<unsigned short, cl_ushort4>,
+ bitwise_scalar_run<short, cl_short4>,
+ bitwise_scalar_run<int, cl_int4>,
+ bitwise_scalar_run<float, cl_float4>,
+ bitwise_scalar_run<double, cl_double4>,
0
#endif
};
void cv::ocl::bitwise_not(const oclMat &src, oclMat &dst)
{
- if(src.clCxt -> impl -> double_support ==0 && src.type()==CV_64F)
+ if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
void cv::ocl::bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
{
// dst.create(src1.size(),src1.type());
- if(src1.clCxt -> impl -> double_support ==0 && src1.type()==CV_64F)
+ if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
void cv::ocl::bitwise_or(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask)
{
- if(src1.clCxt -> impl -> double_support ==0 && src1.type()==CV_64F)
+ if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
void cv::ocl::bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
{
// dst.create(src1.size(),src1.type());
- if(src1.clCxt -> impl -> double_support ==0 && src1.type()==CV_64F)
+ if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
void cv::ocl::bitwise_and(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask)
{
- if(src1.clCxt -> impl -> double_support ==0 && src1.type()==CV_64F)
+ if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
void cv::ocl::bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask)
{
- if(src1.clCxt -> impl -> double_support ==0 && src1.type()==CV_64F)
+ if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
void cv::ocl::bitwise_xor(const oclMat &src1, const Scalar &src2, oclMat &dst, const oclMat &mask)
{
- if(src1.clCxt -> impl -> double_support ==0 && src1.type()==CV_64F)
+ if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
#define BLOCK_ROWS (256/TILE_DIM)
void transpose_run(const oclMat &src, oclMat &dst, string kernelName)
{
- if(src.clCxt -> impl -> double_support ==0 && src.type() == CV_64F)
+ if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
}
CV_Assert(src.cols == dst.rows && src.rows == dst.cols);
Context *clCxt = src.clCxt;
- int channels = src.channels();
+ int channels = src.oclchannels();
int depth = src.depth();
int vector_lengths[4][7] = {{1, 0, 0, 0, 1, 1, 0},
{1, 1, 0, 0, 0, 0, 0}
};
- size_t vector_length = vector_lengths[channels-1][depth];
+ size_t vector_length = vector_lengths[channels - 1][depth];
int offset_cols = ((dst.offset % dst.step) / dst.elemSize()) & (vector_length - 1);
int cols = divUp(src.cols + offset_cols, vector_length);
size_t localThreads[3] = { TILE_DIM, BLOCK_ROWS, 1 };
- size_t globalThreads[3] = { divUp(cols, TILE_DIM) * localThreads[0],
- divUp(src.rows, TILE_DIM) * localThreads[1],
+ size_t globalThreads[3] = { divUp(cols, TILE_DIM) *localThreads[0],
+ divUp(src.rows, TILE_DIM) *localThreads[1],
1
};
void cv::ocl::transpose(const oclMat &src, oclMat &dst)
{
- CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_8SC4 ||
+ CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3 || src.type() == CV_8UC4 || src.type() == CV_8SC3 || src.type() == CV_8SC4 ||
src.type() == CV_16UC2 || src.type() == CV_16SC2 || src.type() == CV_32SC1 || src.type() == CV_32FC1);
stringstream idxstr;
CV_Assert(src1.type() == src2.type() && src1.type() == dst.type());
Context *clCxt = src1.clCxt;
- int channels = dst.channels();
+ int channels = dst.oclchannels();
int depth = dst.depth();
};
- size_t vector_length = vector_lengths[channels-1][depth];
+ size_t vector_length = vector_lengths[channels - 1][depth];
int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
int cols = divUp(dst.cols * channels + offset_cols, vector_length);
size_t localThreads[3] = { 256, 1, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(dst.rows, localThreads[1]) * localThreads[1],
- 1
- };
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(dst.rows, localThreads[1]) *localThreads[1],
+ 1
+ };
int dst_step1 = dst.cols * dst.elemSize();
vector<pair<size_t , const void *> > args;
}
else
{
- float alpha_f=alpha,beta_f=beta,gama_f=gama;
+ float alpha_f = alpha, beta_f = beta, gama_f = gama;
args.push_back( make_pair( sizeof(cl_float), (void *)&alpha_f ));
args.push_back( make_pair( sizeof(cl_float), (void *)&beta_f ));
args.push_back( make_pair( sizeof(cl_float), (void *)&gama_f ));
- }
+ }
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
void cv::ocl::magnitudeSqr(const oclMat &src1, const oclMat &src2, oclMat &dst)
{
CV_Assert(src1.type() == src2.type() && src1.size() == src2.size() &&
- (src1.depth() == CV_32F ));
+ (src1.depth() == CV_32F ));
dst.create(src1.size(), src1.type());
Context *clCxt = src1.clCxt;
- int channels = dst.channels();
+ int channels = dst.oclchannels();
int depth = dst.depth();
};
- size_t vector_length = vector_lengths[channels-1][depth];
+ size_t vector_length = vector_lengths[channels - 1][depth];
int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
int cols = divUp(dst.cols * channels + offset_cols, vector_length);
size_t localThreads[3] = { 256, 1, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(dst.rows, localThreads[1]) * localThreads[1],
- 1
- };
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(dst.rows, localThreads[1]) *localThreads[1],
+ 1
+ };
int dst_step1 = dst.cols * dst.elemSize();
vector<pair<size_t , const void *> > args;
Context *clCxt = src1.clCxt;
- int channels = dst.channels();
+ int channels = dst.oclchannels();
int depth = dst.depth();
};
- size_t vector_length = vector_lengths[channels-1][depth];
+ size_t vector_length = vector_lengths[channels - 1][depth];
int offset_cols = (dst.offset / dst.elemSize1()) & (vector_length - 1);
int cols = divUp(dst.cols * channels + offset_cols, vector_length);
size_t localThreads[3] = { 256, 1, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(dst.rows, localThreads[1]) * localThreads[1],
- 1
- };
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(dst.rows, localThreads[1]) *localThreads[1],
+ 1
+ };
int dst_step1 = dst.cols * dst.elemSize();
vector<pair<size_t , const void *> > args;
CV_Assert(src1.type() == dst.type());
Context *clCxt = src1.clCxt;
- int channels = dst.channels();
+ int channels = dst.oclchannels();
int depth = dst.depth();
size_t vector_length = 1;
int rows = dst.rows;
size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(rows, localThreads[1]) * localThreads[1],
- 1
- };
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(rows, localThreads[1]) *localThreads[1],
+ 1
+ };
int dst_step1 = dst.cols * dst.elemSize();
vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
- if(src1.clCxt -> impl -> double_support ==0)
+ if(src1.clCxt -> impl -> double_support == 0)
{
- float pf = p;
- args.push_back( make_pair( sizeof(cl_float), (void *)&pf ));
+ float pf = p;
+ args.push_back( make_pair( sizeof(cl_float), (void *)&pf ));
}
else
- args.push_back( make_pair( sizeof(cl_double), (void *)&p ));
+ args.push_back( make_pair( sizeof(cl_double), (void *)&p ));
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
}
void cv::ocl::pow(const oclMat &x, double p, oclMat &y)
{
- if(x.clCxt -> impl -> double_support ==0 && x.type()==CV_64F)
+ if(x.clCxt -> impl -> double_support == 0 && x.type() == CV_64F)
{
cout << "Selected device do not support double" << endl;
return;
using namespace std;
#if !defined (HAVE_OPENCL)
-void cv::ocl::blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2,
- oclMat& result){throw_nogpu();}
+void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2,
+ oclMat &result)
+{
+ throw_nogpu();
+}
#else
-namespace cv
+namespace cv
{
- namespace ocl
- {
+ namespace ocl
+ {
////////////////////////////////////OpenCL kernel strings//////////////////////////
extern const char *blend_linear;
- }
+ }
}
-void cv::ocl::blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2,
- oclMat& result)
+void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2,
+ oclMat &result)
{
- cv::ocl::Context *ctx = img1.clCxt;
- assert(ctx == img2.clCxt && ctx == weights1.clCxt && ctx == weights2.clCxt);
- int channels = img1.channels();
- int depth = img1.depth();
- int rows = img1.rows;
- int cols = img1.cols;
- int istep = img1.step1();
- int wstep = weights1.step1();
- size_t globalSize[] = {cols * channels, rows, 1};
- size_t localSize[] = {16, 16, 1};
+ cv::ocl::Context *ctx = img1.clCxt;
+ assert(ctx == img2.clCxt && ctx == weights1.clCxt && ctx == weights2.clCxt);
+ int channels = img1.oclchannels();
+ int depth = img1.depth();
+ int rows = img1.rows;
+ int cols = img1.cols;
+ int istep = img1.step1();
+ int wstep = weights1.step1();
+ size_t globalSize[] = {cols * channels, rows, 1};
+ size_t localSize[] = {16, 16, 1};
- vector< pair<size_t, const void *> > args;
+ vector< pair<size_t, const void *> > args;
- if(globalSize[0]!=0)
- {
- args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&img1.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&img2.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&istep ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&wstep ));
- std::string kernelName = "BlendLinear";
+ if(globalSize[0] != 0)
+ {
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&img1.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&img2.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&weights1.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&weights2.data ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&rows ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&istep ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&wstep ));
+ std::string kernelName = "BlendLinear";
- openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth);
- }
+ openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth);
+ }
}
#endif
\ No newline at end of file
using namespace std;
#if !defined (HAVE_OPENCL)
-cv::ocl::BruteForceMatcher_OCL_base::BruteForceMatcher_OCL_base(DistType) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::add(const vector<oclMat>&) { throw_nogpu(); }
-const vector<oclMat>& cv::ocl::BruteForceMatcher_OCL_base::getTrainDescriptors() const { throw_nogpu(); return trainDescCollection; }
-void cv::ocl::BruteForceMatcher_OCL_base::clear() { throw_nogpu(); }
-bool cv::ocl::BruteForceMatcher_OCL_base::empty() const { throw_nogpu(); return true; }
-bool cv::ocl::BruteForceMatcher_OCL_base::isMaskSupported() const { throw_nogpu(); return true; }
-void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat&, const oclMat&, oclMat&, oclMat&, const oclMat&) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat&, const oclMat&, vector<DMatch>&) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat&, const Mat&, vector<DMatch>&) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat&, const oclMat&, vector<DMatch>&, const oclMat&) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::makeGpuCollection(oclMat&, oclMat&, const vector<oclMat>&) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat&, const oclMat&) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat&, const oclMat&, const oclMat&, vector<DMatch>&) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat&, const Mat&, const Mat&, vector<DMatch>&) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat&, vector<DMatch>&, const vector<oclMat>&) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat&, int, const oclMat&) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatchDownload(const oclMat&, const oclMat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatchConvert(const Mat&, const Mat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat&, const oclMat&, vector< vector<DMatch> >&, int, const oclMat&, bool) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat&, const oclMat&) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Download(const oclMat&, const oclMat&, const oclMat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Convert(const Mat&, const Mat&, const Mat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat&, vector< vector<DMatch> >&, int, const vector<oclMat>&, bool) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat&, float, const oclMat&) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat&, const oclMat&, const oclMat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat&, const Mat&, const Mat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat&, const oclMat&, vector< vector<DMatch> >&, float, const oclMat&, bool) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat&, oclMat&, oclMat&, oclMat&, oclMat&, float, const vector<oclMat>&) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat&, const oclMat&, const oclMat&, const oclMat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat&, const Mat&, const Mat&, const Mat&, vector< vector<DMatch> >&, bool) { throw_nogpu(); }
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat&, vector< vector<DMatch> >&, float, const vector<oclMat>&, bool) { throw_nogpu(); }
+cv::ocl::BruteForceMatcher_OCL_base::BruteForceMatcher_OCL_base(DistType)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::add(const vector<oclMat> &)
+{
+ throw_nogpu();
+}
+const vector<oclMat> &cv::ocl::BruteForceMatcher_OCL_base::getTrainDescriptors() const
+{
+ throw_nogpu();
+ return trainDescCollection;
+}
+void cv::ocl::BruteForceMatcher_OCL_base::clear()
+{
+ throw_nogpu();
+}
+bool cv::ocl::BruteForceMatcher_OCL_base::empty() const
+{
+ throw_nogpu();
+ return true;
+}
+bool cv::ocl::BruteForceMatcher_OCL_base::isMaskSupported() const
+{
+ throw_nogpu();
+ return true;
+}
+void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat &, const oclMat &, oclMat &, oclMat &, const oclMat &)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat &, const oclMat &, vector<DMatch> &)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &, const Mat &, vector<DMatch> &)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &, const oclMat &, vector<DMatch> &, const oclMat &)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::makeGpuCollection(oclMat &, oclMat &, const vector<oclMat> &)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &, const oclMat &)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat &, const oclMat &, const oclMat &, vector<DMatch> &)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &, const Mat &, const Mat &, vector<DMatch> &)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &, vector<DMatch> &, const vector<oclMat> &)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &, int, const oclMat &)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::knnMatchDownload(const oclMat &, const oclMat &, vector< vector<DMatch> > &, bool)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::knnMatchConvert(const Mat &, const Mat &, vector< vector<DMatch> > &, bool)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &, const oclMat &, vector< vector<DMatch> > &, int, const oclMat &, bool)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &, const oclMat &)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Download(const oclMat &, const oclMat &, const oclMat &, vector< vector<DMatch> > &, bool)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Convert(const Mat &, const Mat &, const Mat &, vector< vector<DMatch> > &, bool)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &, vector< vector<DMatch> > &, int, const vector<oclMat> &, bool)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &, float, const oclMat &)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat &, const oclMat &, const oclMat &, vector< vector<DMatch> > &, bool)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat &, const Mat &, const Mat &, vector< vector<DMatch> > &, bool)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &, const oclMat &, vector< vector<DMatch> > &, float, const oclMat &, bool)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat &, oclMat &, oclMat &, oclMat &, oclMat &, float, const vector<oclMat> &)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat &, const oclMat &, const oclMat &, const oclMat &, vector< vector<DMatch> > &, bool)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat &, const Mat &, const Mat &, const Mat &, vector< vector<DMatch> > &, bool)
+{
+ throw_nogpu();
+}
+void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &, vector< vector<DMatch> > &, float, const vector<oclMat> &, bool)
+{
+ throw_nogpu();
+}
#else /* !defined (HAVE_OPENCL) */
using namespace std;
-namespace cv
+namespace cv
{
- namespace ocl
- {
+ namespace ocl
+ {
////////////////////////////////////OpenCL kernel strings//////////////////////////
extern const char *brute_force_match;
- }
+ }
}
-template <int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/>
-void matchUnrolledCached(const oclMat& query, const oclMat& train, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance, int distType)
+template < int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/ >
+void matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &mask,
+ const oclMat &trainIdx, const oclMat &distance, int distType)
{
- cv::ocl::Context *ctx = query.clCxt;
- size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
- size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
- const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= 2 * BLOCK_SIZE ? MAX_DESC_LEN : 2 * BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
- int block_size = BLOCK_SIZE;
- int m_size = MAX_DESC_LEN;
- vector< pair<size_t, const void *> > args;
-
- if(globalSize[0] != 0)
- {
- args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
- args.push_back( make_pair( smemSize, (void *)NULL));
- args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&m_size ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
-
- std::string kernelName = "BruteForceMatch_UnrollMatch";
-
- openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1);
- }
-}
-
-template <int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/>
-void matchUnrolledCached(const oclMat query, const oclMat* trains, int n, const oclMat mask,
- const oclMat& bestTrainIdx, const oclMat& bestImgIdx, const oclMat& bestDistance, int distType)
+ cv::ocl::Context *ctx = query.clCxt;
+ size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
+ size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
+ const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= 2 * BLOCK_SIZE ? MAX_DESC_LEN : 2 * BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
+ int block_size = BLOCK_SIZE;
+ int m_size = MAX_DESC_LEN;
+ vector< pair<size_t, const void *> > args;
+
+ if(globalSize[0] != 0)
+ {
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
+ args.push_back( make_pair( smemSize, (void *)NULL));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&m_size ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
+
+ std::string kernelName = "BruteForceMatch_UnrollMatch";
+
+ openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1);
+ }
+}
+
+template < int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/ >
+void matchUnrolledCached(const oclMat query, const oclMat *trains, int n, const oclMat mask,
+ const oclMat &bestTrainIdx, const oclMat &bestImgIdx, const oclMat &bestDistance, int distType)
{
}
-template <int BLOCK_SIZE, typename T/*, typename Mask*/>
-void match(const oclMat& query, const oclMat& train, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance, int distType)
+template < int BLOCK_SIZE, typename T/*, typename Mask*/ >
+void match(const oclMat &query, const oclMat &train, const oclMat &mask,
+ const oclMat &trainIdx, const oclMat &distance, int distType)
{
- cv::ocl::Context *ctx = query.clCxt;
- size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
- size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
- const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
- int block_size = BLOCK_SIZE;
- vector< pair<size_t, const void *> > args;
+ cv::ocl::Context *ctx = query.clCxt;
+ size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
+ size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
+ const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
+ int block_size = BLOCK_SIZE;
+ vector< pair<size_t, const void *> > args;
- if(globalSize[0] != 0)
- {
- args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
- args.push_back( make_pair( smemSize, (void *)NULL));
- args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
+ if(globalSize[0] != 0)
+ {
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
+ args.push_back( make_pair( smemSize, (void *)NULL));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
- std::string kernelName = "BruteForceMatch_Match";
+ std::string kernelName = "BruteForceMatch_Match";
- openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1);
- }
+ openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1);
+ }
}
-template <int BLOCK_SIZE, typename T/*, typename Mask*/>
-void match(const oclMat query, const oclMat* trains, int n, const oclMat mask,
- const oclMat &bestTrainIdx, const oclMat& bestImgIdx, const oclMat& bestDistance, int distType)
+template < int BLOCK_SIZE, typename T/*, typename Mask*/ >
+void match(const oclMat query, const oclMat *trains, int n, const oclMat mask,
+ const oclMat &bestTrainIdx, const oclMat &bestImgIdx, const oclMat &bestDistance, int distType)
{
}
//radius_matchUnrolledCached
-template <int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/>
-void matchUnrolledCached(const oclMat& query, const oclMat& train, float maxDistance, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches, int distType)
-{
- cv::ocl::Context *ctx = query.clCxt;
- size_t globalSize[] = {(train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, (query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, 1};
- size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
- const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
- int block_size = BLOCK_SIZE;
- int m_size = MAX_DESC_LEN;
- vector< pair<size_t, const void *> > args;
-
- if(globalSize[0] != 0)
- {
- args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
- args.push_back( make_pair( sizeof(cl_float), (void *)&maxDistance ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&nMatches.data ));
- args.push_back( make_pair( smemSize, (void *)NULL));
- args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&m_size ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&trainIdx.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
-
- std::string kernelName = "BruteForceMatch_RadiusUnrollMatch";
-
- openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1);
- }
+template < int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/ >
+void matchUnrolledCached(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &mask,
+ const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType)
+{
+ cv::ocl::Context *ctx = query.clCxt;
+ size_t globalSize[] = {(train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, (query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, 1};
+ size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
+ const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
+ int block_size = BLOCK_SIZE;
+ int m_size = MAX_DESC_LEN;
+ vector< pair<size_t, const void *> > args;
+
+ if(globalSize[0] != 0)
+ {
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
+ args.push_back( make_pair( sizeof(cl_float), (void *)&maxDistance ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&nMatches.data ));
+ args.push_back( make_pair( smemSize, (void *)NULL));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&m_size ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.cols ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&trainIdx.step ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
+
+ std::string kernelName = "BruteForceMatch_RadiusUnrollMatch";
+
+ openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1);
+ }
}
//radius_match
-template <int BLOCK_SIZE, typename T/*, typename Mask*/>
-void radius_match(const oclMat& query, const oclMat& train, float maxDistance, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance,const oclMat& nMatches, int distType)
-{
- cv::ocl::Context *ctx = query.clCxt;
- size_t globalSize[] = {(train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, (query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, 1};
- size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
- const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
- int block_size = BLOCK_SIZE;
- vector< pair<size_t, const void *> > args;
-
- if(globalSize[0] != 0)
- {
- args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
- args.push_back( make_pair( sizeof(cl_float), (void *)&maxDistance ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&nMatches.data ));
- args.push_back( make_pair( smemSize, (void *)NULL));
- args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&trainIdx.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
-
- std::string kernelName = "BruteForceMatch_RadiusMatch";
-
- openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1);
- //float *dis = (float *)clEnqueueMapBuffer(ctx->impl->clCmdQueue, (cl_mem)distance.data, CL_TRUE, CL_MAP_READ, 0, 8, 0, NULL, NULL, NULL);
- //printf("%f, %f\n", dis[0], dis[1]);
- }
+template < int BLOCK_SIZE, typename T/*, typename Mask*/ >
+void radius_match(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &mask,
+ const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType)
+{
+ cv::ocl::Context *ctx = query.clCxt;
+ size_t globalSize[] = {(train.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, (query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, 1};
+ size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
+ const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
+ int block_size = BLOCK_SIZE;
+ vector< pair<size_t, const void *> > args;
+
+ if(globalSize[0] != 0)
+ {
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
+ args.push_back( make_pair( sizeof(cl_float), (void *)&maxDistance ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&nMatches.data ));
+ args.push_back( make_pair( smemSize, (void *)NULL));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.cols ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&trainIdx.step ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
+
+ std::string kernelName = "BruteForceMatch_RadiusMatch";
+
+ openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1);
+ //float *dis = (float *)clEnqueueMapBuffer(ctx->impl->clCmdQueue, (cl_mem)distance.data, CL_TRUE, CL_MAP_READ, 0, 8, 0, NULL, NULL, NULL);
+ //printf("%f, %f\n", dis[0], dis[1]);
+ }
}
// with mask
-template < typename T/*, typename Mask*/>
-void matchDispatcher(const oclMat& query, const oclMat& train, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance, int distType)
+template < typename T/*, typename Mask*/ >
+void matchDispatcher(const oclMat &query, const oclMat &train, const oclMat &mask,
+ const oclMat &trainIdx, const oclMat &distance, int distType)
{
if (query.cols <= 64)
{
matchUnrolled<16, 256, Dist>(query, train, mask, trainIdx, distance, stream);
}
else if (query.cols <= 512)
- {
+ {
matchUnrolled<16, 512, Dist>(query, train, mask, trainIdx, distance, stream);
}
else if (query.cols <= 1024)
- {
+ {
matchUnrolled<16, 1024, Dist>(query, train, mask, trainIdx, distance, stream);
}*/
else
}
// without mask
-template <typename T/*, typename Mask*/>
-void matchDispatcher(const oclMat& query, const oclMat& train, const oclMat& trainIdx, const oclMat& distance, int distType)
+template < typename T/*, typename Mask*/ >
+void matchDispatcher(const oclMat &query, const oclMat &train, const oclMat &trainIdx, const oclMat &distance, int distType)
{
- oclMat mask;
- if (query.cols <= 64)
+ oclMat mask;
+ if (query.cols <= 64)
{
matchUnrolledCached<16, 64, T>(query, train, mask, trainIdx, distance, distType);
}
matchUnrolled<16, 256, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance);
}
else if (query.cols <= 512)
- {
+ {
matchUnrolled<16, 512, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance);
}
else if (query.cols <= 1024)
- {
+ {
matchUnrolled<16, 1024, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance);
}*/
else
}
}
-template <typename T/*, typename Mask*/>
-void matchDispatcher(const oclMat& query, const oclMat* trains, int n, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, int distType)
+template < typename T/*, typename Mask*/ >
+void matchDispatcher(const oclMat &query, const oclMat *trains, int n, const oclMat &mask,
+ const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, int distType)
{
if (query.cols <= 64)
{
matchUnrolled<16, 256, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream);
}
else if (query.cols <= 512)
- {
+ {
matchUnrolled<16, 512, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream);
}
else if (query.cols <= 1024)
- {
+ {
matchUnrolled<16, 1024, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream);
}*/
else
}
}
-template <typename T/*, typename Mask*/>
-void matchDispatcher(const oclMat& query, const oclMat* trains, int n, const oclMat& trainIdx,
- const oclMat& imgIdx, const oclMat& distance, int distType)
+template < typename T/*, typename Mask*/ >
+void matchDispatcher(const oclMat &query, const oclMat *trains, int n, const oclMat &trainIdx,
+ const oclMat &imgIdx, const oclMat &distance, int distType)
{
- oclMat mask;
+ oclMat mask;
if (query.cols <= 64)
{
matchUnrolledCached<16, 64, T>(query, trains, n, mask, trainIdx, imgIdx, distance, distType);
matchUnrolled<16, 256, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream);
}
else if (query.cols <= 512)
- {
+ {
matchUnrolled<16, 512, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream);
}
else if (query.cols <= 1024)
- {
+ {
matchUnrolled<16, 1024, Dist>(query, trains, n, mask, trainIdx, imgIdx, distance, stream);
}*/
else
//radius matchDispatcher
// with mask
-template < typename T/*, typename Mask*/>
-void matchDispatcher(const oclMat& query, const oclMat& train, float maxDistance, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches, int distType)
+template < typename T/*, typename Mask*/ >
+void matchDispatcher(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &mask,
+ const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType)
{
if (query.cols <= 64)
{
}
// without mask
-template <typename T/*, typename Mask*/>
-void matchDispatcher(const oclMat& query, const oclMat& train, float maxDistance, const oclMat& trainIdx,
- const oclMat& distance, const oclMat& nMatches, int distType)
+template < typename T/*, typename Mask*/ >
+void matchDispatcher(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &trainIdx,
+ const oclMat &distance, const oclMat &nMatches, int distType)
{
- oclMat mask;
- if (query.cols <= 64)
+ oclMat mask;
+ if (query.cols <= 64)
{
matchUnrolledCached<16, 64, T>(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType);
}
}
}
-template < typename T/*, typename Mask*/>
-void matchDispatcher(const oclMat& query, const oclMat& train, int n, float maxDistance, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches, int distType)
+template < typename T/*, typename Mask*/ >
+void matchDispatcher(const oclMat &query, const oclMat &train, int n, float maxDistance, const oclMat &mask,
+ const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, int distType)
{
if (query.cols <= 64)
{
}
// without mask
-template <typename T/*, typename Mask*/>
-void matchDispatcher(const oclMat& query, const oclMat& train, int n, float maxDistance, const oclMat& trainIdx,
- const oclMat& distance, const oclMat& nMatches, int distType)
+template < typename T/*, typename Mask*/ >
+void matchDispatcher(const oclMat &query, const oclMat &train, int n, float maxDistance, const oclMat &trainIdx,
+ const oclMat &distance, const oclMat &nMatches, int distType)
{
- oclMat mask;
- if (query.cols <= 64)
+ oclMat mask;
+ if (query.cols <= 64)
{
matchUnrolledCached<16, 64, T>(query, train, n, maxDistance, mask, trainIdx, distance, nMatches, distType);
}
}
//knn match Dispatcher
-template <int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/>
-void knn_matchUnrolledCached(const oclMat& query, const oclMat& train, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance, int distType)
-{
- cv::ocl::Context *ctx = query.clCxt;
- size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
- size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
- const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= BLOCK_SIZE ? MAX_DESC_LEN : BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
- int block_size = BLOCK_SIZE;
- int m_size = MAX_DESC_LEN;
- vector< pair<size_t, const void *> > args;
-
- if(globalSize[0] != 0)
- {
- args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
- args.push_back( make_pair( smemSize, (void *)NULL));
- args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&m_size ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
-
- std::string kernelName = "BruteForceMatch_knnUnrollMatch";
-
- openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1);
- }
-}
-
-template <int BLOCK_SIZE, typename T/*, typename Mask*/>
-void knn_match(const oclMat& query, const oclMat& train, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance, int distType)
-{
- cv::ocl::Context *ctx = query.clCxt;
- size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
- size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
- const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
- int block_size = BLOCK_SIZE;
- vector< pair<size_t, const void *> > args;
-
- if(globalSize[0] != 0)
- {
- args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
- args.push_back( make_pair( smemSize, (void *)NULL));
- args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
-
- std::string kernelName = "BruteForceMatch_knnMatch";
-
- openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1);
- }
-}
-
-template <int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/>
-void calcDistanceUnrolled(const oclMat& query, const oclMat& train, const oclMat& mask, const oclMat& allDist, int distType)
-{
- cv::ocl::Context *ctx = query.clCxt;
- size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
- size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
- const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
- int block_size = BLOCK_SIZE;
- int m_size = MAX_DESC_LEN;
- vector< pair<size_t, const void *> > args;
-
- if(globalSize[0] != 0)
- {
- args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&allDist.data ));
- args.push_back( make_pair( smemSize, (void *)NULL));
- args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&m_size ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
-
- std::string kernelName = "BruteForceMatch_calcDistanceUnrolled";
-
- openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1);
- }
-}
-
-template <int BLOCK_SIZE, typename T/*, typename Mask*/>
-void calcDistance(const oclMat& query, const oclMat& train, const oclMat& mask, const oclMat& allDist, int distType)
+template < int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/ >
+void knn_matchUnrolledCached(const oclMat &query, const oclMat &train, const oclMat &mask,
+ const oclMat &trainIdx, const oclMat &distance, int distType)
+{
+ cv::ocl::Context *ctx = query.clCxt;
+ size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
+ size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
+ const size_t smemSize = (BLOCK_SIZE * (MAX_DESC_LEN >= BLOCK_SIZE ? MAX_DESC_LEN : BLOCK_SIZE) + BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
+ int block_size = BLOCK_SIZE;
+ int m_size = MAX_DESC_LEN;
+ vector< pair<size_t, const void *> > args;
+
+ if(globalSize[0] != 0)
+ {
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
+ args.push_back( make_pair( smemSize, (void *)NULL));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&m_size ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
+
+ std::string kernelName = "BruteForceMatch_knnUnrollMatch";
+
+ openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1);
+ }
+}
+
+template < int BLOCK_SIZE, typename T/*, typename Mask*/ >
+void knn_match(const oclMat &query, const oclMat &train, const oclMat &mask,
+ const oclMat &trainIdx, const oclMat &distance, int distType)
+{
+ cv::ocl::Context *ctx = query.clCxt;
+ size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
+ size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
+ const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
+ int block_size = BLOCK_SIZE;
+ vector< pair<size_t, const void *> > args;
+
+ if(globalSize[0] != 0)
+ {
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
+ args.push_back( make_pair( smemSize, (void *)NULL));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
+
+ std::string kernelName = "BruteForceMatch_knnMatch";
+
+ openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1);
+ }
+}
+
+template < int BLOCK_SIZE, int MAX_DESC_LEN, typename T/*, typename Mask*/ >
+void calcDistanceUnrolled(const oclMat &query, const oclMat &train, const oclMat &mask, const oclMat &allDist, int distType)
+{
+ cv::ocl::Context *ctx = query.clCxt;
+ size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
+ size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
+ const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
+ int block_size = BLOCK_SIZE;
+ int m_size = MAX_DESC_LEN;
+ vector< pair<size_t, const void *> > args;
+
+ if(globalSize[0] != 0)
+ {
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&allDist.data ));
+ args.push_back( make_pair( smemSize, (void *)NULL));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&m_size ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
+
+ std::string kernelName = "BruteForceMatch_calcDistanceUnrolled";
+
+ openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1);
+ }
+}
+
+template < int BLOCK_SIZE, typename T/*, typename Mask*/ >
+void calcDistance(const oclMat &query, const oclMat &train, const oclMat &mask, const oclMat &allDist, int distType)
{
cv::ocl::Context *ctx = query.clCxt;
- size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
- size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
- const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
- int block_size = BLOCK_SIZE;
- vector< pair<size_t, const void *> > args;
-
- if(globalSize[0] != 0)
- {
- args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&allDist.data ));
- args.push_back( make_pair( smemSize, (void *)NULL));
- args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
-
- std::string kernelName = "BruteForceMatch_calcDistance";
-
- openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1);
- }
+ size_t globalSize[] = {(query.rows + BLOCK_SIZE - 1) / BLOCK_SIZE * BLOCK_SIZE, BLOCK_SIZE, 1};
+ size_t localSize[] = {BLOCK_SIZE, BLOCK_SIZE, 1};
+ const size_t smemSize = (2 * BLOCK_SIZE * BLOCK_SIZE) * sizeof(int);
+ int block_size = BLOCK_SIZE;
+ vector< pair<size_t, const void *> > args;
+
+ if(globalSize[0] != 0)
+ {
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&query.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&train.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&mask.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&allDist.data ));
+ args.push_back( make_pair( smemSize, (void *)NULL));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.rows ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.cols ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&distType ));
+
+ std::string kernelName = "BruteForceMatch_calcDistance";
+
+ openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1);
+ }
}
///////////////////////////////////////////////////////////////////////////////
// Calc Distance dispatcher
-template <typename T/*, typename Mask*/>
-void calcDistanceDispatcher(const oclMat& query, const oclMat& train, const oclMat& mask,
- const oclMat& allDist, int distType)
+template < typename T/*, typename Mask*/ >
+void calcDistanceDispatcher(const oclMat &query, const oclMat &train, const oclMat &mask,
+ const oclMat &allDist, int distType)
{
if (query.cols <= 64)
{
}
}
-template <typename T/*, typename Mask*/>
-void match2Dispatcher(const oclMat& query, const oclMat& train, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance, int distType)
+template < typename T/*, typename Mask*/ >
+void match2Dispatcher(const oclMat &query, const oclMat &train, const oclMat &mask,
+ const oclMat &trainIdx, const oclMat &distance, int distType)
{
if (query.cols <= 64)
{
matchUnrolled<16, 256, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> > (distance), stream);
}
else if (query.cols <= 512)
- {
+ {
matchUnrolled<16, 512, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> > (distance), stream);
}
else if (query.cols <= 1024)
- {
+ {
matchUnrolled<16, 1024, Dist>(query, train, mask, static_cast< DevMem2D_<int2> >(trainIdx), static_cast< DevMem2D_<float2> > (distance), stream);
}*/
else
}
template <int BLOCK_SIZE>
-void findKnnMatch(int k, const oclMat& trainIdx, const oclMat& distance, const oclMat& allDist, int distType)
+void findKnnMatch(int k, const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist, int distType)
{
- cv::ocl::Context *ctx = trainIdx.clCxt;
- size_t globalSize[] = {trainIdx.rows * BLOCK_SIZE, 1, 1};
- size_t localSize[] = {BLOCK_SIZE, 1, 1};
- int block_size = BLOCK_SIZE;
- std::string kernelName = "BruteForceMatch_findBestMatch";
+ cv::ocl::Context *ctx = trainIdx.clCxt;
+ size_t globalSize[] = {trainIdx.rows * BLOCK_SIZE, 1, 1};
+ size_t localSize[] = {BLOCK_SIZE, 1, 1};
+ int block_size = BLOCK_SIZE;
+ std::string kernelName = "BruteForceMatch_findBestMatch";
for (int i = 0; i < k; ++i)
- {
- vector< pair<size_t, const void *> > args;
+ {
+ vector< pair<size_t, const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&allDist.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&i));
- args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
- //args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
- //args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
- //args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&allDist.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&trainIdx.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&distance.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&i));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&block_size ));
+ //args.push_back( make_pair( sizeof(cl_int), (void *)&train.rows ));
+ //args.push_back( make_pair( sizeof(cl_int), (void *)&train.cols ));
+ //args.push_back( make_pair( sizeof(cl_int), (void *)&query.step ));
- openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1);
+ openCLExecuteKernel(ctx, &brute_force_match, kernelName, globalSize, localSize, args, -1, -1);
}
}
-void findKnnMatchDispatcher(int k, const oclMat& trainIdx, const oclMat& distance, const oclMat& allDist, int distType)
+void findKnnMatchDispatcher(int k, const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist, int distType)
{
findKnnMatch<256>(k, trainIdx, distance, allDist, distType);
}
//with mask
-template <typename T/*, typename Mask*/>
-void kmatchDispatcher(const oclMat& query, const oclMat& train, int k, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance, const oclMat& allDist, int distType)
+template < typename T/*, typename Mask*/ >
+void kmatchDispatcher(const oclMat &query, const oclMat &train, int k, const oclMat &mask,
+ const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist, int distType)
{
if (k == 2)
{
}
//without mask
-template <typename T/*, typename Mask*/>
-void kmatchDispatcher(const oclMat& query, const oclMat& train, int k,
- const oclMat& trainIdx, const oclMat& distance, const oclMat& allDist, int distType)
+template < typename T/*, typename Mask*/ >
+void kmatchDispatcher(const oclMat &query, const oclMat &train, int k,
+ const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist, int distType)
{
- oclMat mask;
+ oclMat mask;
if (k == 2)
{
match2Dispatcher<T>(query, train, mask, trainIdx, distance, distType);
-template <typename T>
-void ocl_matchL1_gpu(const oclMat& query, const oclMat& train, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance)
+template <typename T>
+void ocl_matchL1_gpu(const oclMat &query, const oclMat &train, const oclMat &mask,
+ const oclMat &trainIdx, const oclMat &distance)
{
- int distType = 0;
- if (mask.data)
- {
- matchDispatcher<T>(query, train, mask, trainIdx, distance, distType);
- }
- else
- {
- matchDispatcher< T >(query, train, trainIdx, distance, distType);
- }
+ int distType = 0;
+ if (mask.data)
+ {
+ matchDispatcher<T>(query, train, mask, trainIdx, distance, distType);
+ }
+ else
+ {
+ matchDispatcher< T >(query, train, trainIdx, distance, distType);
+ }
}
-template <typename T>
-void ocl_matchL1_gpu(const oclMat& query, const oclMat& trains, const oclMat& masks,
- const oclMat& trainIdx, const oclMat &imgIdx, const oclMat& distance)
+template <typename T>
+void ocl_matchL1_gpu(const oclMat &query, const oclMat &trains, const oclMat &masks,
+ const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance)
{
- int distType = 0;
+ int distType = 0;
- if (masks.data)
- {
- matchDispatcher<T>(query, (const oclMat *)trains.ptr(), trains.cols, masks, trainIdx, imgIdx, distance, distType);
- }
- else
- {
- matchDispatcher<T>(query, (const oclMat *)trains.ptr(), trains.cols, trainIdx, imgIdx, distance, distType);
- }
+ if (masks.data)
+ {
+ matchDispatcher<T>(query, (const oclMat *)trains.ptr(), trains.cols, masks, trainIdx, imgIdx, distance, distType);
+ }
+ else
+ {
+ matchDispatcher<T>(query, (const oclMat *)trains.ptr(), trains.cols, trainIdx, imgIdx, distance, distType);
+ }
}
-template <typename T>
-void ocl_matchL2_gpu(const oclMat& query, const oclMat& train, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance)
+template <typename T>
+void ocl_matchL2_gpu(const oclMat &query, const oclMat &train, const oclMat &mask,
+ const oclMat &trainIdx, const oclMat &distance)
{
- int distType = 1;
- if (mask.data)
- {
- matchDispatcher<T>(query, train, mask, trainIdx, distance, distType);
- }
- else
- {
- matchDispatcher<T >(query, train, trainIdx, distance, distType);
- }
+ int distType = 1;
+ if (mask.data)
+ {
+ matchDispatcher<T>(query, train, mask, trainIdx, distance, distType);
+ }
+ else
+ {
+ matchDispatcher<T >(query, train, trainIdx, distance, distType);
+ }
}
-template <typename T>
-void ocl_matchL2_gpu(const oclMat& query, const oclMat& trains, const oclMat& masks,
- const oclMat& trainIdx, const oclMat &imgIdx, const oclMat& distance)
+template <typename T>
+void ocl_matchL2_gpu(const oclMat &query, const oclMat &trains, const oclMat &masks,
+ const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance)
{
- int distType = 1;
- if (masks.data)
- {
- matchDispatcher<T>(query, (const oclMat *)trains.ptr(), trains.cols, masks, trainIdx, imgIdx, distance, distType);
- }
- else
- {
- matchDispatcher<T>(query, (const oclMat *)trains.ptr(), trains.cols, trainIdx, imgIdx, distance, distType);
- }
+ int distType = 1;
+ if (masks.data)
+ {
+ matchDispatcher<T>(query, (const oclMat *)trains.ptr(), trains.cols, masks, trainIdx, imgIdx, distance, distType);
+ }
+ else
+ {
+ matchDispatcher<T>(query, (const oclMat *)trains.ptr(), trains.cols, trainIdx, imgIdx, distance, distType);
+ }
}
-template <typename T>
-void ocl_matchHamming_gpu(const oclMat& query, const oclMat& train, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance)
+template <typename T>
+void ocl_matchHamming_gpu(const oclMat &query, const oclMat &train, const oclMat &mask,
+ const oclMat &trainIdx, const oclMat &distance)
{
- int distType = 2;
- if (mask.data)
- {
- matchDispatcher<T>(query, train, mask, trainIdx, distance, distType);
- }
- else
- {
- matchDispatcher< T >(query, train, trainIdx, distance, distType);
- }
+ int distType = 2;
+ if (mask.data)
+ {
+ matchDispatcher<T>(query, train, mask, trainIdx, distance, distType);
+ }
+ else
+ {
+ matchDispatcher< T >(query, train, trainIdx, distance, distType);
+ }
}
-template <typename T>
-void ocl_matchHamming_gpu(const oclMat& query, const oclMat& trains, const oclMat& masks,
- const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance)
+template <typename T>
+void ocl_matchHamming_gpu(const oclMat &query, const oclMat &trains, const oclMat &masks,
+ const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance)
{
- int distType = 2;
- if (masks.data)
- {
- matchDispatcher<T>(query, (const oclMat *)trains.ptr(), trains.cols, masks, trainIdx, imgIdx, distance, distType);
- }
- else
- {
- matchDispatcher<T>(query, (const oclMat *)trains.ptr(), trains.cols, trainIdx, imgIdx, distance, distType);
- }
+ int distType = 2;
+ if (masks.data)
+ {
+ matchDispatcher<T>(query, (const oclMat *)trains.ptr(), trains.cols, masks, trainIdx, imgIdx, distance, distType);
+ }
+ else
+ {
+ matchDispatcher<T>(query, (const oclMat *)trains.ptr(), trains.cols, trainIdx, imgIdx, distance, distType);
+ }
}
// knn caller
-template <typename T>
-void ocl_matchL1_gpu(const oclMat& query, const oclMat& train, int k, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance, const oclMat& allDist)
+template <typename T>
+void ocl_matchL1_gpu(const oclMat &query, const oclMat &train, int k, const oclMat &mask,
+ const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist)
{
- int distType = 0;
+ int distType = 0;
if (mask.data)
kmatchDispatcher<T>(query, train, k, mask, trainIdx, distance, allDist, distType);
kmatchDispatcher<T>(query, train, k, trainIdx, distance, allDist, distType);
}
-template <typename T>
-void ocl_matchL2_gpu(const oclMat& query, const oclMat& train, int k, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance, const oclMat& allDist)
+template <typename T>
+void ocl_matchL2_gpu(const oclMat &query, const oclMat &train, int k, const oclMat &mask,
+ const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist)
{
- int distType = 1;
+ int distType = 1;
if (mask.data)
kmatchDispatcher<T>(query, train, k, mask, trainIdx, distance, allDist, distType);
kmatchDispatcher<T>(query, train, k, trainIdx, distance, allDist, distType);
}
-template <typename T>
-void ocl_matchHamming_gpu(const oclMat& query, const oclMat& train, int k, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance, const oclMat& allDist)
+template <typename T>
+void ocl_matchHamming_gpu(const oclMat &query, const oclMat &train, int k, const oclMat &mask,
+ const oclMat &trainIdx, const oclMat &distance, const oclMat &allDist)
{
- int distType = 2;
+ int distType = 2;
- if (mask.data)
- kmatchDispatcher<T>(query, train, k, mask, trainIdx, distance, allDist, distType);
- else
- kmatchDispatcher<T>(query, train, k, trainIdx, distance, allDist, distType);
+ if (mask.data)
+ kmatchDispatcher<T>(query, train, k, mask, trainIdx, distance, allDist, distType);
+ else
+ kmatchDispatcher<T>(query, train, k, trainIdx, distance, allDist, distType);
}
//radius caller
-template <typename T>
-void ocl_matchL1_gpu(const oclMat& query, const oclMat& train, float maxDistance, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches)
+template <typename T>
+void ocl_matchL1_gpu(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &mask,
+ const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches)
{
- int distType = 0;
+ int distType = 0;
- if (mask.data)
- matchDispatcher<T>(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType);
- else
- matchDispatcher<T>(query, train, maxDistance, trainIdx, distance, nMatches, distType);
+ if (mask.data)
+ matchDispatcher<T>(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType);
+ else
+ matchDispatcher<T>(query, train, maxDistance, trainIdx, distance, nMatches, distType);
}
-template <typename T>
-void ocl_matchL2_gpu(const oclMat& query, const oclMat& train, float maxDistance, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches)
+template <typename T>
+void ocl_matchL2_gpu(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &mask,
+ const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches)
{
- int distType = 1;
+ int distType = 1;
- if (mask.data)
- matchDispatcher<T>(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType);
- else
- matchDispatcher<T>(query, train, maxDistance, trainIdx, distance, nMatches, distType);
+ if (mask.data)
+ matchDispatcher<T>(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType);
+ else
+ matchDispatcher<T>(query, train, maxDistance, trainIdx, distance, nMatches, distType);
}
-template <typename T>
-void ocl_matchHamming_gpu(const oclMat& query, const oclMat& train, float maxDistance, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches)
+template <typename T>
+void ocl_matchHamming_gpu(const oclMat &query, const oclMat &train, float maxDistance, const oclMat &mask,
+ const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches)
{
- int distType = 2;
+ int distType = 2;
- if (mask.data)
- matchDispatcher<T>(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType);
- else
- matchDispatcher<T>(query, train, maxDistance, trainIdx, distance, nMatches, distType);
+ if (mask.data)
+ matchDispatcher<T>(query, train, maxDistance, mask, trainIdx, distance, nMatches, distType);
+ else
+ matchDispatcher<T>(query, train, maxDistance, trainIdx, distance, nMatches, distType);
}
cv::ocl::BruteForceMatcher_OCL_base::BruteForceMatcher_OCL_base(DistType distType_) : distType(distType_)
{
}
-void cv::ocl::BruteForceMatcher_OCL_base::add(const vector<oclMat>& descCollection)
+void cv::ocl::BruteForceMatcher_OCL_base::add(const vector<oclMat> &descCollection)
{
- trainDescCollection.insert(trainDescCollection.end(), descCollection.begin(), descCollection.end());
+ trainDescCollection.insert(trainDescCollection.end(), descCollection.begin(), descCollection.end());
}
-const vector<oclMat>& cv::ocl::BruteForceMatcher_OCL_base::getTrainDescriptors() const
-{
- return trainDescCollection;
+const vector<oclMat> &cv::ocl::BruteForceMatcher_OCL_base::getTrainDescriptors() const
+{
+ return trainDescCollection;
}
-void cv::ocl::BruteForceMatcher_OCL_base::clear()
+void cv::ocl::BruteForceMatcher_OCL_base::clear()
{
- trainDescCollection.clear();
+ trainDescCollection.clear();
}
-bool cv::ocl::BruteForceMatcher_OCL_base::empty() const
-{
- return trainDescCollection.empty();
+bool cv::ocl::BruteForceMatcher_OCL_base::empty() const
+{
+ return trainDescCollection.empty();
}
-bool cv::ocl::BruteForceMatcher_OCL_base::isMaskSupported() const
-{
- return true;
+bool cv::ocl::BruteForceMatcher_OCL_base::isMaskSupported() const
+{
+ return true;
}
-void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat& query, const oclMat& train,
- oclMat& trainIdx, oclMat& distance, const oclMat& mask)
-{
- if (query.empty() || train.empty())
+void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat &query, const oclMat &train,
+ oclMat &trainIdx, oclMat &distance, const oclMat &mask)
+{
+ if (query.empty() || train.empty())
return;
- typedef void (*caller_t)(const oclMat& query, const oclMat& train, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance);
+ typedef void (*caller_t)(const oclMat & query, const oclMat & train, const oclMat & mask,
+ const oclMat & trainIdx, const oclMat & distance);
static const caller_t callers[3][6] =
{
CV_Assert(train.cols == query.cols && train.type() == query.type());
const int nQuery = query.rows;
- trainIdx.create(1, nQuery, CV_32S);
- distance.create(1, nQuery, CV_32F);
+ trainIdx.create(1, nQuery, CV_32S);
+ distance.create(1, nQuery, CV_32F);
- caller_t func = callers[distType][query.depth()];
- func(query, train, mask, trainIdx, distance);
+ caller_t func = callers[distType][query.depth()];
+ func(query, train, mask, trainIdx, distance);
}
-void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat& trainIdx, const oclMat& distance, vector<DMatch>&matches)
-{
- if (trainIdx.empty() || distance.empty())
+void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat &trainIdx, const oclMat &distance, vector<DMatch> &matches)
+{
+ if (trainIdx.empty() || distance.empty())
return;
-
+
Mat trainIdxCPU(trainIdx);
Mat distanceCPU(distance);
matchConvert(trainIdxCPU, distanceCPU, matches);
}
-void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat& trainIdx, const Mat& distance, vector<DMatch>&matches)
-{
- if (trainIdx.empty() || distance.empty())
+void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &trainIdx, const Mat &distance, vector<DMatch> &matches)
+{
+ if (trainIdx.empty() || distance.empty())
return;
CV_Assert(trainIdx.type() == CV_32SC1);
matches.clear();
matches.reserve(nQuery);
- const int* trainIdx_ptr = trainIdx.ptr<int>();
- const float* distance_ptr = distance.ptr<float>();
+ const int *trainIdx_ptr = trainIdx.ptr<int>();
+ const float *distance_ptr = distance.ptr<float>();
for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++distance_ptr)
{
int trainIdx = *trainIdx_ptr;
}
}
-void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat& query, const oclMat& train, vector<DMatch>& matches, const oclMat& mask)
+void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &query, const oclMat &train, vector<DMatch> &matches, const oclMat &mask)
{
- oclMat trainIdx, distance;
+ oclMat trainIdx, distance;
matchSingle(query, train, trainIdx, distance, mask);
matchDownload(trainIdx, distance, matches);
}
-void cv::ocl::BruteForceMatcher_OCL_base::makeGpuCollection(oclMat& trainCollection, oclMat& maskCollection, const vector<oclMat>& masks)
-{
+void cv::ocl::BruteForceMatcher_OCL_base::makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const vector<oclMat> &masks)
+{
- if (empty())
+ if (empty())
return;
if (masks.empty())
{
Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(oclMat)));
- oclMat* trainCollectionCPU_ptr = trainCollectionCPU.ptr<oclMat>();
+ oclMat *trainCollectionCPU_ptr = trainCollectionCPU.ptr<oclMat>();
for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr)
*trainCollectionCPU_ptr = trainDescCollection[i];
Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(oclMat)));
Mat maskCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(oclMat)));
- oclMat* trainCollectionCPU_ptr = trainCollectionCPU.ptr<oclMat>();
- oclMat* maskCollectionCPU_ptr = maskCollectionCPU.ptr<oclMat>();
+ oclMat *trainCollectionCPU_ptr = trainCollectionCPU.ptr<oclMat>();
+ oclMat *maskCollectionCPU_ptr = maskCollectionCPU.ptr<oclMat>();
for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr)
{
- const oclMat& train = trainDescCollection[i];
- const oclMat& mask = masks[i];
+ const oclMat &train = trainDescCollection[i];
+ const oclMat &mask = masks[i];
CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.cols == train.rows));
}
}
-void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat& query, const oclMat& trainCollection, oclMat& trainIdx,
- oclMat& imgIdx, oclMat& distance, const oclMat& masks)
-{
- if (query.empty() || trainCollection.empty())
+void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &query, const oclMat &trainCollection, oclMat &trainIdx,
+ oclMat &imgIdx, oclMat &distance, const oclMat &masks)
+{
+ if (query.empty() || trainCollection.empty())
return;
- typedef void (*caller_t)(const oclMat& query, const oclMat& trains, const oclMat& masks,
- const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance);
+ typedef void (*caller_t)(const oclMat & query, const oclMat & trains, const oclMat & masks,
+ const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance);
static const caller_t callers[3][6] =
{
CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
const int nQuery = query.rows;
-
- trainIdx.create(1, nQuery, CV_32S);
- imgIdx.create(1, nQuery, CV_32S);
- distance.create(1, nQuery, CV_32F);
+
+ trainIdx.create(1, nQuery, CV_32S);
+ imgIdx.create(1, nQuery, CV_32S);
+ distance.create(1, nQuery, CV_32F);
caller_t func = callers[distType][query.depth()];
CV_Assert(func != 0);
func(query, trainCollection, masks, trainIdx, imgIdx, distance);
}
-void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, vector<DMatch>& matches)
+void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, vector<DMatch> &matches)
{
- if (trainIdx.empty() || imgIdx.empty() || distance.empty())
+ if (trainIdx.empty() || imgIdx.empty() || distance.empty())
return;
Mat trainIdxCPU(trainIdx);
matchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, matches);
}
-void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, vector<DMatch>& matches)
-{
- if (trainIdx.empty() || imgIdx.empty() || distance.empty())
+void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, vector<DMatch> &matches)
+{
+ if (trainIdx.empty() || imgIdx.empty() || distance.empty())
return;
CV_Assert(trainIdx.type() == CV_32SC1);
matches.clear();
matches.reserve(nQuery);
- const int* trainIdx_ptr = trainIdx.ptr<int>();
- const int* imgIdx_ptr = imgIdx.ptr<int>();
- const float* distance_ptr = distance.ptr<float>();
+ const int *trainIdx_ptr = trainIdx.ptr<int>();
+ const int *imgIdx_ptr = imgIdx.ptr<int>();
+ const float *distance_ptr = distance.ptr<float>();
for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
{
int trainIdx = *trainIdx_ptr;
}
}
-void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat& query, vector<DMatch>& matches, const vector<oclMat>& masks)
-{
- oclMat trainCollection;
+void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &query, vector<DMatch> &matches, const vector<oclMat> &masks)
+{
+ oclMat trainCollection;
oclMat maskCollection;
makeGpuCollection(trainCollection, maskCollection, masks);
}
// knn match
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat& query, const oclMat& train, oclMat& trainIdx,
- oclMat& distance, oclMat& allDist, int k, const oclMat& mask)
-{
- if (query.empty() || train.empty())
+void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat &query, const oclMat &train, oclMat &trainIdx,
+ oclMat &distance, oclMat &allDist, int k, const oclMat &mask)
+{
+ if (query.empty() || train.empty())
return;
- typedef void (*caller_t)(const oclMat& query, const oclMat& train, int k, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance, const oclMat& allDist);
+ typedef void (*caller_t)(const oclMat & query, const oclMat & train, int k, const oclMat & mask,
+ const oclMat & trainIdx, const oclMat & distance, const oclMat & allDist);
static const caller_t callers[3][6] =
{
if (k == 2)
{
- trainIdx.create(1, nQuery, CV_32SC2);
- distance.create(1, nQuery, CV_32FC2);
+ trainIdx.create(1, nQuery, CV_32SC2);
+ distance.create(1, nQuery, CV_32FC2);
}
else
{
- trainIdx.create(nQuery, k, CV_32S);
- distance.create(nQuery, k, CV_32F);
- allDist.create(nQuery, nTrain, CV_32FC1);
+ trainIdx.create(nQuery, k, CV_32S);
+ distance.create(nQuery, k, CV_32F);
+ allDist.create(nQuery, nTrain, CV_32FC1);
}
trainIdx.setTo(Scalar::all(-1));
caller_t func = callers[distType][query.depth()];
CV_Assert(func != 0);
- func(query, train, k, mask, trainIdx, distance, allDist);
+ func(query, train, k, mask, trainIdx, distance, allDist);
}
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatchDownload(const oclMat& trainIdx, const oclMat& distance, vector< vector<DMatch> >& matches, bool compactResult)
+void cv::ocl::BruteForceMatcher_OCL_base::knnMatchDownload(const oclMat &trainIdx, const oclMat &distance, vector< vector<DMatch> > &matches, bool compactResult)
{
- if (trainIdx.empty() || distance.empty())
+ if (trainIdx.empty() || distance.empty())
return;
Mat trainIdxCPU(trainIdx);
knnMatchConvert(trainIdxCPU, distanceCPU, matches, compactResult);
}
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatchConvert(const Mat& trainIdx, const Mat& distance, vector< vector<DMatch> >& matches, bool compactResult)
-{
- if (trainIdx.empty() || distance.empty())
+void cv::ocl::BruteForceMatcher_OCL_base::knnMatchConvert(const Mat &trainIdx, const Mat &distance, vector< vector<DMatch> > &matches, bool compactResult)
+{
+ if (trainIdx.empty() || distance.empty())
return;
CV_Assert(trainIdx.type() == CV_32SC2 || trainIdx.type() == CV_32SC1);
CV_Assert(trainIdx.isContinuous() && distance.isContinuous());
const int nQuery = trainIdx.type() == CV_32SC2 ? trainIdx.cols : trainIdx.rows;
- const int k = trainIdx.type() == CV_32SC2 ? 2 :trainIdx.cols;
+ const int k = trainIdx.type() == CV_32SC2 ? 2 : trainIdx.cols;
matches.clear();
matches.reserve(nQuery);
- const int* trainIdx_ptr = trainIdx.ptr<int>();
- const float* distance_ptr = distance.ptr<float>();
+ const int *trainIdx_ptr = trainIdx.ptr<int>();
+ const float *distance_ptr = distance.ptr<float>();
for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
{
matches.push_back(vector<DMatch>());
- vector<DMatch>& curMatches = matches.back();
+ vector<DMatch> &curMatches = matches.back();
curMatches.reserve(k);
for (int i = 0; i < k; ++i, ++trainIdx_ptr, ++distance_ptr)
}
}
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat& query, const oclMat& train, vector< vector<DMatch> >& matches
- , int k, const oclMat& mask, bool compactResult)
+void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &query, const oclMat &train, vector< vector<DMatch> > &matches
+ , int k, const oclMat &mask, bool compactResult)
{
- oclMat trainIdx, distance, allDist;
+ oclMat trainIdx, distance, allDist;
knnMatchSingle(query, train, trainIdx, distance, allDist, k, mask);
knnMatchDownload(trainIdx, distance, matches, compactResult);
}
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat& query, const oclMat& trainCollection,
- oclMat& trainIdx, oclMat& imgIdx, oclMat& distance, const oclMat& maskCollection)
+void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
+ oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, const oclMat &maskCollection)
{
- if (query.empty() || trainCollection.empty())
+ if (query.empty() || trainCollection.empty())
return;
- typedef void (*caller_t)(const oclMat& query, const oclMat& trains, const oclMat& masks,
- const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance);
+ typedef void (*caller_t)(const oclMat & query, const oclMat & trains, const oclMat & masks,
+ const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance);
#if 0
static const caller_t callers[3][6] =
{
const int nQuery = query.rows;
- trainIdx.create(1, nQuery, CV_32SC2);
- imgIdx.create(1, nQuery, CV_32SC2);
- distance.create(1, nQuery, CV_32SC2);
+ trainIdx.create(1, nQuery, CV_32SC2);
+ imgIdx.create(1, nQuery, CV_32SC2);
+ distance.create(1, nQuery, CV_32SC2);
trainIdx.setTo(Scalar::all(-1));
//func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, cc, StreamAccessor::getStream(stream));
}
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Download(const oclMat& trainIdx, const oclMat& imgIdx,
- const oclMat& distance, vector< vector<DMatch> >& matches, bool compactResult)
+void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx,
+ const oclMat &distance, vector< vector<DMatch> > &matches, bool compactResult)
{
- if (trainIdx.empty() || imgIdx.empty() || distance.empty())
+ if (trainIdx.empty() || imgIdx.empty() || distance.empty())
return;
Mat trainIdxCPU(trainIdx);
knnMatch2Convert(trainIdxCPU, imgIdxCPU, distanceCPU, matches, compactResult);
}
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance,
- vector< vector<DMatch> >& matches, bool compactResult)
+void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
+ vector< vector<DMatch> > &matches, bool compactResult)
{
- if (trainIdx.empty() || imgIdx.empty() || distance.empty())
+ if (trainIdx.empty() || imgIdx.empty() || distance.empty())
return;
CV_Assert(trainIdx.type() == CV_32SC2);
matches.clear();
matches.reserve(nQuery);
- const int* trainIdx_ptr = trainIdx.ptr<int>();
- const int* imgIdx_ptr = imgIdx.ptr<int>();
- const float* distance_ptr = distance.ptr<float>();
+ const int *trainIdx_ptr = trainIdx.ptr<int>();
+ const int *imgIdx_ptr = imgIdx.ptr<int>();
+ const float *distance_ptr = distance.ptr<float>();
for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
{
matches.push_back(vector<DMatch>());
- vector<DMatch>& curMatches = matches.back();
+ vector<DMatch> &curMatches = matches.back();
curMatches.reserve(2);
for (int i = 0; i < 2; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
struct ImgIdxSetter
{
explicit inline ImgIdxSetter(int imgIdx_) : imgIdx(imgIdx_) {}
- inline void operator()(DMatch& m) const {m.imgIdx = imgIdx;}
+ inline void operator()(DMatch &m) const
+ {
+ m.imgIdx = imgIdx;
+ }
int imgIdx;
};
}
-void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat& query, vector< vector<DMatch> >& matches, int k,
- const vector<oclMat>& masks, bool compactResult)
+void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &query, vector< vector<DMatch> > &matches, int k,
+ const vector<oclMat> &masks, bool compactResult)
{
-
- if (k == 2)
+
+ if (k == 2)
{
oclMat trainCollection;
oclMat maskCollection;
for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx)
{
- vector<DMatch>& localMatch = curMatches[queryIdx];
- vector<DMatch>& globalMatch = matches[queryIdx];
+ vector<DMatch> &localMatch = curMatches[queryIdx];
+ vector<DMatch> &globalMatch = matches[queryIdx];
for_each(localMatch.begin(), localMatch.end(), ImgIdxSetter(static_cast<int>(imgIdx)));
temp.clear();
- merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), back_inserter(temp));
+ merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), back_inserter(temp));
globalMatch.clear();
const size_t count = std::min((size_t)k, temp.size());
}
// radiusMatchSingle
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat& query, const oclMat& train,
- oclMat& trainIdx, oclMat& distance, oclMat& nMatches, float maxDistance, const oclMat& mask)
-{
- if (query.empty() || train.empty())
+void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &query, const oclMat &train,
+ oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance, const oclMat &mask)
+{
+ if (query.empty() || train.empty())
return;
- typedef void (*caller_t)(const oclMat& query, const oclMat& train, float maxDistance, const oclMat& mask,
- const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches);
+ typedef void (*caller_t)(const oclMat & query, const oclMat & train, float maxDistance, const oclMat & mask,
+ const oclMat & trainIdx, const oclMat & distance, const oclMat & nMatches);
- //#if 0
- static const caller_t callers[3][6] =
+ //#if 0
+ static const caller_t callers[3][6] =
{
{
ocl_matchL1_gpu<unsigned char>, 0/*ocl_matchL1_gpu<signed char>*/,
ocl_matchHamming_gpu<int>, 0/*ocl_matchHamming_gpu<float>*/
}
};
-//#endif
+ //#endif
const int nQuery = query.rows;
const int nTrain = train.rows;
CV_Assert(train.type() == query.type() && train.cols == query.cols);
CV_Assert(trainIdx.empty() || (trainIdx.rows == nQuery && trainIdx.size() == distance.size()));
- nMatches.create(1, nQuery, CV_32SC1);
+ nMatches.create(1, nQuery, CV_32SC1);
if (trainIdx.empty())
{
- trainIdx.create(nQuery, std::max((nTrain / 100), 10), CV_32SC1);
- distance.create(nQuery, std::max((nTrain / 100), 10), CV_32FC1);
+ trainIdx.create(nQuery, std::max((nTrain / 100), 10), CV_32SC1);
+ distance.create(nQuery, std::max((nTrain / 100), 10), CV_32FC1);
}
nMatches.setTo(Scalar::all(0));
- caller_t func = callers[distType][query.depth()];
- //CV_Assert(func != 0);
- //func(query, train, maxDistance, mask, trainIdx, distance, nMatches, cc, StreamAccessor::getStream(stream));
- func(query, train, maxDistance, mask, trainIdx, distance, nMatches);
+ caller_t func = callers[distType][query.depth()];
+ //CV_Assert(func != 0);
+ //func(query, train, maxDistance, mask, trainIdx, distance, nMatches, cc, StreamAccessor::getStream(stream));
+ func(query, train, maxDistance, mask, trainIdx, distance, nMatches);
}
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches,
- vector< vector<DMatch> >& matches, bool compactResult)
-{
- if (trainIdx.empty() || distance.empty() || nMatches.empty())
+void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches,
+ vector< vector<DMatch> > &matches, bool compactResult)
+{
+ if (trainIdx.empty() || distance.empty() || nMatches.empty())
return;
Mat trainIdxCPU(trainIdx);
radiusMatchConvert(trainIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
}
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches,
- vector< vector<DMatch> >& matches, bool compactResult)
-{
- if (trainIdx.empty() || distance.empty() || nMatches.empty())
+void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches,
+ vector< vector<DMatch> > &matches, bool compactResult)
+{
+ if (trainIdx.empty() || distance.empty() || nMatches.empty())
return;
CV_Assert(trainIdx.type() == CV_32SC1);
matches.clear();
matches.reserve(nQuery);
- const int* nMatches_ptr = nMatches.ptr<int>();
+ const int *nMatches_ptr = nMatches.ptr<int>();
for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
{
- const int* trainIdx_ptr = trainIdx.ptr<int>(queryIdx);
- const float* distance_ptr = distance.ptr<float>(queryIdx);
+ const int *trainIdx_ptr = trainIdx.ptr<int>(queryIdx);
+ const float *distance_ptr = distance.ptr<float>(queryIdx);
const int nMatches = std::min(nMatches_ptr[queryIdx], trainIdx.cols);
}
matches.push_back(vector<DMatch>(nMatches));
- vector<DMatch>& curMatches = matches.back();
+ vector<DMatch> &curMatches = matches.back();
for (int i = 0; i < nMatches; ++i, ++trainIdx_ptr, ++distance_ptr)
{
}
}
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat& query, const oclMat& train, vector< vector<DMatch> >& matches,
- float maxDistance, const oclMat& mask, bool compactResult)
-{
- oclMat trainIdx, distance, nMatches;
+void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &query, const oclMat &train, vector< vector<DMatch> > &matches,
+ float maxDistance, const oclMat &mask, bool compactResult)
+{
+ oclMat trainIdx, distance, nMatches;
radiusMatchSingle(query, train, trainIdx, distance, nMatches, maxDistance, mask);
radiusMatchDownload(trainIdx, distance, nMatches, matches, compactResult);
}
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat& query, oclMat& trainIdx, oclMat& imgIdx, oclMat& distance,
- oclMat& nMatches, float maxDistance, const vector<oclMat>& masks)
-{
- if (query.empty() || empty())
+void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
+ oclMat &nMatches, float maxDistance, const vector<oclMat> &masks)
+{
+ if (query.empty() || empty())
return;
- typedef void (*caller_t)(const oclMat& query, const oclMat* trains, int n, float maxDistance, const oclMat* masks,
- const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, const oclMat& nMatches);
+ typedef void (*caller_t)(const oclMat & query, const oclMat * trains, int n, float maxDistance, const oclMat * masks,
+ const oclMat & trainIdx, const oclMat & imgIdx, const oclMat & distance, const oclMat & nMatches);
#if 0
static const caller_t callers[3][6] =
{
CV_Assert(query.channels() == 1 && query.depth() < CV_64F);
CV_Assert(trainIdx.empty() || (trainIdx.rows == nQuery && trainIdx.size() == distance.size() && trainIdx.size() == imgIdx.size()));
- nMatches.create(1, nQuery, CV_32SC1);
+ nMatches.create(1, nQuery, CV_32SC1);
if (trainIdx.empty())
{
- trainIdx.create(nQuery, std::max((nQuery / 100), 10), CV_32SC1);
- imgIdx.create(nQuery, std::max((nQuery / 100), 10), CV_32SC1);
- distance.create(nQuery, std::max((nQuery / 100), 10), CV_32FC1);
+ trainIdx.create(nQuery, std::max((nQuery / 100), 10), CV_32SC1);
+ imgIdx.create(nQuery, std::max((nQuery / 100), 10), CV_32SC1);
+ distance.create(nQuery, std::max((nQuery / 100), 10), CV_32FC1);
}
nMatches.setTo(Scalar::all(0));
vector<oclMat> trains_(trainDescCollection.begin(), trainDescCollection.end());
vector<oclMat> masks_(masks.begin(), masks.end());
- /* func(query, &trains_[0], static_cast<int>(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0],
- trainIdx, imgIdx, distance, nMatches));*/
+ /* func(query, &trains_[0], static_cast<int>(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0],
+ trainIdx, imgIdx, distance, nMatches));*/
}
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance,
- const oclMat& nMatches, vector< vector<DMatch> >& matches, bool compactResult)
+void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
+ const oclMat &nMatches, vector< vector<DMatch> > &matches, bool compactResult)
{
- if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())
+ if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())
return;
Mat trainIdxCPU(trainIdx);
radiusMatchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
}
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches,
- vector< vector<DMatch> >& matches, bool compactResult)
-{
- if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())
+void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
+ vector< vector<DMatch> > &matches, bool compactResult)
+{
+ if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())
return;
CV_Assert(trainIdx.type() == CV_32SC1);
matches.clear();
matches.reserve(nQuery);
- const int* nMatches_ptr = nMatches.ptr<int>();
+ const int *nMatches_ptr = nMatches.ptr<int>();
for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
{
- const int* trainIdx_ptr = trainIdx.ptr<int>(queryIdx);
- const int* imgIdx_ptr = imgIdx.ptr<int>(queryIdx);
- const float* distance_ptr = distance.ptr<float>(queryIdx);
+ const int *trainIdx_ptr = trainIdx.ptr<int>(queryIdx);
+ const int *imgIdx_ptr = imgIdx.ptr<int>(queryIdx);
+ const float *distance_ptr = distance.ptr<float>(queryIdx);
const int nMatches = std::min(nMatches_ptr[queryIdx], trainIdx.cols);
}
matches.push_back(vector<DMatch>());
- vector<DMatch>& curMatches = matches.back();
+ vector<DMatch> &curMatches = matches.back();
curMatches.reserve(nMatches);
for (int i = 0; i < nMatches; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr)
}
}
-void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat& query, vector< vector<DMatch> >& matches, float maxDistance,
- const vector<oclMat>& masks, bool compactResult)
+void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &query, vector< vector<DMatch> > &matches, float maxDistance,
+ const vector<oclMat> &masks, bool compactResult)
{
- oclMat trainIdx, imgIdx, distance, nMatches;
+ oclMat trainIdx, imgIdx, distance, nMatches;
radiusMatchCollection(query, trainIdx, imgIdx, distance, nMatches, maxDistance, masks);
radiusMatchDownload(trainIdx, imgIdx, distance, nMatches, matches, compactResult);
}
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////\r
+//\r
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
+//\r
+// By downloading, copying, installing or using the software you agree to this license.\r
+// If you do not agree to this license, do not download, install,\r
+// copy or use the software.\r
+//\r
+//\r
+// License Agreement\r
+// For Open Source Computer Vision Library\r
+//\r
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.\r
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.\r
+// Third party copyrights are property of their respective owners.\r
+//\r
+// @Authors\r
+// Peng Xiao, pengxiao@multicorewareinc.com\r
+//\r
+// Redistribution and use in source and binary forms, with or without modification,\r
+// are permitted provided that the following conditions are met:\r
+//\r
+// * Redistribution's of source code must retain the above copyright notice,\r
+// this list of conditions and the following disclaimer.\r
+//\r
+// * Redistribution's in binary form must reproduce the above copyright notice,\r
+// this list of conditions and the following disclaimer in the documentation\r
+// and/or other oclMaterials provided with the distribution.\r
+//\r
+// * The name of the copyright holders may not be used to endorse or promote products\r
+// derived from this software without specific prior written permission.\r
+//\r
+// This software is provided by the copyright holders and contributors as is and\r
+// any express or implied warranties, including, but not limited to, the implied\r
+// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
+// In no event shall the Intel Corporation or contributors be liable for any direct,\r
+// indirect, incidental, special, exemplary, or consequential damages\r
+// (including, but not limited to, procurement of substitute goods or services;\r
+// loss of use, data, or profits; or business interruption) however caused\r
+// and on any theory of liability, whether in contract, strict liability,\r
+// or tort (including negligence or otherwise) arising in any way out of\r
+// the use of this software, even if advised of the possibility of such damage.\r
+//\r
+//M*/\r
+\r
+#include "precomp.hpp"\r
+\r
+using namespace cv;\r
+using namespace cv::ocl;\r
+using namespace std;\r
+\r
+#if !defined (HAVE_OPENCL)\r
+void cv::ocl::buildWarpPlaneMaps(Size, Rect, const Mat &, const Mat &, const Mat &, float, oclMat &, oclMat &, Stream &)\r
+{\r
+ throw_nogpu();\r
+}\r
+void cv::ocl::buildWarpCylindricalMaps(Size, Rect, const Mat &, const Mat &, float, oclMat &, oclMat &, Stream &)\r
+{\r
+ throw_nogpu();\r
+}\r
+void cv::ocl::buildWarpSphericalMaps(Size, Rect, const Mat &, const Mat &, float, oclMat &, oclMat &, Stream &)\r
+{\r
+ throw_nogpu();\r
+}\r
+#else\r
+\r
+namespace cv\r
+{\r
+ namespace ocl\r
+ {\r
+ ///////////////////////////OpenCL kernel strings///////////////////////////\r
+ extern const char *build_warps;\r
+ }\r
+}\r
+\r
+//////////////////////////////////////////////////////////////////////////////\r
+// buildWarpPlaneMaps\r
+\r
+void cv::ocl::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T,\r
+ float scale, oclMat &map_x, oclMat &map_y)\r
+{\r
+ CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);\r
+ CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);\r
+ CV_Assert((T.size() == Size(3, 1) || T.size() == Size(1, 3)) && T.type() == CV_32F && T.isContinuous());\r
+\r
+ Mat K_Rinv = K * R.t();\r
+ CV_Assert(K_Rinv.isContinuous());\r
+\r
+ Mat KRT_mat(1, 12, CV_32FC1); // 9 + 3\r
+ KRT_mat(Range::all(), Range(0, 8)) = K_Rinv.reshape(1, 1);\r
+ KRT_mat(Range::all(), Range(9, 11)) = T;\r
+\r
+ oclMat KRT_oclMat(KRT_mat);\r
+ // transfer K_Rinv and T into a single cl_mem\r
+ map_x.create(dst_roi.size(), CV_32F);\r
+ map_y.create(dst_roi.size(), CV_32F);\r
+\r
+ int tl_u = dst_roi.tl().x;\r
+ int tl_v = dst_roi.tl().y;\r
+\r
+ Context *clCxt = Context::getContext();\r
+ string kernelName = "buildWarpPlaneMaps";\r
+ vector< pair<size_t, const void *> > args;\r
+\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&map_x.data));\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&map_y.data));\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&KRT_mat.data));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&tl_u));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&tl_v));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.cols));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.rows));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.step));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&map_y.step));\r
+ args.push_back( make_pair( sizeof(cl_float), (void *)&scale));\r
+\r
+ size_t globalThreads[3] = {map_x.cols, map_x.rows, 1};\r
+ size_t localThreads[3] = {32, 8, 1};\r
+ openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);\r
+}\r
+\r
+//////////////////////////////////////////////////////////////////////////////\r
+// buildWarpCylyndricalMaps\r
+\r
+void cv::ocl::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale,\r
+ oclMat &map_x, oclMat &map_y)\r
+{\r
+ CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);\r
+ CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);\r
+\r
+ Mat K_Rinv = K * R.t();\r
+ CV_Assert(K_Rinv.isContinuous());\r
+\r
+ oclMat KR_oclMat(K_Rinv.reshape(1, 1));\r
+\r
+ map_x.create(dst_roi.size(), CV_32F);\r
+ map_y.create(dst_roi.size(), CV_32F);\r
+\r
+ int tl_u = dst_roi.tl().x;\r
+ int tl_v = dst_roi.tl().y;\r
+\r
+ Context *clCxt = Context::getContext();\r
+ string kernelName = "buildWarpCylindricalMaps";\r
+ vector< pair<size_t, const void *> > args;\r
+\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&map_x.data));\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&map_y.data));\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&KR_oclMat.data));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&tl_u));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&tl_v));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.cols));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.rows));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.step));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&map_y.step));\r
+ args.push_back( make_pair( sizeof(cl_float), (void *)&scale));\r
+\r
+ size_t globalThreads[3] = {map_x.cols, map_x.rows, 1};\r
+ size_t localThreads[3] = {32, 8, 1};\r
+ openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);\r
+}\r
+\r
+//////////////////////////////////////////////////////////////////////////////\r
+// buildWarpSphericalMaps\r
+void cv::ocl::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale,\r
+ oclMat &map_x, oclMat &map_y)\r
+{\r
+ CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);\r
+ CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);\r
+\r
+ Mat K_Rinv = K * R.t();\r
+ CV_Assert(K_Rinv.isContinuous());\r
+\r
+ oclMat KR_oclMat(K_Rinv.reshape(1, 1));\r
+ // transfer K_Rinv, R_Kinv into a single cl_mem\r
+ map_x.create(dst_roi.size(), CV_32F);\r
+ map_y.create(dst_roi.size(), CV_32F);\r
+\r
+ int tl_u = dst_roi.tl().x;\r
+ int tl_v = dst_roi.tl().y;\r
+\r
+ Context *clCxt = Context::getContext();\r
+ string kernelName = "buildWarpSphericalMaps";\r
+ vector< pair<size_t, const void *> > args;\r
+\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&map_x.data));\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&map_y.data));\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&KR_oclMat.data));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&tl_u));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&tl_v));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.cols));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.rows));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.step));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&map_y.step));\r
+ args.push_back( make_pair( sizeof(cl_float), (void *)&scale));\r
+\r
+ size_t globalThreads[3] = {map_x.cols, map_x.rows, 1};\r
+ size_t localThreads[3] = {32, 8, 1};\r
+ openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);\r
+}\r
+\r
+\r
+void cv::ocl::buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap)\r
+{\r
+\r
+ CV_Assert(M.rows == 2 && M.cols == 3);\r
+\r
+ xmap.create(dsize, CV_32FC1);\r
+ ymap.create(dsize, CV_32FC1);\r
+\r
+ float coeffs[2 * 3];\r
+ Mat coeffsMat(2, 3, CV_32F, (void *)coeffs);\r
+\r
+ if (inverse)\r
+ M.convertTo(coeffsMat, coeffsMat.type());\r
+ else\r
+ {\r
+ cv::Mat iM;\r
+ invertAffineTransform(M, iM);\r
+ iM.convertTo(coeffsMat, coeffsMat.type());\r
+ }\r
+\r
+ oclMat coeffsOclMat(coeffsMat.reshape(1, 1));\r
+\r
+ Context *clCxt = Context::getContext();\r
+ string kernelName = "buildWarpAffineMaps";\r
+ vector< pair<size_t, const void *> > args;\r
+\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data));\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data));\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&coeffsOclMat.data));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.step));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&ymap.step));\r
+\r
+ size_t globalThreads[3] = {xmap.cols, xmap.rows, 1};\r
+ size_t localThreads[3] = {32, 8, 1};\r
+ openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);\r
+}\r
+\r
+void cv::ocl::buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap)\r
+{\r
+\r
+ CV_Assert(M.rows == 3 && M.cols == 3);\r
+\r
+ xmap.create(dsize, CV_32FC1);\r
+ ymap.create(dsize, CV_32FC1);\r
+\r
+ float coeffs[3 * 3];\r
+ Mat coeffsMat(3, 3, CV_32F, (void *)coeffs);\r
+\r
+ if (inverse)\r
+ M.convertTo(coeffsMat, coeffsMat.type());\r
+ else\r
+ {\r
+ cv::Mat iM;\r
+ invert(M, iM);\r
+ iM.convertTo(coeffsMat, coeffsMat.type());\r
+ }\r
+\r
+ oclMat coeffsOclMat(coeffsMat.reshape(1, 1));\r
+\r
+ Context *clCxt = Context::getContext();\r
+ string kernelName = "buildWarpPerspectiveMaps";\r
+ vector< pair<size_t, const void *> > args;\r
+\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data));\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data));\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&coeffsOclMat.data));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.step));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&ymap.step));\r
+\r
+ size_t globalThreads[3] = {xmap.cols, xmap.rows, 1};\r
+ size_t localThreads[3] = {32, 8, 1};\r
+ openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);\r
+}\r
+\r
+\r
+#endif // HAVE_OPENCL\r
using namespace std;
#if !defined (HAVE_OPENCL)
-void cv::ocl::Canny(const oclMat& image, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false) { throw_nogpu(); }
-void cv::ocl::Canny(const oclMat& image, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false){ throw_nogpu(); }
-void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false){ throw_nogpu(); }
-void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false){ throw_nogpu(); }
+void cv::ocl::Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false)
+{
+ throw_nogpu();
+}
+void cv::ocl::Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false)
+{
+ throw_nogpu();
+}
+void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false)
+{
+ throw_nogpu();
+}
+void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false)
+{
+ throw_nogpu();
+}
#else
namespace cv
}
}
-cv::ocl::CannyBuf::CannyBuf(const oclMat& dx_, const oclMat& dy_) : dx(dx_), dy(dy_), counter(NULL)
+cv::ocl::CannyBuf::CannyBuf(const oclMat &dx_, const oclMat &dy_) : dx(dx_), dy(dy_), counter(NULL)
{
CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size());
create(dx_.size(), -1);
}
-void cv::ocl::CannyBuf::create(const Size& image_size, int apperture_size)
+void cv::ocl::CannyBuf::create(const Size &image_size, int apperture_size)
{
ensureSizeIsEnough(image_size, CV_32SC1, dx);
ensureSizeIsEnough(image_size, CV_32SC1, dy);
openCLFree(counter);
}
-namespace cv { namespace ocl {
- namespace canny
+namespace cv
+{
+ namespace ocl
{
- void calcSobelRowPass_gpu(const oclMat& src, oclMat& dx_buf, oclMat& dy_buf, int rows, int cols);
+ namespace canny
+ {
+ void calcSobelRowPass_gpu(const oclMat &src, oclMat &dx_buf, oclMat &dy_buf, int rows, int cols);
- void calcMagnitude_gpu(const oclMat& dx_buf, const oclMat& dy_buf, oclMat& dx, oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad);
- void calcMagnitude_gpu(const oclMat& dx, const oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad);
+ void calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat &dx, oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad);
+ void calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad);
- void calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int rows, int cols, float low_thresh, float high_thresh);
+ void calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh);
- void edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, void * counter, int rows, int cols);
+ void edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, int rows, int cols);
- void edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, void * counter, int rows, int cols);
+ void edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols);
- void getEdges_gpu(oclMat& map, oclMat& dst, int rows, int cols);
+ void getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols);
+ }
}
-}}// cv::ocl
+}// cv::ocl
namespace
{
- void CannyCaller(CannyBuf& buf, oclMat& dst, float low_thresh, float high_thresh)
+ void CannyCaller(CannyBuf &buf, oclMat &dst, float low_thresh, float high_thresh)
{
using namespace ::cv::ocl::canny;
calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh);
}
}
-void cv::ocl::Canny(const oclMat& src, oclMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
+void cv::ocl::Canny(const oclMat &src, oclMat &dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
{
CannyBuf buf(src.size(), apperture_size);
Canny(src, buf, dst, low_thresh, high_thresh, apperture_size, L2gradient);
}
-void cv::ocl::Canny(const oclMat& src, CannyBuf& buf, oclMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
+void cv::ocl::Canny(const oclMat &src, CannyBuf &buf, oclMat &dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
{
using namespace ::cv::ocl::canny;
}
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
}
-void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, oclMat& dst, double low_thresh, double high_thresh, bool L2gradient)
+void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, oclMat &dst, double low_thresh, double high_thresh, bool L2gradient)
{
CannyBuf buf(dx, dy);
Canny(dx, dy, buf, dst, low_thresh, high_thresh, L2gradient);
}
-void cv::ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& dst, double low_thresh, double high_thresh, bool L2gradient)
+void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &dst, double low_thresh, double high_thresh, bool L2gradient)
{
using namespace ::cv::ocl::canny;
dst.create(dx.size(), CV_8U);
dst.setTo(Scalar::all(0));
- buf.dx = dx; buf.dy = dy;
+ buf.dx = dx;
+ buf.dy = dy;
buf.create(dx.size(), -1);
buf.edgeBuf.setTo(Scalar::all(0));
calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient);
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
}
-void canny::calcSobelRowPass_gpu(const oclMat& src, oclMat& dx_buf, oclMat& dy_buf, int rows, int cols)
+void canny::calcSobelRowPass_gpu(const oclMat &src, oclMat &dx_buf, oclMat &dy_buf, int rows, int cols)
{
Context *clCxt = src.clCxt;
string kernelName = "calcSobelRowPass";
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
-void canny::calcMagnitude_gpu(const oclMat& dx_buf, const oclMat& dy_buf, oclMat& dx, oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad)
+void canny::calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat &dx, oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
{
Context *clCxt = dx_buf.clCxt;
string kernelName = "calcMagnitude_buf";
}
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
}
-void canny::calcMagnitude_gpu(const oclMat& dx, const oclMat& dy, oclMat& mag, int rows, int cols, bool L2Grad)
+void canny::calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
{
Context *clCxt = dx.clCxt;
string kernelName = "calcMagnitude";
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
}
-void canny::calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int rows, int cols, float low_thresh, float high_thresh)
+void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh)
{
Context *clCxt = dx.clCxt;
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
-void canny::edgesHysteresisLocal_gpu(oclMat& map, oclMat& st1, void * counter, int rows, int cols)
+void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, int rows, int cols)
{
Context *clCxt = map.clCxt;
string kernelName = "edgesHysteresisLocal";
openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
}
-void canny::edgesHysteresisGlobal_gpu(oclMat& map, oclMat& st1, oclMat& st2, void * counter, int rows, int cols)
+void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols)
{
unsigned int count;
openCLSafeCall(clEnqueueReadBuffer(Context::getContext()->impl->clCmdQueue, (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL));
#undef DIVUP
}
-void canny::getEdges_gpu(oclMat& map, oclMat& dst, int rows, int cols)
+void canny::getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols)
{
Context *clCxt = map.clCxt;
string kernelName = "getEdges";
void RGB2Gray_caller(const oclMat &src, oclMat &dst, int bidx)
{
vector<pair<size_t , const void *> > args;
- int channels = src.channels();
+ int channels = src.oclchannels();
char build_options[50];
- //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.channels(),bidx);
+ //printf("depth:%d,channels:%d,bidx:%d\n",src.depth(),src.oclchannels(),bidx);
sprintf(build_options, "-D DEPTH_%d", src.depth());
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols));
args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows));
void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
{
Size sz = src.size();
- int scn = src.channels(), depth = src.depth(), bidx;
+ int scn = src.oclchannels(), depth = src.depth(), bidx;
CV_Assert(depth == CV_8U || depth == CV_16U);
#if !defined(HAVE_OPENCL)
-void cv::ocl::columnSum(const oclMat& src,oclMat& dst){ throw_nogpu(); }
+void cv::ocl::columnSum(const oclMat &src, oclMat &dst)
+{
+ throw_nogpu();
+}
#else /*!HAVE_OPENCL */
-namespace cv
-{
- namespace ocl
- {
- extern const char* imgproc_columnsum;
- }
+namespace cv
+{
+ namespace ocl
+ {
+ extern const char *imgproc_columnsum;
+ }
}
-void cv::ocl::columnSum(const oclMat& src,oclMat& dst)
+void cv::ocl::columnSum(const oclMat &src, oclMat &dst)
{
- CV_Assert(src.type() == CV_32FC1);
+ CV_Assert(src.type() == CV_32FC1);
+
+ dst.create(src.size(), src.type());
+
+ Context *clCxt = src.clCxt;
- dst.create(src.size(), src.type());
+ const std::string kernelName = "columnSum";
- Context *clCxt = src.clCxt;
-
- const std::string kernelName = "columnSum";
-
- std::vector< pair<size_t, const void *> > args;
+ std::vector< pair<size_t, const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
- size_t globalThreads[3] = {dst.cols, 1, 1};
- size_t localThreads[3] = {16, 16, 1};
+ size_t globalThreads[3] = {dst.cols, 1, 1};
+ size_t localThreads[3] = {16, 16, 1};
- openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
+ openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
}
-#endif
\ No newline at end of file
+#endif
\ No newline at end of file
using namespace std;
#if !defined (HAVE_OPENCL)
-void cv::ocl::dft(const oclMat& src, oclMat& dst, int flags) { throw_nogpu(); }
+void cv::ocl::dft(const oclMat &src, oclMat &dst, int flags)
+{
+ throw_nogpu();
+}
#else
#include <clAmdFft.h>
-namespace cv{ namespace ocl {
- enum FftType
- {
- C2R = 1, // complex to complex
- R2C = 2, // real to opencl HERMITIAN_INTERLEAVED
- C2C = 3 // opencl HERMITIAN_INTERLEAVED to real
- };
- struct FftPlan
+namespace cv
+{
+ namespace ocl
{
- friend void fft_setup();
- friend void fft_teardown();
- ~FftPlan();
- protected:
- FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
- const Size dft_size;
- const int src_step, dst_step;
- const int flags;
- const FftType type;
- clAmdFftPlanHandle plHandle;
- static vector<FftPlan*> planStore;
- static bool started;
- static clAmdFftSetupData * setupData;
- public:
- // return a baked plan->
- // if there is one matched plan, return it
- // if not, bake a new one, put it into the planStore and return it.
- static clAmdFftPlanHandle getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
- };
-}}
+ enum FftType
+ {
+ C2R = 1, // complex to complex
+ R2C = 2, // real to opencl HERMITIAN_INTERLEAVED
+ C2C = 3 // opencl HERMITIAN_INTERLEAVED to real
+ };
+ struct FftPlan
+ {
+ friend void fft_setup();
+ friend void fft_teardown();
+ ~FftPlan();
+ protected:
+ FftPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
+ const Size dft_size;
+ const int src_step, dst_step;
+ const int flags;
+ const FftType type;
+ clAmdFftPlanHandle plHandle;
+ static vector<FftPlan *> planStore;
+ static bool started;
+ static clAmdFftSetupData *setupData;
+ public:
+ // return a baked plan->
+ // if there is one matched plan, return it
+ // if not, bake a new one, put it into the planStore and return it.
+ static clAmdFftPlanHandle getPlan(Size _dft_size, int _src_step, int _dst_step, int _flags, FftType _type);
+ };
+ }
+}
bool cv::ocl::FftPlan::started = false;
-vector<cv::ocl::FftPlan*> cv::ocl::FftPlan::planStore = vector<cv::ocl::FftPlan*>();
-clAmdFftSetupData * cv::ocl::FftPlan::setupData = 0;
+vector<cv::ocl::FftPlan *> cv::ocl::FftPlan::planStore = vector<cv::ocl::FftPlan *>();
+clAmdFftSetupData *cv::ocl::FftPlan::setupData = 0;
void cv::ocl::fft_setup()
{
clAmdFftResultLocation place;
clAmdFftLayout inLayout;
clAmdFftLayout outLayout;
- clAmdFftDim dim = is_1d_input||is_row_dft ? CLFFT_1D : CLFFT_2D;
+ clAmdFftDim dim = is_1d_input || is_row_dft ? CLFFT_1D : CLFFT_2D;
- size_t batchSize = is_row_dft?dft_size.height : 1;
+ size_t batchSize = is_row_dft ? dft_size.height : 1;
size_t clLengthsIn[ 3 ] = {1, 1, 1};
size_t clStridesIn[ 3 ] = {1, 1, 1};
size_t clLengthsOut[ 3 ] = {1, 1, 1};
{
if(planStore[i]->plHandle == plHandle)
{
- planStore.erase(planStore.begin()+ i);
+ planStore.erase(planStore.begin() + i);
}
}
openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) );
// go through search
for(int i = 0; i < planStore.size(); i ++)
{
- FftPlan * plan = planStore[i];
+ FftPlan *plan = planStore[i];
if(
- plan->dft_size.width == _dft_size.width &&
+ plan->dft_size.width == _dft_size.width &&
plan->dft_size.height == _dft_size.height &&
plan->flags == _flags &&
plan->src_step == _src_step &&
plan->dst_step == _dst_step &&
plan->type == _type
- )
+ )
{
return plan->plHandle;
}
return newPlan->plHandle;
}
-void cv::ocl::dft(const oclMat& src, oclMat& dst, Size dft_size, int flags)
+void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
{
- if(dft_size == Size(0,0))
+ if(dft_size == Size(0, 0))
{
dft_size = src.size();
}
break;
case R2C:
CV_Assert(!is_row_dft); // this is not supported yet
- dst.create(src.rows, src.cols/2 + 1, CV_32FC2);
+ dst.create(src.rows, src.cols / 2 + 1, CV_32FC2);
break;
case C2R:
CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows);
clAmdFftPlanHandle plHandle = FftPlan::getPlan(dft_size, src.step, dst.step, flags, type);
//get the buffersize
- size_t buffersize=0;
+ size_t buffersize = 0;
openCLSafeCall( clAmdFftGetTmpBufSize(plHandle, &buffersize ) );
- //allocate the intermediate buffer
- cl_mem clMedBuffer=NULL;
+ //allocate the intermediate buffer
+ cl_mem clMedBuffer = NULL;
if (buffersize)
{
cl_int medstatus;
clMedBuffer = clCreateBuffer ( src.clCxt->impl->clContext, CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
openCLSafeCall( medstatus );
}
- openCLSafeCall( clAmdFftEnqueueTransform( plHandle,
- is_inverse?CLFFT_BACKWARD:CLFFT_FORWARD,
- 1,
- &src.clCxt->impl->clCmdQueue,
- 0, NULL, NULL,
- (cl_mem*)&src.data, (cl_mem*)&dst.data, clMedBuffer ) );
+ openCLSafeCall( clAmdFftEnqueueTransform( plHandle,
+ is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
+ 1,
+ &src.clCxt->impl->clCmdQueue,
+ 0, NULL, NULL,
+ (cl_mem *)&src.data, (cl_mem *)&dst.data, clMedBuffer ) );
openCLSafeCall( clFinish(src.clCxt->impl->clCmdQueue) );
if(clMedBuffer)
{
}
Ptr<FilterEngine_GPU> cv::ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType )
-{
- throw_nogpu();
- return Ptr<FilterEngine_GPU>(0);
+{
+ throw_nogpu();
+ return Ptr<FilterEngine_GPU>(0);
}
void cv::ocl::boxFilter(const oclMat &, oclMat &, int, Size, Point, int)
class Filter2DEngine_GPU : public FilterEngine_GPU
{
public:
- Filter2DEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_) : filter2D(filter2D_) {}
+ Filter2DEngine_GPU(const Ptr<BaseFilter_GPU> &filter2D_) : filter2D(filter2D_) {}
virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1))
{
CV_Assert(src.clCxt == dst.clCxt);
CV_Assert( (src.cols == dst.cols) &&
(src.rows == dst.rows) );
- CV_Assert( (src.channels() == dst.channels()) );
+ CV_Assert( (src.oclchannels() == dst.oclchannels()) );
- int srcStep = src.step1() / src.channels();
- int dstStep = dst.step1() / dst.channels();
+ int srcStep = src.step1() / src.oclchannels();
+ int dstStep = dst.step1() / dst.oclchannels();
int srcOffset = src.offset / src.elemSize();
int dstOffset = dst.offset / dst.elemSize();
- int srcOffset_x=srcOffset%srcStep;
- int srcOffset_y=srcOffset/srcStep;
+ int srcOffset_x = srcOffset % srcStep;
+ int srcOffset_y = srcOffset / srcStep;
Context *clCxt = src.clCxt;
- string kernelName;
+ string kernelName;
size_t localThreads[3] = {16, 16, 1};
- size_t globalThreads[3] = {(src.cols + localThreads[0]) / localThreads[0] * localThreads[0], (src.rows + localThreads[1]) / localThreads[1] * localThreads[1], 1};
-
- if(src.type()==CV_8UC1)
- {
- kernelName = "morph_C1_D0";
- globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0]) / localThreads[0] * localThreads[0];
- CV_Assert( localThreads[0]*localThreads[1]*8 >= (localThreads[0]*4+ksize.width-1)*(localThreads[1]+ksize.height-1) );
- }
- else
- {
- kernelName = "morph";
- CV_Assert( localThreads[0]*localThreads[1]*2 >= (localThreads[0]+ksize.width-1)*(localThreads[1]+ksize.height-1) );
- }
- char s[64];
- switch(src.type())
- {
- case CV_8UC1:
- sprintf(s, "-D VAL=255");
- break;
- case CV_8UC3:
- case CV_8UC4:
- sprintf(s, "-D VAL=255 -D GENTYPE=uchar4");
- break;
- case CV_32FC1:
- sprintf(s, "-D VAL=FLT_MAX -D GENTYPE=float");
- break;
- case CV_32FC3:
- case CV_32FC4:
- sprintf(s, "-D VAL=FLT_MAX -D GENTYPE=float4");
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported type");
- }
+ size_t globalThreads[3] = {(src.cols + localThreads[0]) / localThreads[0] *localThreads[0], (src.rows + localThreads[1]) / localThreads[1] *localThreads[1], 1};
+
+ if(src.type() == CV_8UC1)
+ {
+ kernelName = "morph_C1_D0";
+ globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0]) / localThreads[0] * localThreads[0];
+ CV_Assert( localThreads[0]*localThreads[1] * 8 >= (localThreads[0] * 4 + ksize.width - 1) * (localThreads[1] + ksize.height - 1) );
+ }
+ else
+ {
+ kernelName = "morph";
+ CV_Assert( localThreads[0]*localThreads[1] * 2 >= (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1) );
+ }
+ char s[64];
+ switch(src.type())
+ {
+ case CV_8UC1:
+ sprintf(s, "-D VAL=255");
+ break;
+ case CV_8UC3:
+ case CV_8UC4:
+ sprintf(s, "-D VAL=255 -D GENTYPE=uchar4");
+ break;
+ case CV_32FC1:
+ sprintf(s, "-D VAL=FLT_MAX -D GENTYPE=float");
+ break;
+ case CV_32FC3:
+ case CV_32FC4:
+ sprintf(s, "-D VAL=FLT_MAX -D GENTYPE=float4");
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported type");
+ }
char compile_option[128];
- sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s", anchor.x, anchor.y, localThreads[0], localThreads[1],s);
+ sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s", anchor.x, anchor.y, localThreads[0], localThreads[1], s);
vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep));
args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep));
args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_kernel.data));
- args.push_back( make_pair( sizeof(cl_int),(void*)&src.wholecols));
- args.push_back( make_pair( sizeof(cl_int),(void*)&src.wholerows));
- args.push_back( make_pair( sizeof(cl_int),(void*)&dstOffset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholecols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholerows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dstOffset));
openCLExecuteKernel(clCxt, &filtering_morph, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
}
CV_Assert(src.clCxt == dst.clCxt);
CV_Assert( (src.cols == dst.cols) &&
(src.rows == dst.rows) );
- CV_Assert( (src.channels() == dst.channels()) );
+ CV_Assert( (src.oclchannels() == dst.oclchannels()) );
- int srcStep = src.step1() / src.channels();
- int dstStep = dst.step1() / dst.channels();
+ int srcStep = src.step1() / src.oclchannels();
+ int dstStep = dst.step1() / dst.oclchannels();
int srcOffset = src.offset / src.elemSize();
int dstOffset = dst.offset / dst.elemSize();
- int srcOffset_x=srcOffset%srcStep;
- int srcOffset_y=srcOffset/srcStep;
+ int srcOffset_x = srcOffset % srcStep;
+ int srcOffset_y = srcOffset / srcStep;
Context *clCxt = src.clCxt;
- string kernelName;
+ string kernelName;
size_t localThreads[3] = {16, 16, 1};
- size_t globalThreads[3] = {(src.cols + localThreads[0]) / localThreads[0] * localThreads[0], (src.rows + localThreads[1]) / localThreads[1] * localThreads[1], 1};
-
- if(src.type()==CV_8UC1)
- {
- kernelName = "morph_C1_D0";
- globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0]) / localThreads[0] * localThreads[0];
- CV_Assert( localThreads[0]*localThreads[1]*8 >= (localThreads[0]*4+ksize.width-1)*(localThreads[1]+ksize.height-1) );
- }
- else
- {
- kernelName = "morph";
- CV_Assert( localThreads[0]*localThreads[1]*2 >= (localThreads[0]+ksize.width-1)*(localThreads[1]+ksize.height-1) );
- }
- char s[64];
- switch(src.type())
- {
- case CV_8UC1:
- sprintf(s, "-D VAL=0");
- break;
- case CV_8UC3:
- case CV_8UC4:
- sprintf(s, "-D VAL=0 -D GENTYPE=uchar4");
- break;
- case CV_32FC1:
- sprintf(s, "-D VAL=-FLT_MAX -D GENTYPE=float");
- break;
- case CV_32FC3:
- case CV_32FC4:
- sprintf(s, "-D VAL=-FLT_MAX -D GENTYPE=float4");
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported type");
- }
+ size_t globalThreads[3] = {(src.cols + localThreads[0]) / localThreads[0] *localThreads[0], (src.rows + localThreads[1]) / localThreads[1] *localThreads[1], 1};
+
+ if(src.type() == CV_8UC1)
+ {
+ kernelName = "morph_C1_D0";
+ globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0]) / localThreads[0] * localThreads[0];
+ CV_Assert( localThreads[0]*localThreads[1] * 8 >= (localThreads[0] * 4 + ksize.width - 1) * (localThreads[1] + ksize.height - 1) );
+ }
+ else
+ {
+ kernelName = "morph";
+ CV_Assert( localThreads[0]*localThreads[1] * 2 >= (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1) );
+ }
+ char s[64];
+ switch(src.type())
+ {
+ case CV_8UC1:
+ sprintf(s, "-D VAL=0");
+ break;
+ case CV_8UC3:
+ case CV_8UC4:
+ sprintf(s, "-D VAL=0 -D GENTYPE=uchar4");
+ break;
+ case CV_32FC1:
+ sprintf(s, "-D VAL=-FLT_MAX -D GENTYPE=float");
+ break;
+ case CV_32FC3:
+ case CV_32FC4:
+ sprintf(s, "-D VAL=-FLT_MAX -D GENTYPE=float4");
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported type");
+ }
char compile_option[128];
- sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s", anchor.x, anchor.y, localThreads[0], localThreads[1],s);
+ sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s", anchor.x, anchor.y, localThreads[0], localThreads[1], s);
vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep));
args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep));
args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_kernel.data));
- args.push_back( make_pair( sizeof(cl_int),(void*)&src.wholecols));
- args.push_back( make_pair( sizeof(cl_int),(void*)&src.wholerows));
- args.push_back( make_pair( sizeof(cl_int),(void*)&dstOffset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholecols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&src.wholerows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dstOffset));
openCLExecuteKernel(clCxt, &filtering_morph, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
}
{
static const GPUMorfFilter_t GPUMorfFilter_callers[2][5] =
{
- {0, GPUErode, 0, 0, GPUErode },
- {0, GPUDilate, 0, 0, GPUDilate}
+ {0, GPUErode, 0, GPUErode, GPUErode },
+ {0, GPUDilate, 0, GPUDilate, GPUDilate}
};
CV_Assert(op == MORPH_ERODE || op == MORPH_DILATE);
- CV_Assert(type == CV_8UC1 || type == CV_8UC4 || type == CV_32FC1 || type == CV_32FC4);
+ CV_Assert(type == CV_8UC1 || type == CV_8UC3 || type == CV_8UC4 || type == CV_32FC1 || type == CV_32FC1 || type == CV_32FC4);
oclMat gpu_krnl;
normalizeKernel(kernel, gpu_krnl);
class MorphologyFilterEngine_GPU : public Filter2DEngine_GPU
{
public:
- MorphologyFilterEngine_GPU(const Ptr<BaseFilter_GPU>& filter2D_, int iters_) :
+ MorphologyFilterEngine_GPU(const Ptr<BaseFilter_GPU> &filter2D_, int iters_) :
Filter2DEngine_GPU(filter2D_), iters(iters_) {}
virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1))
namespace
{
- void morphOp(int op, const oclMat &src, oclMat &dst, const Mat &_kernel, Point anchor, int iterations,int borderType,const Scalar& borderValue)
+ void morphOp(int op, const oclMat &src, oclMat &dst, const Mat &_kernel, Point anchor, int iterations, int borderType, const Scalar &borderValue)
{
- if((borderType != cv::BORDER_CONSTANT) || (borderValue!=morphologyDefaultBorderValue()))
- {
- CV_Error(CV_StsBadArg,"unsupported border type");
- }
+ if((borderType != cv::BORDER_CONSTANT) || (borderValue != morphologyDefaultBorderValue()))
+ {
+ CV_Error(CV_StsBadArg, "unsupported border type");
+ }
Mat kernel;
Size ksize = _kernel.data ? _kernel.size() : Size(3, 3);
normalizeAnchor(anchor, ksize);
- if (iterations == 0 || _kernel.rows *_kernel.cols == 1)
+ if (iterations == 0 || _kernel.rows * _kernel.cols == 1)
{
src.copyTo(dst);
return;
}
void cv::ocl::erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor, int iterations,
- int borderType,const Scalar& borderValue)
+ int borderType, const Scalar &borderValue)
{
bool allZero = true;
for(int i = 0; i < kernel.rows * kernel.cols; ++i)
{
kernel.data[0] = 1;
}
- morphOp(MORPH_ERODE, src, dst, kernel, anchor, iterations,borderType, borderValue);
+ morphOp(MORPH_ERODE, src, dst, kernel, anchor, iterations, borderType, borderValue);
}
void cv::ocl::dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor, int iterations,
- int borderType,const Scalar& borderValue)
+ int borderType, const Scalar &borderValue)
{
- morphOp(MORPH_DILATE, src, dst, kernel, anchor, iterations,borderType, borderValue);
+ morphOp(MORPH_DILATE, src, dst, kernel, anchor, iterations, borderType, borderValue);
}
void cv::ocl::morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor, int iterations,
- int borderType,const Scalar& borderValue)
+ int borderType, const Scalar &borderValue)
{
oclMat temp;
switch( op )
{
case MORPH_ERODE:
- erode( src, dst, kernel, anchor, iterations,borderType, borderValue);
+ erode( src, dst, kernel, anchor, iterations, borderType, borderValue);
break;
case MORPH_DILATE:
- dilate( src, dst, kernel, anchor, iterations,borderType, borderValue);
+ dilate( src, dst, kernel, anchor, iterations, borderType, borderValue);
break;
case MORPH_OPEN:
- erode( src, temp, kernel, anchor, iterations,borderType, borderValue);
- dilate( temp, dst, kernel, anchor, iterations,borderType, borderValue);
+ erode( src, temp, kernel, anchor, iterations, borderType, borderValue);
+ dilate( temp, dst, kernel, anchor, iterations, borderType, borderValue);
break;
case CV_MOP_CLOSE:
- dilate( src, temp, kernel, anchor, iterations,borderType, borderValue);
- erode( temp, dst, kernel, anchor, iterations,borderType, borderValue);
+ dilate( src, temp, kernel, anchor, iterations, borderType, borderValue);
+ erode( temp, dst, kernel, anchor, iterations, borderType, borderValue);
break;
case CV_MOP_GRADIENT:
- erode( src, temp, kernel, anchor, iterations,borderType, borderValue);
- dilate( src, dst, kernel, anchor, iterations,borderType, borderValue);
+ erode( src, temp, kernel, anchor, iterations, borderType, borderValue);
+ dilate( src, dst, kernel, anchor, iterations, borderType, borderValue);
subtract(dst, temp, dst);
break;
case CV_MOP_TOPHAT:
- erode( src, dst, kernel, anchor, iterations,borderType, borderValue);
- dilate( dst, temp, kernel, anchor, iterations,borderType, borderValue);
+ erode( src, dst, kernel, anchor, iterations, borderType, borderValue);
+ dilate( dst, temp, kernel, anchor, iterations, borderType, borderValue);
subtract(src, temp, dst);
break;
case CV_MOP_BLACKHAT:
- dilate( src, dst, kernel, anchor, iterations,borderType, borderValue);
- erode( dst, temp, kernel, anchor, iterations,borderType, borderValue);
+ dilate( src, dst, kernel, anchor, iterations, borderType, borderValue);
+ erode( dst, temp, kernel, anchor, iterations, borderType, borderValue);
subtract(temp, src, dst);
break;
default:
CV_Assert(src.clCxt == dst.clCxt);
CV_Assert( (src.cols == dst.cols) &&
(src.rows == dst.rows) );
- CV_Assert( (src.channels() == dst.channels()) );
+ CV_Assert( (src.oclchannels() == dst.oclchannels()) );
CV_Assert( (borderType != 0) );
CV_Assert(ksize.height > 0 && ksize.width > 0 && ((ksize.height & 1) == 1) && ((ksize.width & 1) == 1));
CV_Assert((anchor.x == -1 && anchor.y == -1) || (anchor.x == ksize.width >> 1 && anchor.y == ksize.height >> 1));
Context *clCxt = src.clCxt;
- int cn = src.channels();
+ int cn = src.oclchannels();
int depth = src.depth();
string kernelName = "filter2D";
{4, 4, 4, 4, 1, 1, 4}
};
- int vector_length = vector_lengths[cn-1][depth];
+ int vector_length = vector_lengths[cn - 1][depth];
int offset_cols = (dst_offset_x) & (vector_length - 1);
int cols = dst.cols + offset_cols;
int rows = divUp(dst.rows, vector_length);
size_t localThreads[3] = {256, 1, 1};
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(rows, localThreads[1]) * localThreads[1], 1
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(rows, localThreads[1]) *localThreads[1], 1
};
vector< pair<size_t, const void *> > args;
Ptr<BaseFilter_GPU> cv::ocl::getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize,
Point anchor, int borderType)
{
- static const GPUFilter2D_t GPUFilter2D_callers[] = {0, GPUFilter2D, 0, 0, GPUFilter2D};
+ static const GPUFilter2D_t GPUFilter2D_callers[] = {0, GPUFilter2D, 0, GPUFilter2D, GPUFilter2D};
- CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4 || srcType == CV_32FC1 || srcType == CV_32FC4) && dstType == srcType);
+ CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC3 || srcType == CV_8UC4 || srcType == CV_32FC1 || srcType == CV_32FC3 || srcType == CV_32FC4) && dstType == srcType);
oclMat gpu_krnl;
int nDivisor;
class SeparableFilterEngine_GPU : public FilterEngine_GPU
{
public:
- SeparableFilterEngine_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter_,
- const Ptr<BaseColumnFilter_GPU>& columnFilter_) :
+ SeparableFilterEngine_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter_,
+ const Ptr<BaseColumnFilter_GPU> &columnFilter_) :
rowFilter(rowFilter_), columnFilter(columnFilter_)
{
ksize = Size(rowFilter->ksize, columnFilter->ksize);
Size src_size = src.size();
int src_type = src.type();
- int cn = src.channels();
+ int cn = src.oclchannels();
//dst.create(src_size, src_type);
dst = Scalar(0.0);
//dstBuf.create(src_size, src_type);
};
}
-Ptr<FilterEngine_GPU> cv::ocl::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
- const Ptr<BaseColumnFilter_GPU>& columnFilter)
+Ptr<FilterEngine_GPU> cv::ocl::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
+ const Ptr<BaseColumnFilter_GPU> &columnFilter)
{
return Ptr<FilterEngine_GPU>(new SeparableFilterEngine_GPU(rowFilter, columnFilter));
}
Ptr<BaseFilter_GPU> cv::ocl::getBoxFilter_GPU(int srcType, int dstType,
const Size &ksize, Point anchor, int borderType)
{
- static const FilterBox_t FilterBox_callers[2][5] = {{0, GPUFilterBox_8u_C1R, 0, 0, GPUFilterBox_8u_C4R},
- {0, GPUFilterBox_32F_C1R, 0, 0, GPUFilterBox_32F_C4R}
+ static const FilterBox_t FilterBox_callers[2][5] = {{0, GPUFilterBox_8u_C1R, 0, GPUFilterBox_8u_C4R, GPUFilterBox_8u_C4R},
+ {0, GPUFilterBox_32F_C1R, 0, GPUFilterBox_32F_C4R, GPUFilterBox_32F_C4R}
};
//Remove this check if more data types need to be supported.
- CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4 || srcType == CV_32FC1 || srcType == CV_32FC4)
- && dstType == srcType);
+ CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC3 || srcType == CV_8UC4 || srcType == CV_32FC1 ||
+ srcType == CV_32FC3 || srcType == CV_32FC4) && dstType == srcType);
normalizeAnchor(anchor, ksize);
void linearRowFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel, int ksize, int anchor, int bordertype)
{
Context *clCxt = src.clCxt;
- int channels = src.channels();
+ int channels = src.oclchannels();
size_t localThreads[3] = {16, 16, 1};
string kernelName = "row_filter";
//sanity checks
CV_Assert(clCxt == dst.clCxt);
CV_Assert(src.cols == dst.cols);
- CV_Assert(src.channels() == dst.channels());
+ CV_Assert(src.oclchannels() == dst.oclchannels());
CV_Assert(ksize == (anchor << 1) + 1);
int src_pix_per_row, dst_pix_per_row;
int src_offset_x, src_offset_y, dst_offset_in_pixel;
void linearColumnFilter_gpu(const oclMat &src, const oclMat &dst, oclMat mat_kernel, int ksize, int anchor, int bordertype)
{
Context *clCxt = src.clCxt;
- int channels = src.channels();
+ int channels = src.oclchannels();
size_t localThreads[3] = {16, 16, 1};
string kernelName = "col_filter";
break;
}
char compile_option[256];
-
+
size_t globalThreads[3];
globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
{
case 1:
globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
- sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
- anchor, localThreads[0], localThreads[1], channels, btype,"float","uchar","convert_uchar_sat");
+ sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
+ anchor, localThreads[0], localThreads[1], channels, btype, "float", "uchar", "convert_uchar_sat");
break;
case 2:
globalThreads[0] = ((dst.cols + 1) / 2 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
- sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
- anchor, localThreads[0], localThreads[1], channels, btype,"float2","uchar2","convert_uchar2_sat");
+ sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
+ anchor, localThreads[0], localThreads[1], channels, btype, "float2", "uchar2", "convert_uchar2_sat");
break;
case 3:
case 4:
globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
- sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
- anchor, localThreads[0], localThreads[1], channels, btype,"float4","uchar4","convert_uchar4_sat");
+ sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
+ anchor, localThreads[0], localThreads[1], channels, btype, "float4", "uchar4", "convert_uchar4_sat");
break;
}
}
else
{
globalThreads[0] = (dst.cols + localThreads[0] - 1) / localThreads[0] * localThreads[0];
- switch(dst.type())
- {
- case CV_32SC1:
- sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
- anchor, localThreads[0], localThreads[1], channels, btype,"float","int","convert_int_sat");
- break;
- case CV_32SC3:
- case CV_32SC4:
- sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
- anchor, localThreads[0], localThreads[1], channels, btype,"float4","int4","convert_int4_sat");
- break;
- case CV_32FC1:
- sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
- anchor, localThreads[0], localThreads[1], channels, btype,"float","float","");
- break;
- case CV_32FC3:
- case CV_32FC4:
- sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
- anchor, localThreads[0], localThreads[1], channels, btype,"float4","float4","");
- break;
- }
+ switch(dst.type())
+ {
+ case CV_32SC1:
+ sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
+ anchor, localThreads[0], localThreads[1], channels, btype, "float", "int", "convert_int_sat");
+ break;
+ case CV_32SC3:
+ case CV_32SC4:
+ sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
+ anchor, localThreads[0], localThreads[1], channels, btype, "float4", "int4", "convert_int4_sat");
+ break;
+ case CV_32FC1:
+ sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
+ anchor, localThreads[0], localThreads[1], channels, btype, "float", "float", "");
+ break;
+ case CV_32FC3:
+ case CV_32FC4:
+ sprintf(compile_option, "-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
+ anchor, localThreads[0], localThreads[1], channels, btype, "float4", "float4", "");
+ break;
+ }
}
//sanity checks
CV_Assert(clCxt == dst.clCxt);
CV_Assert(src.cols == dst.cols);
- CV_Assert(src.channels() == dst.channels());
+ CV_Assert(src.oclchannels() == dst.oclchannels());
CV_Assert(ksize == (anchor << 1) + 1);
int src_pix_per_row, dst_pix_per_row;
int src_offset_x, src_offset_y, dst_offset_in_pixel;
args.push_back(make_pair(sizeof(cl_mem), &dst.data));
args.push_back(make_pair(sizeof(cl_int), (void *)&dst.cols));
args.push_back(make_pair(sizeof(cl_int), (void *)&dst.rows));
- args.push_back(make_pair(sizeof(cl_int),(void*)&src.wholecols));
- args.push_back(make_pair(sizeof(cl_int),(void*)&src.wholerows));
+ args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholecols));
+ args.push_back(make_pair(sizeof(cl_int), (void *)&src.wholerows));
args.push_back(make_pair(sizeof(cl_int), (void *)&src_pix_per_row));
//args.push_back(make_pair(sizeof(cl_int),(void*)&src_offset_x));
//args.push_back(make_pair(sizeof(cl_int),(void*)&src_offset_y));
void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY, Point anchor, double delta, int bordertype)
{
- if((dst.cols!=dst.wholecols) || (dst.rows!=dst.wholerows))//has roi
- {
- if((bordertype & cv::BORDER_ISOLATED) != 0)
- {
- bordertype &= ~cv::BORDER_ISOLATED;
- if((bordertype != cv::BORDER_CONSTANT) &&
- (bordertype != cv::BORDER_REPLICATE))
- {
- CV_Error(CV_StsBadArg,"unsupported border type");
- }
- }
- }
+ if((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi
+ {
+ if((bordertype & cv::BORDER_ISOLATED) != 0)
+ {
+ bordertype &= ~cv::BORDER_ISOLATED;
+ if((bordertype != cv::BORDER_CONSTANT) &&
+ (bordertype != cv::BORDER_REPLICATE))
+ {
+ CV_Error(CV_StsBadArg, "unsupported border type");
+ }
+ }
+ }
if( ddepth < 0 )
ddepth = src.depth();
//CV_Assert(ddepth == src.depth());
Ptr<FilterEngine_GPU> cv::ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType )
{
- Mat kx, ky;
- getDerivKernels( kx, ky, dx, dy, ksize, false, CV_32F );
- return createSeparableLinearFilter_GPU(srcType, dstType,
- kx, ky, Point(-1,-1), 0, borderType );
+ Mat kx, ky;
+ getDerivKernels( kx, ky, dx, dy, ksize, false, CV_32F );
+ return createSeparableLinearFilter_GPU(srcType, dstType,
+ kx, ky, Point(-1, -1), 0, borderType );
}
////////////////////////////////////////////////////////////////////////////////////////////////////
void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, double scale)
{
- if(src.clCxt -> impl -> double_support ==0 && src.type() == CV_64F)
+ if(src.clCxt -> impl -> double_support == 0 && src.type() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
}
src.copyTo(dst);
return;
}
- if((dst.cols!=dst.wholecols) || (dst.rows!=dst.wholerows))//has roi
- {
- if((bordertype & cv::BORDER_ISOLATED) != 0)
- {
- bordertype &= ~cv::BORDER_ISOLATED;
- if((bordertype != cv::BORDER_CONSTANT) &&
- (bordertype != cv::BORDER_REPLICATE))
- {
- CV_Error(CV_StsBadArg,"unsupported border type");
- }
- }
- }
+ if((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi
+ {
+ if((bordertype & cv::BORDER_ISOLATED) != 0)
+ {
+ bordertype &= ~cv::BORDER_ISOLATED;
+ if((bordertype != cv::BORDER_CONSTANT) &&
+ (bordertype != cv::BORDER_REPLICATE))
+ {
+ CV_Error(CV_StsBadArg, "unsupported border type");
+ }
+ }
+ }
dst.create(src.size(), src.type());
if( bordertype != BORDER_CONSTANT )
{
#include "clAmdBlas.h"
#if !defined (HAVE_OPENCL)
-void cv::ocl::dft(const oclMat& src, oclMat& dst, int flags) { throw_nogpu(); }
+void cv::ocl::dft(const oclMat &src, oclMat &dst, int flags)
+{
+ throw_nogpu();
+}
#else
using namespace cv;
- void cv::ocl::gemm(const oclMat& src1, const oclMat& src2, double alpha,
- const oclMat& src3, double beta, oclMat& dst, int flags)
- {
- CV_Assert(src1.cols == src2.rows &&
- (src3.empty() || src1.rows == src3.rows && src2.cols == src3.cols));
- CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported
- if(!src3.empty())
- {
- src3.copyTo(dst);
- }
- else
- {
- dst.create(src1.rows, src2.cols, src1.type());
- dst.setTo(Scalar::all(0));
- }
- openCLSafeCall( clAmdBlasSetup() );
-
- const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags)?clAmdBlasTrans:clAmdBlasNoTrans;
- const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags)?clAmdBlasTrans:clAmdBlasNoTrans;
- const clAmdBlasOrder order = clAmdBlasRowMajor;
+void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
+ const oclMat &src3, double beta, oclMat &dst, int flags)
+{
+ CV_Assert(src1.cols == src2.rows &&
+ (src3.empty() || src1.rows == src3.rows && src2.cols == src3.cols));
+ CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported
+ if(!src3.empty())
+ {
+ src3.copyTo(dst);
+ }
+ else
+ {
+ dst.create(src1.rows, src2.cols, src1.type());
+ dst.setTo(Scalar::all(0));
+ }
+ openCLSafeCall( clAmdBlasSetup() );
- const int M = src1.rows;
- const int N = src2.cols;
- const int K = src1.cols;
- int lda = src1.step;
- int ldb = src2.step;
- int ldc = dst.step;
- int offa = src1.offset;
- int offb = src2.offset;
- int offc = dst.offset;
+ const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans;
+ const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans;
+ const clAmdBlasOrder order = clAmdBlasRowMajor;
+ const int M = src1.rows;
+ const int N = src2.cols;
+ const int K = src1.cols;
+ int lda = src1.step;
+ int ldb = src2.step;
+ int ldc = dst.step;
+ int offa = src1.offset;
+ int offb = src2.offset;
+ int offc = dst.offset;
- switch(src1.type())
- {
- case CV_32FC1:
- lda /= sizeof(float);
- ldb /= sizeof(float);
- ldc /= sizeof(float);
- offa /= sizeof(float);
- offb /= sizeof(float);
- offc /= sizeof(float);
- openCLSafeCall
- (
- clAmdBlasSgemmEx(order, transA, transB, M, N, K,
- alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
- beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
- );
- break;
- case CV_64FC1:
- lda /= sizeof(double);
- ldb /= sizeof(double);
- ldc /= sizeof(double);
- offa /= sizeof(double);
- offb /= sizeof(double);
- offc /= sizeof(double);
- openCLSafeCall
- (
- clAmdBlasDgemmEx(order, transA, transB, M, N, K,
- alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
- beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
- );
- break;
- case CV_32FC2:
- {
- lda /= sizeof(std::complex<float>);
- ldb /= sizeof(std::complex<float>);
- ldc /= sizeof(std::complex<float>);
- offa /= sizeof(std::complex<float>);
- offb /= sizeof(std::complex<float>);
- offc /= sizeof(std::complex<float>);
- cl_float2 alpha_2 = {{alpha, 0}};
- cl_float2 beta_2 = {{beta, 0}};
- openCLSafeCall
- (
- clAmdBlasCgemmEx(order, transA, transB, M, N, K,
- alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
- beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
- );
- }
- break;
- case CV_64FC2:
- {
- lda /= sizeof(std::complex<double>);
- ldb /= sizeof(std::complex<double>);
- ldc /= sizeof(std::complex<double>);
- offa /= sizeof(std::complex<double>);
- offb /= sizeof(std::complex<double>);
- offc /= sizeof(std::complex<double>);
- cl_double2 alpha_2 = {{alpha, 0}};
- cl_double2 beta_2 = {{beta, 0}};
- openCLSafeCall
- (
- clAmdBlasZgemmEx(order, transA, transB, M, N, K,
- alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
- beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
- );
- }
- break;
- }
- clAmdBlasTeardown();
- }
+
+ switch(src1.type())
+ {
+ case CV_32FC1:
+ lda /= sizeof(float);
+ ldb /= sizeof(float);
+ ldc /= sizeof(float);
+ offa /= sizeof(float);
+ offb /= sizeof(float);
+ offc /= sizeof(float);
+ openCLSafeCall
+ (
+ clAmdBlasSgemmEx(order, transA, transB, M, N, K,
+ alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
+ beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
+ );
+ break;
+ case CV_64FC1:
+ lda /= sizeof(double);
+ ldb /= sizeof(double);
+ ldc /= sizeof(double);
+ offa /= sizeof(double);
+ offb /= sizeof(double);
+ offc /= sizeof(double);
+ openCLSafeCall
+ (
+ clAmdBlasDgemmEx(order, transA, transB, M, N, K,
+ alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
+ beta, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
+ );
+ break;
+ case CV_32FC2:
+ {
+ lda /= sizeof(std::complex<float>);
+ ldb /= sizeof(std::complex<float>);
+ ldc /= sizeof(std::complex<float>);
+ offa /= sizeof(std::complex<float>);
+ offb /= sizeof(std::complex<float>);
+ offc /= sizeof(std::complex<float>);
+ cl_float2 alpha_2 = {{alpha, 0}};
+ cl_float2 beta_2 = {{beta, 0}};
+ openCLSafeCall
+ (
+ clAmdBlasCgemmEx(order, transA, transB, M, N, K,
+ alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
+ beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
+ );
+ }
+ break;
+ case CV_64FC2:
+ {
+ lda /= sizeof(std::complex<double>);
+ ldb /= sizeof(std::complex<double>);
+ ldc /= sizeof(std::complex<double>);
+ offa /= sizeof(std::complex<double>);
+ offb /= sizeof(std::complex<double>);
+ offc /= sizeof(std::complex<double>);
+ cl_double2 alpha_2 = {{alpha, 0}};
+ cl_double2 beta_2 = {{beta, 0}};
+ openCLSafeCall
+ (
+ clAmdBlasZgemmEx(order, transA, transB, M, N, K,
+ alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb,
+ beta_2, (cl_mem)dst.data, offc, ldc, 1, &src1.clCxt->impl->clCmdQueue, 0, NULL, NULL)
+ );
+ }
+ break;
+ }
+ clAmdBlasTeardown();
+}
#endif
#endif
#include "precomp.hpp"
#include <stdio.h>
+#include <string>
#ifdef EMU
#include "runCL.h"
#endif
double globaltime = 0;
-CvHaarClassifierCascade*
+CvHaarClassifierCascade *
gpuCreateHaarClassifierCascade( int stage_count )
{
CvHaarClassifierCascade *cascade = 0;
}
/* create more efficient internal representation of haar classifier cascade */
-GpuHidHaarClassifierCascade*
+GpuHidHaarClassifierCascade *
gpuCreateHidHaarClassifierCascade( CvHaarClassifierCascade *cascade, int *size, int *totalclassifier)
{
GpuHidHaarClassifierCascade *out = 0;
bool findBiggestObject = (flags & CV_HAAR_FIND_BIGGEST_OBJECT) != 0;
bool roughSearch = (flags & CV_HAAR_DO_ROUGH_SEARCH) != 0;
+ //the Intel HD Graphics is unsupported
+ if (gimg.clCxt->impl->devName.find("Intel(R) HD Graphics") != string::npos)
+ {
+ cout << " Intel HD GPU device unsupported " << endl;
+ return NULL;
+ }
+
//double t = 0;
if( maxSize.height == 0 || maxSize.width == 0 )
{
vector<float> scalev;
for(factor = 1.f;; factor *= scaleFactor)
{
- CvSize winSize = { cvRound(winSize0.width *factor), cvRound(winSize0.height *factor) };
+ CvSize winSize = { cvRound(winSize0.width * factor), cvRound(winSize0.height * factor) };
sz.width = cvRound( gimg.cols / factor ) + 1;
sz.height = cvRound( gimg.rows / factor ) + 1;
CvSize sz1 = { sz.width - winSize0.width - 1, sz.height - winSize0.height - 1 };
size_t blocksize = 8;
size_t localThreads[3] = { blocksize, blocksize , 1 };
- size_t globalThreads[3] = { grp_per_CU * ((gsum.clCxt)->impl->maxComputeUnits) *localThreads[0],
+ size_t globalThreads[3] = { grp_per_CU *((gsum.clCxt)->impl->maxComputeUnits) *localThreads[0],
localThreads[1], 1
};
int outputsz = 256 * globalThreads[0] / localThreads[0];
//classifierbuffer = clCreateBuffer(gsum.clCxt->clContext,CL_MEM_READ_ONLY,sizeof(GpuHidHaarClassifier)*totalclassifier,NULL,&status);
//status = clEnqueueWriteBuffer(gsum.clCxt->clCmdQueue,classifierbuffer,1,0,sizeof(GpuHidHaarClassifier)*totalclassifier,classifier,0,NULL,NULL);
- nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY,nodenum * sizeof(GpuHidHaarTreeNode));
+ nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, nodenum * sizeof(GpuHidHaarTreeNode));
//openCLVerifyCall(status);
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, nodebuffer, 1, 0,
nodenum * sizeof(GpuHidHaarTreeNode),
int argcount = 0;
//int grpnumperline = ((m + localThreads[0] - 1) / localThreads[0]);
//int totalgrp = ((n + localThreads[1] - 1) / localThreads[1])*grpnumperline;
- // openCLVerifyKernel(gsum.clCxt, kernel, &blocksize, globalThreads, localThreads);
+ // openCLVerifyKernel(gsum.clCxt, kernel, &blocksize, globalThreads, localThreads);
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_mem),(void*)&cascadebuffer));
-
- vector<pair<size_t,const void *> > args;
+
+ vector<pair<size_t, const void *> > args;
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&nodebuffer ));
args.push_back ( make_pair(sizeof(cl_int4) , (void *)&p ));
args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq ));
args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction ));
- /*
- openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&stagebuffer));
- openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&scaleinfobuffer));
- openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&nodebuffer));
- openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsum.data));
- openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsqsum.data));
- openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&candidatebuffer));
- openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&pixelstep));
- openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&loopcount));
- openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startstage));
- openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitstage));
- openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&endstage));
- openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startnode));
- openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitnode));
- openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&p));
- openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&pq));
- openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_float), (void *)&correction));*/
+ /*
+ openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&stagebuffer));
+ openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&scaleinfobuffer));
+ openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&nodebuffer));
+ openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsum.data));
+ openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsqsum.data));
+ openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&candidatebuffer));
+ openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&pixelstep));
+ openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&loopcount));
+ openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startstage));
+ openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitstage));
+ openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&endstage));
+ openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startnode));
+ openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitnode));
+ openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&p));
+ openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&pq));
+ openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_float), (void *)&correction));*/
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&n));
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&grpnumperline));
//openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&totalgrp));
- // openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL));
+ // openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL));
- // openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue));
- openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1);
- //t = (double)cvGetTickCount() - t;
+ // openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue));
+ openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1);
+ //t = (double)cvGetTickCount() - t;
//printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
//t = (double)cvGetTickCount();
//openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->impl->clCmdQueue, candidatebuffer, 1, 0, 4 * sizeof(int)*outputsz, candidate, 0, NULL, NULL));
openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
for(int i = 0; i < outputsz; i++)
- if(candidate[4*i+2] != 0)
- allCandidates.push_back(Rect(candidate[4*i], candidate[4*i+1], candidate[4*i+2], candidate[4*i+3]));
+ if(candidate[4 * i + 2] != 0)
+ allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1], candidate[4 * i + 2], candidate[4 * i + 3]));
// t = (double)cvGetTickCount() - t;
//printf( "post time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
//t = (double)cvGetTickCount();
openCLSafeCall(clReleaseMemObject(scaleinfobuffer));
openCLSafeCall(clReleaseMemObject(nodebuffer));
openCLSafeCall(clReleaseMemObject(candidatebuffer));
- // openCLSafeCall(clReleaseKernel(kernel));
+ // openCLSafeCall(clReleaseKernel(kernel));
//t = (double)cvGetTickCount() - t;
//printf( "release time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
}
cvRound(factor * winsize0.height) < gimg.rows - 10;
n_factors++, factor *= scaleFactor )
{
- CvSize winSize = { cvRound( winsize0.width *factor ),
- cvRound( winsize0.height *factor )
+ CvSize winSize = { cvRound( winsize0.width * factor ),
+ cvRound( winsize0.height * factor )
};
if( winSize.width < minSize.width || winSize.height < minSize.height )
{
int nodenum = (datasize - sizeof(GpuHidHaarClassifierCascade) -
sizeof(GpuHidHaarStageClassifier) * gcascade->count - sizeof(GpuHidHaarClassifier) * totalclassifier) / sizeof(GpuHidHaarTreeNode);
nodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY,
- nodenum * sizeof(GpuHidHaarTreeNode));
+ nodenum * sizeof(GpuHidHaarTreeNode));
//openCLVerifyCall(status);
openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, nodebuffer, 1, 0,
nodenum * sizeof(GpuHidHaarTreeNode),
node, 0, NULL, NULL));
cl_mem newnodebuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_WRITE,
- loopcount * nodenum * sizeof(GpuHidHaarTreeNode));
+ loopcount * nodenum * sizeof(GpuHidHaarTreeNode));
int startstage = 0;
int endstage = gcascade->count;
//cl_kernel kernel;
int startnodenum = nodenum * i;
int argcounts = 0;
float factor2 = (float)factor;
- /*
- openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&nodebuffer));
- openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&newnodebuffer));
- openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&factor2));
- openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&correction[i]));
- openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_int), (void *)&startnodenum));
- */
-
- vector<pair<size_t,const void *> > args1;
+ /*
+ openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&nodebuffer));
+ openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&newnodebuffer));
+ openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&factor2));
+ openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&correction[i]));
+ openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_int), (void *)&startnodenum));
+ */
+
+ vector<pair<size_t, const void *> > args1;
args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&nodebuffer ));
args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&newnodebuffer ));
args1.push_back ( make_pair(sizeof(cl_float) , (void *)&factor2 ));
args1.push_back ( make_pair(sizeof(cl_float) , (void *)&correction[i] ));
args1.push_back ( make_pair(sizeof(cl_int) , (void *)&startnodenum ));
-
- size_t globalThreads2[3] = {nodenum,1,1};
- size_t localThreads2[3] = {256,1,1};
-
- openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
+
+ size_t globalThreads2[3] = {nodenum, 1, 1};
+ size_t localThreads2[3] = {256, 1, 1};
+
+ openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuscaleclassifier", globalThreads2, NULL/*localThreads2*/, args1, -1, -1);
//clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel2, 1, NULL, globalThreads2, 0, 0, NULL, NULL);
//clFinish(gsum.clCxt->impl->clCmdQueue);
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&correctionbuffer));
openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&nodenum));*/
- vector<pair<size_t,const void *> > args;
+ vector<pair<size_t, const void *> > args;
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&stagebuffer ));
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&scaleinfobuffer ));
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&newnodebuffer ));
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&pbuffer ));
args.push_back ( make_pair(sizeof(cl_mem) , (void *)&correctionbuffer ));
args.push_back ( make_pair(sizeof(cl_int) , (void *)&nodenum ));
-
-
- openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
+
+
+ openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1);
//openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL));
//openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue));
for(int i = 0; i < outputsz; i++)
{
- if(candidate[4*i+2] != 0)
- allCandidates.push_back(Rect(candidate[4*i], candidate[4*i+1], candidate[4*i+2], candidate[4*i+3]));
+ if(candidate[4 * i + 2] != 0)
+ allCandidates.push_back(Rect(candidate[4 * i], candidate[4 * i + 1], candidate[4 * i + 2], candidate[4 * i + 3]));
}
free(scaleinfo);
}
-CvHaarClassifierCascade*
+CvHaarClassifierCascade *
gpuLoadCascadeCART( const char **input_cascade, int n, CvSize orig_window_size )
{
int i;
assert( count > 0 );
cascade->stage_classifier[i].count = count;
cascade->stage_classifier[i].classifier =
- (CvHaarClassifier *)cvAlloc( count * sizeof(cascade->stage_classifier[i].classifier[0]));
+ (CvHaarClassifier *)cvAlloc( count * sizeof(cascade->stage_classifier[i].classifier[0]));
for( j = 0; j < count; j++ )
{
stage += dl;
classifier->haar_feature = (CvHaarFeature *) cvAlloc(
- classifier->count * ( sizeof( *classifier->haar_feature ) +
- sizeof( *classifier->threshold ) +
- sizeof( *classifier->left ) +
- sizeof( *classifier->right ) ) +
- (classifier->count + 1) * sizeof( *classifier->alpha ) );
+ classifier->count * ( sizeof( *classifier->haar_feature ) +
+ sizeof( *classifier->threshold ) +
+ sizeof( *classifier->left ) +
+ sizeof( *classifier->right ) ) +
+ (classifier->count + 1) * sizeof( *classifier->alpha ) );
classifier->threshold = (float *) (classifier->haar_feature + classifier->count);
classifier->left = (int *) (classifier->threshold + classifier->count);
classifier->right = (int *) (classifier->left + classifier->count);
CvRect r;
int band = 0;
sscanf( stage, "%d%d%d%d%d%f%n",
- &r.x, &r.y, &r.width, &r.height, &band,
- &(classifier->haar_feature[l].rect[k].weight), &dl );
+ &r.x, &r.y, &r.width, &r.height, &band,
+ &(classifier->haar_feature[l].rect[k].weight), &dl );
stage += dl;
classifier->haar_feature[l].rect[k].r = r;
}
for( k = rects; k < CV_HAAR_FEATURE_MAX; k++ )
{
memset( classifier->haar_feature[l].rect + k, 0,
- sizeof(classifier->haar_feature[l].rect[k]) );
+ sizeof(classifier->haar_feature[l].rect[k]) );
}
sscanf( stage, "%f%d%d%n", &(classifier->threshold[l]),
- &(classifier->left[l]),
- &(classifier->right[l]), &dl );
+ &(classifier->left[l]),
+ &(classifier->right[l]), &dl );
stage += dl;
}
for( l = 0; l <= classifier->count; l++ )
#define _MAX_PATH 1024
#endif
-CV_IMPL CvHaarClassifierCascade*
+CV_IMPL CvHaarClassifierCascade *
gpuLoadHaarClassifierCascade( const char *directory, CvSize orig_window_size )
{
const char **input_cascade = 0;
return CV_IS_HAAR_CLASSIFIER( struct_ptr );
}
-void*
+void *
gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node )
{
CvHaarClassifierCascade *cascade = NULL;
trees_fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_TREES_NAME );
if( !trees_fn || !CV_NODE_IS_SEQ( trees_fn->tag )
- || trees_fn->data.seq->total <= 0 )
+ || trees_fn->data.seq->total <= 0 )
{
sprintf( buf, "Trees node is not a valid sequence. (stage %d)", i );
CV_Error( CV_StsError, buf );
}
cascade->stage_classifier[i].classifier =
- (CvHaarClassifier *) cvAlloc( trees_fn->data.seq->total
- * sizeof( cascade->stage_classifier[i].classifier[0] ) );
+ (CvHaarClassifier *) cvAlloc( trees_fn->data.seq->total
+ * sizeof( cascade->stage_classifier[i].classifier[0] ) );
for( j = 0; j < trees_fn->data.seq->total; ++j )
{
cascade->stage_classifier[i].classifier[j].haar_feature = NULL;
if( !CV_NODE_IS_SEQ( tree_fn->tag ) || tree_fn->data.seq->total <= 0 )
{
sprintf( buf, "Tree node is not a valid sequence."
- " (stage %d, tree %d)", i, j );
+ " (stage %d, tree %d)", i, j );
CV_Error( CV_StsError, buf );
}
classifier->count = tree_fn->data.seq->total;
classifier->haar_feature = (CvHaarFeature *) cvAlloc(
- classifier->count * ( sizeof( *classifier->haar_feature ) +
- sizeof( *classifier->threshold ) +
- sizeof( *classifier->left ) +
- sizeof( *classifier->right ) ) +
- (classifier->count + 1) * sizeof( *classifier->alpha ) );
+ classifier->count * ( sizeof( *classifier->haar_feature ) +
+ sizeof( *classifier->threshold ) +
+ sizeof( *classifier->left ) +
+ sizeof( *classifier->right ) ) +
+ (classifier->count + 1) * sizeof( *classifier->alpha ) );
classifier->threshold = (float *) (classifier->haar_feature + classifier->count);
classifier->left = (int *) (classifier->threshold + classifier->count);
classifier->right = (int *) (classifier->left + classifier->count);
if( !CV_NODE_IS_MAP( node_fn->tag ) )
{
sprintf( buf, "Tree node %d is not a valid map. (stage %d, tree %d)",
- k, i, j );
+ k, i, j );
CV_Error( CV_StsError, buf );
}
feature_fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_FEATURE_NAME );
if( !feature_fn || !CV_NODE_IS_MAP( feature_fn->tag ) )
{
sprintf( buf, "Feature node is not a valid map. "
- "(stage %d, tree %d, node %d)", i, j, k );
+ "(stage %d, tree %d, node %d)", i, j, k );
CV_Error( CV_StsError, buf );
}
rects_fn = cvGetFileNodeByName( fs, feature_fn, ICV_HAAR_RECTS_NAME );
if( !rects_fn || !CV_NODE_IS_SEQ( rects_fn->tag )
- || rects_fn->data.seq->total < 1
- || rects_fn->data.seq->total > CV_HAAR_FEATURE_MAX )
+ || rects_fn->data.seq->total < 1
+ || rects_fn->data.seq->total > CV_HAAR_FEATURE_MAX )
{
sprintf( buf, "Rects node is not a valid sequence. "
- "(stage %d, tree %d, node %d)", i, j, k );
+ "(stage %d, tree %d, node %d)", i, j, k );
CV_Error( CV_StsError, buf );
}
cvStartReadSeq( rects_fn->data.seq, &rects_reader );
if( !CV_NODE_IS_SEQ( rect_fn->tag ) || rect_fn->data.seq->total != 5 )
{
sprintf( buf, "Rect %d is not a valid sequence. "
- "(stage %d, tree %d, node %d)", l, i, j, k );
+ "(stage %d, tree %d, node %d)", l, i, j, k );
CV_Error( CV_StsError, buf );
}
if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 )
{
sprintf( buf, "x coordinate must be non-negative integer. "
- "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
+ "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
CV_Error( CV_StsError, buf );
}
r.x = fn->data.i;
if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 )
{
sprintf( buf, "y coordinate must be non-negative integer. "
- "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
+ "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
CV_Error( CV_StsError, buf );
}
r.y = fn->data.i;
fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 2 );
if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0
- || r.x + fn->data.i > cascade->orig_window_size.width )
+ || r.x + fn->data.i > cascade->orig_window_size.width )
{
sprintf( buf, "width must be positive integer and "
- "(x + width) must not exceed window width. "
- "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
+ "(x + width) must not exceed window width. "
+ "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
CV_Error( CV_StsError, buf );
}
r.width = fn->data.i;
fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 3 );
if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0
- || r.y + fn->data.i > cascade->orig_window_size.height )
+ || r.y + fn->data.i > cascade->orig_window_size.height )
{
sprintf( buf, "height must be positive integer and "
- "(y + height) must not exceed window height. "
- "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
+ "(y + height) must not exceed window height. "
+ "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
CV_Error( CV_StsError, buf );
}
r.height = fn->data.i;
if( !CV_NODE_IS_REAL( fn->tag ) )
{
sprintf( buf, "weight must be real number. "
- "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
+ "(stage %d, tree %d, node %d, rect %d)", i, j, k, l );
CV_Error( CV_StsError, buf );
}
if( !fn || !CV_NODE_IS_INT( fn->tag ) )
{
sprintf( buf, "tilted must be 0 or 1. "
- "(stage %d, tree %d, node %d)", i, j, k );
+ "(stage %d, tree %d, node %d)", i, j, k );
CV_Error( CV_StsError, buf );
}
classifier->haar_feature[k].tilted = ( fn->data.i != 0 );
if( !fn || !CV_NODE_IS_REAL( fn->tag ) )
{
sprintf( buf, "threshold must be real number. "
- "(stage %d, tree %d, node %d)", i, j, k );
+ "(stage %d, tree %d, node %d)", i, j, k );
CV_Error( CV_StsError, buf );
}
classifier->threshold[k] = (float) fn->data.f;
if( fn )
{
if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k
- || fn->data.i >= tree_fn->data.seq->total )
+ || fn->data.i >= tree_fn->data.seq->total )
{
sprintf( buf, "left node must be valid node number. "
- "(stage %d, tree %d, node %d)", i, j, k );
+ "(stage %d, tree %d, node %d)", i, j, k );
CV_Error( CV_StsError, buf );
}
/* left node */
if( !fn )
{
sprintf( buf, "left node or left value must be specified. "
- "(stage %d, tree %d, node %d)", i, j, k );
+ "(stage %d, tree %d, node %d)", i, j, k );
CV_Error( CV_StsError, buf );
}
if( !CV_NODE_IS_REAL( fn->tag ) )
{
sprintf( buf, "left value must be real number. "
- "(stage %d, tree %d, node %d)", i, j, k );
+ "(stage %d, tree %d, node %d)", i, j, k );
CV_Error( CV_StsError, buf );
}
/* left value */
if( last_idx >= classifier->count + 1 )
{
sprintf( buf, "Tree structure is broken: too many values. "
- "(stage %d, tree %d, node %d)", i, j, k );
+ "(stage %d, tree %d, node %d)", i, j, k );
CV_Error( CV_StsError, buf );
}
classifier->left[k] = -last_idx;
if( fn )
{
if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k
- || fn->data.i >= tree_fn->data.seq->total )
+ || fn->data.i >= tree_fn->data.seq->total )
{
sprintf( buf, "right node must be valid node number. "
- "(stage %d, tree %d, node %d)", i, j, k );
+ "(stage %d, tree %d, node %d)", i, j, k );
CV_Error( CV_StsError, buf );
}
/* right node */
if( !fn )
{
sprintf( buf, "right node or right value must be specified. "
- "(stage %d, tree %d, node %d)", i, j, k );
+ "(stage %d, tree %d, node %d)", i, j, k );
CV_Error( CV_StsError, buf );
}
if( !CV_NODE_IS_REAL( fn->tag ) )
{
sprintf( buf, "right value must be real number. "
- "(stage %d, tree %d, node %d)", i, j, k );
+ "(stage %d, tree %d, node %d)", i, j, k );
CV_Error( CV_StsError, buf );
}
/* right value */
if( last_idx >= classifier->count + 1 )
{
sprintf( buf, "Tree structure is broken: too many values. "
- "(stage %d, tree %d, node %d)", i, j, k );
+ "(stage %d, tree %d, node %d)", i, j, k );
CV_Error( CV_StsError, buf );
}
classifier->right[k] = -last_idx;
if( last_idx != classifier->count + 1 )
{
sprintf( buf, "Tree structure is broken: too few values. "
- "(stage %d, tree %d)", i, j );
+ "(stage %d, tree %d)", i, j );
CV_Error( CV_StsError, buf );
}
fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_PARENT_NAME );
if( !fn || !CV_NODE_IS_INT( fn->tag )
- || fn->data.i < -1 || fn->data.i >= cascade->count )
+ || fn->data.i < -1 || fn->data.i >= cascade->count )
{
sprintf( buf, "parent must be integer number. (stage %d)", i );
CV_Error( CV_StsError, buf );
parent = fn->data.i;
fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_NEXT_NAME );
if( !fn || !CV_NODE_IS_INT( fn->tag )
- || fn->data.i < -1 || fn->data.i >= cascade->count )
+ || fn->data.i < -1 || fn->data.i >= cascade->count )
{
sprintf( buf, "next must be integer number. (stage %d)", i );
CV_Error( CV_StsError, buf );
void
gpuWriteHaarClassifier( CvFileStorage *fs, const char *name, const void *struct_ptr,
-CvAttrList attributes )
+ CvAttrList attributes )
{
int i, j, k, l;
char buf[256];
else
{
cvWriteReal( fs, ICV_HAAR_LEFT_VAL_NAME,
- tree->alpha[-tree->left[k]] );
+ tree->alpha[-tree->left[k]] );
}
if( tree->right[k] > 0 )
else
{
cvWriteReal( fs, ICV_HAAR_RIGHT_VAL_NAME,
- tree->alpha[-tree->right[k]] );
+ tree->alpha[-tree->right[k]] );
}
cvEndWriteStruct( fs ); /* split */
cvEndWriteStruct( fs ); /* root */
}
-void*
+void *
gpuCloneHaarClassifier( const void *struct_ptr )
{
CvHaarClassifierCascade *cascade = NULL;
int i, j, k, n;
const CvHaarClassifierCascade *cascade_src =
- (const CvHaarClassifierCascade *) struct_ptr;
+ (const CvHaarClassifierCascade *) struct_ptr;
n = cascade_src->count;
cascade = gpuCreateHaarClassifierCascade(n);
cascade->stage_classifier[i].count = 0;
cascade->stage_classifier[i].classifier =
- (CvHaarClassifier *) cvAlloc( cascade_src->stage_classifier[i].count
- * sizeof( cascade->stage_classifier[i].classifier[0] ) );
+ (CvHaarClassifier *) cvAlloc( cascade_src->stage_classifier[i].count
+ * sizeof( cascade->stage_classifier[i].classifier[0] ) );
cascade->stage_classifier[i].count = cascade_src->stage_classifier[i].count;
for( j = 0; j < cascade->stage_classifier[i].count; ++j )
{
const CvHaarClassifier *classifier_src =
- &cascade_src->stage_classifier[i].classifier[j];
+ &cascade_src->stage_classifier[i].classifier[j];
CvHaarClassifier *classifier =
- &cascade->stage_classifier[i].classifier[j];
+ &cascade->stage_classifier[i].classifier[j];
classifier->count = classifier_src->count;
classifier->haar_feature = (CvHaarFeature *) cvAlloc(
- classifier->count * ( sizeof( *classifier->haar_feature ) +
- sizeof( *classifier->threshold ) +
- sizeof( *classifier->left ) +
- sizeof( *classifier->right ) ) +
- (classifier->count + 1) * sizeof( *classifier->alpha ) );
+ classifier->count * ( sizeof( *classifier->haar_feature ) +
+ sizeof( *classifier->threshold ) +
+ sizeof( *classifier->left ) +
+ sizeof( *classifier->right ) ) +
+ (classifier->count + 1) * sizeof( *classifier->alpha ) );
classifier->threshold = (float *) (classifier->haar_feature + classifier->count);
classifier->left = (int *) (classifier->threshold + classifier->count);
classifier->right = (int *) (classifier->left + classifier->count);
classifier->alpha[k] = classifier_src->alpha[k];
}
classifier->alpha[classifier->count] =
- classifier_src->alpha[classifier->count];
+ classifier_src->alpha[classifier->count];
}
}
#if 0
CvType haar_type( CV_TYPE_NAME_HAAR, gpuIsHaarClassifier,
-(CvReleaseFunc)gpuReleaseHaarClassifierCascade,
-gpuReadHaarClassifier, gpuWriteHaarClassifier,
-gpuCloneHaarClassifier );
+ (CvReleaseFunc)gpuReleaseHaarClassifierCascade,
+ gpuReadHaarClassifier, gpuWriteHaarClassifier,
+ gpuCloneHaarClassifier );
namespace cv
}
void HaarClassifierCascade::detectMultiScale( const Mat &image,
- Vector<Rect>& objects, double scaleFactor,
- int minNeighbors, int flags,
- Size minSize )
+ Vector<Rect> &objects, double scaleFactor,
+ int minNeighbors, int flags,
+ Size minSize )
{
MemStorage storage(cvCreateMemStorage(0));
CvMat _image = image;
CvSeq *_objects = gpuHaarDetectObjects( &_image, cascade, storage, scaleFactor,
- minNeighbors, flags, minSize );
+ minNeighbors, flags, minSize );
Seq<Rect>(_objects).copyTo(objects);
}
}
void HaarClassifierCascade::setImages( const Mat &sum, const Mat &sqsum,
- const Mat &tilted, double scale )
+ const Mat &tilted, double scale )
{
CvMat _sum = sum, _sqsum = sqsum, _tilted = tilted;
gpuSetImagesForHaarClassifierCascade( cascade, &_sum, &_sqsum, &_tilted, scale );
CV_INLINE
double gpuEvalHidHaarClassifier( GpuHidHaarClassifier *classifier,
-double variance_norm_factor,
-size_t p_offset )
+ double variance_norm_factor,
+ size_t p_offset )
{
/*
int idx = 0;
CV_IMPL int
gpuRunHaarClassifierCascade( const CvHaarClassifierCascade *_cascade,
-CvPoint pt, int start_stage )
+ CvPoint pt, int start_stage )
{
/*
int result = -1;
struct gpuHaarDetectObjects_ScaleImage_Invoker
{
gpuHaarDetectObjects_ScaleImage_Invoker( const CvHaarClassifierCascade *_cascade,
- int _stripSize, double _factor,
- const Mat &_sum1, const Mat &_sqsum1, Mat *_norm1,
- Mat *_mask1, Rect _equRect, ConcurrentRectVector &_vec )
+ int _stripSize, double _factor,
+ const Mat &_sum1, const Mat &_sqsum1, Mat *_norm1,
+ Mat *_mask1, Rect _equRect, ConcurrentRectVector &_vec )
{
cascade = _cascade;
stripSize = _stripSize;
{
if( gpuRunHaarClassifierCascade( cascade, cvPoint(x, y), 0 ) > 0 )
vec->push_back(Rect(cvRound(x * factor), cvRound(y * factor),
- winSize.width, winSize.height));
+ winSize.width, winSize.height));
}
}
struct gpuHaarDetectObjects_ScaleCascade_Invoker
{
gpuHaarDetectObjects_ScaleCascade_Invoker( const CvHaarClassifierCascade *_cascade,
- Size _winsize, const Range &_xrange, double _ystep,
- size_t _sumstep, const int **_p, const int **_pq,
- ConcurrentRectVector &_vec )
+ Size _winsize, const Range &_xrange, double _ystep,
+ size_t _sumstep, const int **_p, const int **_pq,
+ ConcurrentRectVector &_vec )
{
cascade = _cascade;
winsize = _winsize;
#if !defined (HAVE_OPENCL)
-cv::ocl::HOGDescriptor::HOGDescriptor(Size, Size, Size, Size, int, double, double, bool, int) { throw_nogpu(); }
-size_t cv::ocl::HOGDescriptor::getDescriptorSize() const { throw_nogpu(); return 0; }
-size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const { throw_nogpu(); return 0; }
-double cv::ocl::HOGDescriptor::getWinSigma() const { throw_nogpu(); return 0; }
-bool cv::ocl::HOGDescriptor::checkDetectorSize() const { throw_nogpu(); return false; }
-void cv::ocl::HOGDescriptor::setSVMDetector(const vector<float>&) { throw_nogpu(); }
-void cv::ocl::HOGDescriptor::detect(const oclMat&, vector<Point>&, double, Size, Size) { throw_nogpu(); }
-void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat&, vector<Rect>&, double, Size, Size, double, int) { throw_nogpu(); }
-void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat&) { throw_nogpu(); }
-void cv::ocl::HOGDescriptor::getDescriptors(const oclMat&, Size, oclMat&, int) { throw_nogpu(); }
-std::vector<float> cv::ocl::HOGDescriptor::getDefaultPeopleDetector() { throw_nogpu(); return std::vector<float>(); }
-std::vector<float> cv::ocl::HOGDescriptor::getPeopleDetector48x96() { throw_nogpu(); return std::vector<float>(); }
-std::vector<float> cv::ocl::HOGDescriptor::getPeopleDetector64x128() { throw_nogpu(); return std::vector<float>(); }
+cv::ocl::HOGDescriptor::HOGDescriptor(Size, Size, Size, Size, int, double, double, bool, int)
+{
+ throw_nogpu();
+}
+size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
+{
+ throw_nogpu();
+ return 0;
+}
+size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const
+{
+ throw_nogpu();
+ return 0;
+}
+double cv::ocl::HOGDescriptor::getWinSigma() const
+{
+ throw_nogpu();
+ return 0;
+}
+bool cv::ocl::HOGDescriptor::checkDetectorSize() const
+{
+ throw_nogpu();
+ return false;
+}
+void cv::ocl::HOGDescriptor::setSVMDetector(const vector<float> &)
+{
+ throw_nogpu();
+}
+void cv::ocl::HOGDescriptor::detect(const oclMat &, vector<Point> &, double, Size, Size)
+{
+ throw_nogpu();
+}
+void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &, vector<Rect> &, double, Size, Size, double, int)
+{
+ throw_nogpu();
+}
+void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &)
+{
+ throw_nogpu();
+}
+void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &, Size, oclMat &, int)
+{
+ throw_nogpu();
+}
+std::vector<float> cv::ocl::HOGDescriptor::getDefaultPeopleDetector()
+{
+ throw_nogpu();
+ return std::vector<float>();
+}
+std::vector<float> cv::ocl::HOGDescriptor::getPeopleDetector48x96()
+{
+ throw_nogpu();
+ return std::vector<float>();
+}
+std::vector<float> cv::ocl::HOGDescriptor::getPeopleDetector64x128()
+{
+ throw_nogpu();
+ return std::vector<float>();
+}
#else
#define CELLS_PER_BLOCK_Y 2
#define NTHREADS 256
-namespace cv { namespace ocl
+namespace cv
{
- ///////////////////////////OpenCL kernel strings///////////////////////////
- extern const char *objdetect_hog;
-}}
+ namespace ocl
+ {
+ ///////////////////////////OpenCL kernel strings///////////////////////////
+ extern const char *objdetect_hog;
+ }
+}
-namespace cv { namespace ocl { namespace device
+namespace cv
{
- namespace hog
+ namespace ocl
{
- int cnbins;
- int cblock_stride_x;
- int cblock_stride_y;
- int cnblocks_win_x;
- int cnblocks_win_y;
- int cblock_hist_size;
- int cblock_hist_size_2up;
- int cdescr_size;
- int cdescr_width;
-
- void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
- int nblocks_win_x, int nblocks_win_y);
-
- void compute_hists(int nbins, int block_stride_x, int blovck_stride_y,
- int height, int width, const cv::ocl::oclMat& grad,
- const cv::ocl::oclMat& qangle, float sigma, cv::ocl::oclMat& block_hists);
-
- void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
- int height, int width, cv::ocl::oclMat& block_hists, float threshold);
-
- void classify_hists(int win_height, int win_width, int block_stride_y,
- int block_stride_x, int win_stride_y, int win_stride_x, int height,
- int width, const cv::ocl::oclMat& block_hists, const cv::ocl::oclMat& coefs, float free_coef,
- float threshold, cv::ocl::oclMat& labels);
-
- void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
- int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat& block_hists,
- cv::ocl::oclMat& descriptors);
- void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
- int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat& block_hists,
- cv::ocl::oclMat& descriptors);
-
- void compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat& img,
- float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma);
- void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat& img,
- float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma);
-
- void resize( const oclMat &src, oclMat &dst, const Size sz);
+ namespace device
+ {
+ namespace hog
+ {
+ int cnbins;
+ int cblock_stride_x;
+ int cblock_stride_y;
+ int cnblocks_win_x;
+ int cnblocks_win_y;
+ int cblock_hist_size;
+ int cblock_hist_size_2up;
+ int cdescr_size;
+ int cdescr_width;
+
+ void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
+ int nblocks_win_x, int nblocks_win_y);
+
+ void compute_hists(int nbins, int block_stride_x, int blovck_stride_y,
+ int height, int width, const cv::ocl::oclMat &grad,
+ const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists);
+
+ void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
+ int height, int width, cv::ocl::oclMat &block_hists, float threshold);
+
+ void classify_hists(int win_height, int win_width, int block_stride_y,
+ int block_stride_x, int win_stride_y, int win_stride_x, int height,
+ int width, const cv::ocl::oclMat &block_hists, const cv::ocl::oclMat &coefs, float free_coef,
+ float threshold, cv::ocl::oclMat &labels);
+
+ void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
+ int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat &block_hists,
+ cv::ocl::oclMat &descriptors);
+ void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
+ int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat &block_hists,
+ cv::ocl::oclMat &descriptors);
+
+ void compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img,
+ float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma);
+ void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img,
+ float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma);
+
+ void resize( const oclMat &src, oclMat &dst, const Size sz);
+ }
+ }
}
-}}}
+}
using namespace ::cv::ocl::device;
cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, Size cell_size_,
int nbins_, double win_sigma_, double threshold_L2hys_, bool gamma_correction_, int nlevels_)
- : win_size(win_size_),
- block_size(block_size_),
- block_stride(block_stride_),
- cell_size(cell_size_),
- nbins(nbins_),
- win_sigma(win_sigma_),
- threshold_L2hys(threshold_L2hys_),
- gamma_correction(gamma_correction_),
- nlevels(nlevels_)
+ : win_size(win_size_),
+ block_size(block_size_),
+ block_stride(block_stride_),
+ cell_size(cell_size_),
+ nbins(nbins_),
+ win_sigma(win_sigma_),
+ threshold_L2hys(threshold_L2hys_),
+ gamma_correction(gamma_correction_),
+ nlevels(nlevels_)
{
CV_Assert((win_size.width - block_size.width ) % block_stride.width == 0 &&
(win_size.height - block_size.height) % block_stride.height == 0);
return detector_size == 0 || detector_size == descriptor_size || detector_size == descriptor_size + 1;
}
-void cv::ocl::HOGDescriptor::setSVMDetector(const vector<float>& _detector)
+void cv::ocl::HOGDescriptor::setSVMDetector(const vector<float> &_detector)
{
std::vector<float> detector_reordered(_detector.size());
for (int i = 0; i < blocks_per_img.height; ++i)
for (int j = 0; j < blocks_per_img.width; ++j)
{
- const float* src = &_detector[0] + (j * blocks_per_img.height + i) * block_hist_size;
- float* dst = &detector_reordered[0] + (i * blocks_per_img.width + j) * block_hist_size;
+ const float *src = &_detector[0] + (j * blocks_per_img.height + i) * block_hist_size;
+ float *dst = &detector_reordered[0] + (i * blocks_per_img.width + j) * block_hist_size;
for (size_t k = 0; k < block_hist_size; ++k)
dst[k] = src[k];
}
CV_Assert(checkDetectorSize());
}
-void cv::ocl::HOGDescriptor::init_buffer(const oclMat& img, Size win_stride)
+void cv::ocl::HOGDescriptor::init_buffer(const oclMat &img, Size win_stride)
{
if (!image_scale.empty())
return;
labels.create(1, wins_per_img.area(), CV_8U);
}
-void cv::ocl::HOGDescriptor::computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle)
+void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle)
{
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
}
-void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat& img)
+void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &img)
{
computeGradient(img, grad, qangle);
- hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width,
- grad, qangle, (float)getWinSigma(), block_hists);
+ hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width,
+ grad, qangle, (float)getWinSigma(), block_hists);
- hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width,
- block_hists, (float)threshold_L2hys);
+ hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width,
+ block_hists, (float)threshold_L2hys);
}
-void cv::ocl::HOGDescriptor::getDescriptors(const oclMat& img, Size win_stride, oclMat& descriptors, int descr_format)
+void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride, oclMat &descriptors, int descr_format)
{
CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0);
{
case DESCR_FORMAT_ROW_BY_ROW:
hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width,
- win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors);
+ win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors);
break;
case DESCR_FORMAT_COL_BY_COL:
hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width,
- win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors);
+ win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors);
break;
default:
CV_Error(CV_StsBadArg, "Unknown descriptor format");
}
-void cv::ocl::HOGDescriptor::detect(const oclMat& img, vector<Point>& hits, double hit_threshold, Size win_stride, Size padding)
+void cv::ocl::HOGDescriptor::detect(const oclMat &img, vector<Point> &hits, double hit_threshold, Size win_stride, Size padding)
{
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
CV_Assert(padding == Size(0, 0));
detector, (float)free_coef, (float)hit_threshold, labels);
labels.download(labels_host);
- unsigned char* vec = labels_host.ptr();
+ unsigned char *vec = labels_host.ptr();
Size wins_per_img = numPartsWithin(effect_size, win_size, win_stride);
for (int i = 0; i < wins_per_img.area(); i++)
{
-void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat& img, vector<Rect>& found_locations, double hit_threshold,
- Size win_stride, Size padding, double scale0, int group_threshold)
+void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, vector<Rect> &found_locations, double hit_threshold,
+ Size win_stride, Size padding, double scale0, int group_threshold)
{
CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
CV_Assert(scale0 > 1);
for (levels = 0; levels < nlevels; levels++)
{
level_scale.push_back(scale);
- if (cvRound(img.cols/scale) < win_size.width ||
- cvRound(img.rows/scale) < win_size.height || scale0 <= 1)
+ if (cvRound(img.cols / scale) < win_size.width ||
+ cvRound(img.rows / scale) < win_size.height || scale0 <= 1)
break;
scale *= scale0;
}
std::vector<float> cv::ocl::HOGDescriptor::getPeopleDetector48x96()
{
- static const float detector[] = {
+ static const float detector[] =
+ {
0.294350f, -0.098796f, -0.129522f, 0.078753f, 0.387527f, 0.261529f,
0.145939f, 0.061520f, 0.328699f, 0.227148f, -0.066467f, -0.086723f,
0.047559f, 0.106714f, 0.037897f, 0.111461f, -0.024406f, 0.304769f,
0.099937f, 0.091059f, 0.247307f, 0.204226f, -0.042753f, -0.068580f,
-0.119002f, 0.026722f, 0.034853f, -0.060934f, -0.025054f, -0.093026f,
-0.035372f, -0.233209f, -0.049869f, -0.039151f, -0.022279f, -0.065380f,
- -9.063785f };
- return vector<float>(detector, detector + sizeof(detector)/sizeof(detector[0]));
+ -9.063785f
+ };
+ return vector<float>(detector, detector + sizeof(detector) / sizeof(detector[0]));
}
std::vector<float> cv::ocl::HOGDescriptor::getPeopleDetector64x128()
{
- static const float detector[] = {
- 0.05359386f, -0.14721455f, -0.05532170f, 0.05077307f,
- 0.11547081f, -0.04268804f, 0.04635834f, -0.05468199f, 0.08232084f,
- 0.10424068f, -0.02294518f, 0.01108519f, 0.01378693f, 0.11193510f,
- 0.01268418f, 0.08528346f, -0.06309239f, 0.13054633f, 0.08100729f,
- -0.05209739f, -0.04315529f, 0.09341384f, 0.11035026f, -0.07596218f,
- -0.05517511f, -0.04465296f, 0.02947334f, 0.04555536f,
- -3.55954492e-003f, 0.07818956f, 0.07730991f, 0.07890715f, 0.06222893f,
- 0.09001380f, -0.03574381f, 0.03414327f, 0.05677258f, -0.04773581f,
- 0.03746637f, -0.03521175f, 0.06955440f, -0.03849038f, 0.01052293f,
- 0.01736112f, 0.10867710f, 0.08748853f, 3.29739624e-003f, 0.10907028f,
- 0.07913758f, 0.10393070f, 0.02091867f, 0.11594022f, 0.13182420f,
- 0.09879354f, 0.05362710f, -0.06745391f, -7.01260753e-003f,
- 5.24702156e-003f, 0.03236255f, 0.01407916f, 0.02207983f, 0.02537322f,
- 0.04547948f, 0.07200756f, 0.03129894f, -0.06274468f, 0.02107014f,
- 0.06035208f, 0.08636236f, 4.53164103e-003f, 0.02193363f, 0.02309801f,
- 0.05568166f, -0.02645093f, 0.04448695f, 0.02837519f, 0.08975694f,
- 0.04461516f, 0.08975355f, 0.07514391f, 0.02306982f, 0.10410084f,
- 0.06368385f, 0.05943464f, 4.58420580e-003f, 0.05220337f, 0.06675851f,
- 0.08358569f, 0.06712101f, 0.06559004f, -0.03930482f, -9.15936660e-003f,
- -0.05897915f, 0.02816453f, 0.05032348f, 0.06780671f, 0.03377650f,
- -6.09417039e-004f, -0.01795146f, -0.03083684f, -0.01302475f,
- -0.02972313f, 7.88706727e-003f, -0.03525961f, -2.50397739e-003f,
- 0.05245084f, 0.11791293f, -0.02167498f, 0.05299332f, 0.06640524f,
- 0.05190265f, -8.27316567e-003f, 0.03033127f, 0.05842173f,
- -4.01050318e-003f, -6.25105947e-003f, 0.05862958f, -0.02465461f,
- 0.05546781f, -0.08228195f, -0.07234028f, 0.04640540f, -0.01308254f,
- -0.02506191f, 0.03100746f, -0.04665651f, -0.04591486f, 0.02949927f,
- 0.06035462f, 0.02244646f, -0.01698639f, 0.01040041f, 0.01131170f,
- 0.05419579f, -0.02130277f, -0.04321722f, -0.03665198f, 0.01126490f,
- -0.02606488f, -0.02228328f, -0.02255680f, -0.03427236f,
- -7.75165204e-003f, -0.06195229f, 8.21638294e-003f, 0.09535975f,
- -0.03709979f, -0.06942501f, 0.14579427f, -0.05448192f, -0.02055904f,
- 0.05747357f, 0.02781788f, -0.07077577f, -0.05178314f, -0.10429011f,
- -0.11235505f, 0.07529039f, -0.07559302f, -0.08786739f, 0.02983843f,
- 0.02667585f, 0.01382199f, -0.01797496f, -0.03141199f, -0.02098101f,
- 0.09029204f, 0.04955018f, 0.13718739f, 0.11379953f, 1.80019124e-003f,
- -0.04577610f, -1.11108483e-003f, -0.09470536f, -0.11596080f,
- 0.04489342f, 0.01784211f, 3.06850672e-003f, 0.10781866f,
- 3.36498418e-003f, -0.10842580f, -0.07436839f, -0.10535070f,
- -0.01866805f, 0.16057891f, -5.07316366e-003f, -0.04295658f,
- -5.90488780e-003f, 8.82003549e-003f, -0.01492646f, -0.05029279f,
- -0.12875880f, 8.78831954e-004f, -0.01297184f, -0.07592774f,
- -0.02668831f, -6.93787413e-004f, 0.02406698f, -0.01773298f,
- -0.03855745f, -0.05877856f, 0.03259695f, 0.12826584f, 0.06292590f,
- -4.10733931e-003f, 0.10996531f, 0.01332991f, 0.02088735f, 0.04037504f,
- -0.05210760f, 0.07760046f, 0.06399347f, -0.05751930f, -0.10053057f,
- 0.07505023f, -0.02139782f, 0.01796176f, 2.34400877e-003f, -0.04208319f,
- 0.07355055f, 0.05093350f, -0.02996780f, -0.02219072f, 0.03355330f,
- 0.04418742f, -0.05580705f, -0.05037573f, -0.04548179f, 0.01379514f,
- 0.02150671f, -0.02194211f, -0.13682702f, 0.05464972f, 0.01608082f,
- 0.05309116f, 0.04701022f, 1.33690401e-003f, 0.07575664f, 0.09625306f,
- 8.92647635e-003f, -0.02819123f, 0.10866830f, -0.03439325f,
- -0.07092371f, -0.06004780f, -0.02712298f, -7.07467366e-003f,
- -0.01637020f, 0.01336790f, -0.10313606f, 0.04906582f, -0.05732445f,
- -0.02731079f, 0.01042235f, -0.08340668f, 0.03686501f, 0.06108340f,
- 0.01322748f, -0.07809529f, 0.03774724f, -0.03413248f, -0.06096525f,
- -0.04212124f, -0.07982176f, -1.25973229e-003f, -0.03045501f,
- -0.01236493f, -0.06312395f, 0.04789570f, -0.04602066f, 0.08576570f,
- 0.02521080f, 0.02988098f, 0.10314583f, 0.07060035f, 0.04520544f,
- -0.04426654f, 0.13146530f, 0.08386490f, 0.02164590f, -2.12280243e-003f,
- -0.03686353f, -0.02074944f, -0.03829959f, -0.01530596f, 0.02689708f,
- 0.11867401f, -0.06043470f, -0.02785023f, -0.04775074f, 0.04878745f,
- 0.06350956f, 0.03494788f, 0.01467400f, 1.17890188e-003f, 0.04379614f,
- 2.03681854e-003f, -0.03958609f, -0.01072688f, 6.43705716e-003f,
- 0.02996500f, -0.03418507f, -0.01960307f, -0.01219154f,
- -4.37000440e-003f, -0.02549453f, 0.02646318f, -0.01632513f,
- 6.46516960e-003f, -0.01929734f, 4.78711911e-003f, 0.04962371f,
- 0.03809111f, 0.07265724f, 0.05758125f, -0.03741554f, 0.01648608f,
- -8.45285598e-003f, 0.03996826f, -0.08185477f, 0.02638875f,
- -0.04026615f, -0.02744674f, -0.04071517f, 1.05096330e-003f,
- -0.04741232f, -0.06733172f, 8.70434940e-003f, -0.02192543f,
- 1.35350740e-003f, -0.03056974f, -0.02975521f, -0.02887780f,
- -0.01210713f, -0.04828526f, -0.09066251f, -0.09969629f, -0.03665164f,
- -8.88111943e-004f, -0.06826669f, -0.01866150f, -0.03627640f,
- -0.01408288f, 0.01874239f, -0.02075835f, 0.09145175f, -0.03547291f,
- 0.05396780f, 0.04198981f, 0.01301925f, -0.03384354f, -0.12201976f,
- 0.06830920f, -0.03715654f, 9.55848210e-003f, 5.05685573e-003f,
- 0.05659294f, 3.90764466e-003f, 0.02808490f, -0.05518097f, -0.03711621f,
- -0.02835565f, -0.04420464f, -0.01031947f, 0.01883466f,
- -8.49525444e-003f, -0.09419250f, -0.01269387f, -0.02133371f,
- -0.10190815f, -0.07844430f, 2.43644323e-003f, -4.09610150e-003f,
- 0.01202551f, -0.06452291f, -0.10593818f, -0.02464746f, -0.02199699f,
- -0.07401930f, 0.07285886f, 8.87513801e-004f, 9.97662079e-003f,
- 8.46779719e-003f, 0.03730333f, -0.02905126f, 0.03573337f, -0.04393689f,
- -0.12014472f, 0.03176554f, -2.76015815e-003f, 0.10824566f, 0.05090732f,
- -3.30179278e-003f, -0.05123822f, 5.04784798e-003f, -0.05664124f,
- -5.99415926e-003f, -0.05341901f, -0.01221393f, 0.01291318f,
- 9.91760660e-003f, -7.56987557e-003f, -0.06193124f, -2.24549137e-003f,
- 0.01987562f, -0.02018840f, -0.06975540f, -0.06601523f, -0.03349112f,
- -0.08910118f, -0.03371435f, -0.07406893f, -0.02248047f, -0.06159951f,
- 2.77751544e-003f, -0.05723337f, -0.04792468f, 0.07518548f,
- 2.77279224e-003f, 0.04211938f, 0.03100502f, 0.05278448f, 0.03954679f,
- -0.03006846f, -0.03851741f, -0.02792403f, -0.02875333f, 0.01531280f,
- 0.02186953f, -0.01989829f, 2.50679464e-003f, -0.10258728f,
- -0.04785743f, -0.02887216f, 3.85063468e-003f, 0.01112236f,
- 8.29218887e-003f, -0.04822981f, -0.04503597f, -0.03713100f,
- -0.06988008f, -0.11002295f, -2.69209221e-003f, 1.85383670e-003f,
- -0.05921049f, -0.06105053f, -0.08458050f, -0.04527602f,
- 8.90329306e-004f, -0.05875023f, -2.68602883e-003f, -0.01591195f,
- 0.03631859f, 0.05493166f, 0.07300330f, 5.53333294e-003f, 0.06400407f,
- 0.01847740f, -5.76280477e-003f, -0.03210877f, 4.25160583e-003f,
- 0.01166520f, -1.44864211e-003f, 0.02253744f, -0.03367080f, 0.06983195f,
- -4.22323542e-003f, -8.89401045e-003f, -0.07943393f, 0.05199728f,
- 0.06065201f, 0.04133492f, 1.44032843e-003f, -0.09585235f, -0.03964731f,
- 0.04232114f, 0.01750465f, -0.04487902f, -7.59733608e-003f, 0.02011171f,
- 0.04673622f, 0.09011173f, -0.07869188f, -0.04682482f, -0.05080139f,
- -3.99383716e-003f, -0.05346331f, 0.01085723f, -0.03599333f,
- -0.07097908f, 0.03551549f, 0.02680387f, 0.03471529f, 0.01790393f,
- 0.05471273f, 9.62048303e-003f, -0.03180215f, 0.05864431f, 0.02330614f,
- 0.01633144f, -0.05616681f, -0.10245429f, -0.08302189f, 0.07291322f,
- -0.01972590f, -0.02619633f, -0.02485327f, -0.04627592f,
- 1.48853404e-003f, 0.05514185f, -0.01270860f, -0.01948900f, 0.06373586f,
- 0.05002292f, -0.03009798f, 8.76216311e-003f, -0.02474238f,
- -0.05504891f, 1.74034527e-003f, -0.03333667f, 0.01524987f, 0.11663762f,
- -1.32344989e-003f, -0.06608453f, 0.05687166f, -6.89525274e-004f,
- -0.04402352f, 0.09450210f, -0.04222684f, -0.05360983f, 0.01779531f,
- 0.02561388f, -0.11075410f, -8.77790991e-003f, -0.01099504f,
- -0.10380266f, 0.03103457f, -0.02105741f, -0.07371717f, 0.05146710f,
- 0.10581432f, -0.08617968f, -0.02892107f, 0.01092199f, 0.14551543f,
- -2.24320893e-003f, -0.05818033f, -0.07390742f, 0.05701261f,
- 0.12937020f, -0.04986651f, 0.10182415f, 0.05028650f, 0.12515625f,
- 0.09175041f, 0.06404983f, 0.01523394f, 0.09460562f, 0.06106631f,
- -0.14266998f, -0.02926703f, 0.02762171f, 0.02164151f,
- -9.58488265e-004f, -0.04231362f, -0.09866509f, 0.04322244f,
- 0.05872034f, -0.04838847f, 0.06319253f, 0.02443798f, -0.03606876f,
- 9.38737206e-003f, 0.04289991f, -0.01027411f, 0.08156885f, 0.08751175f,
- -0.13191354f, 8.16054735e-003f, -0.01452161f, 0.02952677f, 0.03615945f,
- -2.09128903e-003f, 0.02246693f, 0.09623287f, 0.09412123f, -0.02924758f,
- -0.07815186f, -0.02203079f, -2.02566991e-003f, 0.01094733f,
- -0.01442332f, 0.02838561f, 0.11882371f, 7.28798332e-003f, -0.10345965f,
- 0.07561217f, -0.02049661f, 4.44177445e-003f, 0.01609347f, -0.04893158f,
- -0.08758243f, -7.67420698e-003f, 0.08862378f, 0.06098121f, 0.06565887f,
- 7.32981879e-003f, 0.03558407f, -0.03874352f, -0.02490055f,
- -0.06771075f, 0.09939223f, -0.01066077f, 0.01382995f, -0.07289080f,
- 7.47184316e-003f, 0.10621431f, -0.02878659f, 0.02383525f, -0.03274646f,
- 0.02137008f, 0.03837290f, 0.02450992f, -0.04296818f, -0.02895143f,
- 0.05327370f, 0.01499020f, 0.04998732f, 0.12938657f, 0.09391870f,
- 0.04292390f, -0.03359194f, -0.06809492f, 0.01125796f, 0.17290455f,
- -0.03430733f, -0.06255233f, -0.01813114f, 0.11726857f, -0.06127599f,
- -0.08677909f, -0.03429872f, 0.04684938f, 0.08161420f, 0.03538774f,
- 0.01833884f, 0.11321855f, 0.03261845f, -0.04826299f, 0.01752407f,
- -0.01796414f, -0.10464549f, -3.30041884e-003f, 2.29343961e-004f,
- 0.01457292f, -0.02132982f, -0.02602923f, -9.87351313e-003f,
- 0.04273872f, -0.02103316f, -0.07994065f, 0.02614958f, -0.02111666f,
- -0.06964913f, -0.13453490f, -0.06861878f, -6.09341264e-003f,
- 0.08251446f, 0.15612499f, 2.46531400e-003f, 8.88424646e-003f,
- -0.04152999f, 0.02054853f, 0.05277953f, -0.03087788f, 0.02817579f,
- 0.13939077f, 0.07641046f, -0.03627627f, -0.03015098f, -0.04041540f,
- -0.01360690f, -0.06227205f, -0.02738223f, 0.13577610f, 0.15235767f,
- -0.05392922f, -0.11175954f, 0.02157129f, 0.01146481f, -0.05264937f,
- -0.06595174f, -0.02749175f, 0.11812254f, 0.17404149f, -0.06137035f,
- -0.11003478f, -0.01351621f, -0.01745916f, -0.08577441f, -0.04469909f,
- -0.06106115f, 0.10559758f, 0.20806813f, -0.09174948f, 7.09621934e-004f,
- 0.03579374f, 0.07215115f, 0.02221742f, 0.01827742f, -7.90785067e-003f,
- 0.01489554f, 0.14519960f, -0.06425831f, 0.02990399f, -1.80181325e-003f,
- -0.01401528f, -0.04171134f, -3.70530109e-003f, -0.09090481f,
- 0.09520713f, 0.08845516f, -0.02651753f, -0.03016730f, 0.02562448f,
- 0.03563816f, -0.03817881f, 0.01433385f, 0.02256983f, 0.02872120f,
- 0.01001934f, -0.06332260f, 0.04338406f, 0.07001807f, -0.04705722f,
- -0.07318907f, 0.02630457f, 0.03106382f, 0.06648342f, 0.10913180f,
- -0.01630815f, 0.02910308f, 0.02895109f, 0.08040254f, 0.06969310f,
- 0.06797734f, 6.08639978e-003f, 4.16588830e-003f, 0.08926726f,
- -0.03123648f, 0.02700146f, 0.01168734f, -0.01631594f, 4.61015804e-003f,
- 8.51359498e-003f, -0.03544224f, 0.03571994f, 4.29766066e-003f,
- -0.01970077f, -8.79793242e-003f, 0.09607988f, 0.01544222f,
- -0.03923707f, 0.07308586f, 0.06061262f, 1.31683104e-004f,
- -7.98222050e-003f, 0.02399261f, -0.06084389f, -0.02743429f,
- -0.05475523f, -0.04131311f, 0.03559756f, 0.03055342f, 0.02981433f,
- 0.14860515f, 0.01766787f, 0.02945257f, 0.04898238f, 0.01026922f,
- 0.02811658f, 0.08267091f, 0.02732154f, -0.01237693f, 0.11760156f,
- 0.03802063f, -0.03309754f, 5.24957618e-003f, -0.02460510f, 0.02691451f,
- 0.05399988f, -0.10133506f, 0.06385437f, -0.01818005f, 0.02259503f,
- 0.03573135f, 0.01042848f, -0.04153402f, -0.04043029f, 0.01643575f,
- 0.08326677f, 4.61383024e-004f, -0.05308095f, -0.08536223f,
- -1.61011645e-003f, -0.02163720f, -0.01783352f, 0.03859637f,
- 0.08498885f, -0.01725216f, 0.08625131f, 0.10995087f, 0.09177644f,
- 0.08498347f, 0.07646490f, 0.05580502f, 0.02693516f, 0.09996913f,
- 0.09070327f, 0.06667200f, 0.05873008f, -0.02247842f, 0.07772321f,
- 0.12408436f, 0.12629253f, -8.41997913e-004f, 0.01477783f, 0.09165990f,
- -2.98401713e-003f, -0.06466447f, -0.07057302f, 2.09516948e-004f,
- 0.02210209f, -0.02158809f, -0.08602506f, -0.02284836f,
- 4.01876355e-003f, 9.56660323e-003f, -0.02073978f, -0.04635138f,
- -7.59423291e-003f, -0.01377393f, -0.04559359f, -0.13284740f,
- -0.08671406f, -0.03654395f, 0.01142869f, 0.03287891f, -0.04392983f,
- 0.06142959f, 0.17710890f, 0.10385257f, 0.01329137f, 0.10067633f,
- 0.12450829f, -0.04476709f, 0.09049144f, 0.04589312f, 0.11167907f,
- 0.08587538f, 0.04767583f, 1.67188141e-003f, 0.02359802f, -0.03808852f,
- 0.03126272f, -0.01919029f, -0.05698918f, -0.02365112f, -0.06519032f,
- -0.05599358f, -0.07097308f, -0.03301812f, -0.04719102f, -0.02566297f,
- 0.01324074f, -0.09230672f, -0.05518232f, -0.04712864f, -0.03380903f,
- -0.06719479f, 0.01183908f, -0.09326738f, 0.01642865f, 0.03789867f,
- -6.61567831e-003f, 0.07796386f, 0.07246574f, 0.04706347f, -0.02523437f,
- -0.01696830f, -0.08068866f, 0.06030888f, 0.10527060f, -0.06611756f,
- 0.02977346f, 0.02621830f, 0.01913855f, -0.08479366f, -0.06322418f,
- -0.13570616f, -0.07644490f, 9.31900274e-003f, -0.08095149f,
- -0.10197903f, -0.05204025f, 0.01413151f, -0.07800411f, -0.01885122f,
- -0.07509381f, -0.10136326f, -0.05212355f, -0.09944065f,
- -1.33606605e-003f, -0.06342617f, -0.04178550f, -0.12373723f,
- -0.02832736f, -0.06057501f, 0.05830070f, 0.07604282f, -0.06462587f,
- 8.02447461e-003f, 0.11580125f, 0.12332212f, 0.01978462f,
- -2.72378162e-003f, 0.05850752f, -0.04674481f, 0.05148062f,
- -2.62542837e-003f, 0.11253355f, 0.09893716f, 0.09785093f, -0.04659257f,
- -0.01102429f, -0.07002308f, 0.03088913f, -0.02565549f, -0.07671449f,
- 3.17443861e-003f, -0.10783514f, -0.02314270f, -0.11089555f,
- -0.01024768f, 0.03116021f, -0.04964825f, 0.02281825f, 5.50005678e-003f,
- -0.08427856f, -0.14685495f, -0.07719755f, -0.13342668f, -0.04525511f,
- -0.09914210f, 0.02588859f, 0.03469279f, 0.04664020f, 0.11688190f,
- 0.09647275f, 0.10857815f, -0.01448726f, 0.04299758f, -0.06763151f,
- 1.33257592e-003f, 0.14331576f, 0.07574340f, 0.09166205f, 0.05674926f,
- 0.11325553f, -0.01106494f, 0.02062161f, -0.11484840f, -0.07492137f,
- -0.02864293f, -0.01275638f, -0.06946032f, -0.10101652f, -0.04113498f,
- -0.02214783f, -0.01273942f, -0.07480393f, -0.10556041f, -0.07622112f,
- -0.09988393f, -0.11453961f, -0.12073903f, -0.09412795f, -0.07146588f,
- -0.04054537f, -0.06127083f, 0.04221122f, 0.07688113f, 0.04099256f,
- 0.12663734f, 0.14683802f, 0.21761774f, 0.12525328f, 0.18431792f,
- -1.66402373e-003f, 2.37777247e-003f, 0.01445475f, 0.03509416f,
- 0.02654697f, 0.01716739f, 0.05374011f, 0.02944174f, 0.11323927f,
- -0.01485456f, -0.01611330f, -1.85554172e-003f, -0.01708549f,
- -0.05435753f, -0.05302101f, 0.05260378f, -0.03582945f,
- -3.42867890e-004f, 1.36076682e-003f, -0.04436073f, -0.04228432f,
- 0.03281291f, -0.05480836f, -0.10197772f, -0.07206279f, -0.10741059f,
- -0.02366946f, 0.10278475f, -2.74783419e-003f, -0.03242477f,
- 0.02308955f, 0.02835869f, 0.10348799f, 0.19580358f, 0.10252027f,
- 0.08039929f, 0.05525554f, -0.13250865f, -0.14395352f, 3.13586881e-003f,
- -0.03387071f, 8.94669443e-003f, 0.05406157f, -4.97324532e-003f,
- -0.01189114f, 2.82919413e-004f, -0.03901557f, -0.04898705f,
- 0.02164520f, -0.01382906f, -0.01850416f, 0.01869347f, -0.02450060f,
- 0.02291678f, 0.08196463f, 0.03309153f, -0.10629974f, 0.02473924f,
- 0.05344394f, -0.02404823f, -0.03243643f, -5.55244600e-003f,
- -0.08009996f, 0.02811539f, 0.04235742f, 0.01859004f, 0.04902123f,
- -0.01438252f, -0.01526853f, 0.02044195f, -0.05008660f, 0.04244113f,
- 0.07611816f, 0.04950470f, -0.06020549f, -4.26026015e-003f, 0.13133512f,
- -0.01438738f, -0.01958807f, -0.04044152f, -0.12425045f,
- 2.84353318e-003f, -0.05042776f, -0.09121484f, 7.34345755e-003f,
- 0.09388847f, 0.11800314f, 4.72295098e-003f, 4.44378285e-003f,
- -0.07984917f, -0.03613737f, 0.04490915f, -0.02246483f, 0.04681071f,
- 0.05240871f, 0.02157206f, -0.04603431f, -0.01197929f, -0.02748779f,
- 0.13621049f, 0.08812155f, -0.07802048f, 4.86458559e-003f, -0.01598836f,
- 0.01024450f, -0.03463517f, -0.02304239f, -0.08692665f, 0.06655128f,
- 0.05785803f, -0.12640759f, 0.02307472f, 0.07337402f, 0.07525434f,
- 0.04943763f, -0.02241034f, -0.09978238f, 0.14487994f, -0.06570521f,
- -0.07855482f, 0.02830222f, -5.29603509e-004f, -0.04669895f,
- -0.11822784f, -0.12246452f, -0.15365660f, -0.02969127f, 0.08078201f,
- 0.13512598f, 0.11505685f, 0.04740673f, 0.01376022f, -0.05852978f,
- -0.01537809f, -0.05541119f, 0.02491065f, -0.02870786f, 0.02760978f,
- 0.23836176f, 0.22347429f, 0.10306466f, -0.06919070f, -0.10132039f,
- -0.20198342f, -0.05040560f, 0.27163076f, 0.36987007f, 0.34540465f,
- 0.29095781f, 0.05649706f, 0.04125737f, 0.07505883f, -0.02737836f,
- -8.43431335e-003f, 0.07368195f, 0.01653876f, -0.09402955f,
- -0.09574359f, 0.01474337f, -0.07128561f, -0.03460737f, 0.11438941f,
- 0.13752601f, -0.06385452f, -0.06310338f, 8.19548313e-003f, 0.11622470f,
- 5.05133113e-003f, -0.07602754f, 0.06695660f, 0.25723928f, 0.09037900f,
- 0.28826267f, 0.13165380f, -0.05312614f, -0.02137198f, -0.03442232f,
- -0.06255679f, 0.03899667f, 0.18391028f, 0.26016650f, 0.03374462f,
- 0.01860465f, 0.19077586f, 0.18160543f, 3.43634398e-003f, -0.03036782f,
- 0.19683038f, 0.35378191f, 0.24968483f, -0.03222649f, 0.28972381f,
- 0.43091634f, 0.30778357f, 0.02335266f, -0.09877399f, -6.85245218e-003f,
- 0.08945240f, -0.08150686f, 0.02792493f, 0.24806842f, 0.17338486f,
- 0.06231801f, -0.10432383f, -0.16653322f, -0.13197899f, -0.08531576f,
- -0.19271527f, -0.13536365f, 0.22240199f, 0.39219588f, 0.26597717f,
- -0.01231649f, 0.01016179f, 0.13379875f, 0.12018334f, -0.04852953f,
- -0.07915270f, 0.07036012f, 3.87723115e-003f, -0.06126805f,
- -0.15015170f, -0.11406515f, -0.08556531f, -0.07429333f, -0.16115491f,
- 0.13214062f, 0.25691369f, 0.05697750f, 0.06861912f, -6.02903729e-003f,
- -7.94562511e-003f, 0.04799571f, 0.06695165f, -0.01926842f, 0.06206308f,
- 0.13450983f, -0.06381495f, -2.98370165e-003f, -0.03482971f,
- 7.53991678e-003f, 0.03895611f, 0.11464261f, 0.01669971f,
- 8.27818643e-003f, -7.49160210e-003f, -0.11712562f, -0.10650621f,
- -0.10353880f, -0.04994106f, -7.65618810e-004f, 0.03023767f,
- -0.04759270f, -0.07302686f, -0.05825012f, -0.13156348f, -0.10639747f,
- -0.19393684f, -0.09973683f, -0.07918908f, 4.63177625e-004f,
- -6.61382044e-004f, 0.15853868f, 0.08561199f, -0.07660093f,
- -0.08015265f, -0.06164073f, 0.01882577f, -7.29908410e-004f,
- 0.06840892f, 0.03843764f, 0.20274927f, 0.22028814f, -5.26101235e-003f,
- 0.01452435f, -0.06331623f, 0.02865064f, 0.05673740f, 0.12171564f,
- 0.03837196f, 0.03555467f, -0.02662914f, -0.10280123f, -0.06526285f,
- -0.11066351f, -0.08988424f, -0.10103678f, 8.10526591e-003f,
- 5.95238712e-003f, 0.02617721f, -0.01705742f, -0.10897956f,
- -0.08004991f, -0.11271993f, -0.06185647f, -0.06103712f, 0.01597041f,
- -0.05923606f, 0.09410726f, 0.22858568f, 0.03263380f, 0.06772990f,
- -0.09003516f, 0.01017870f, 0.01931688f, 0.08628357f, -0.01430009f,
- 0.10954945f, 0.16612452f, -0.02434544f, -0.03310068f, -0.04236627f,
- 0.01212392f, -6.15046406e-003f, 0.06954194f, 0.03015283f, 0.01787957f,
- 0.02781667f, -0.05561153f, -8.96244217e-003f, -0.04971489f,
- 0.07510284f, 0.01775282f, 0.05889897f, -0.07981427f, 0.03647643f,
- -3.73833324e-003f, -0.08894575f, -0.06429435f, -0.08068276f,
- 0.03567704f, -0.07131936f, -7.21910037e-003f, -0.09566668f,
- 0.17886090f, 0.14911725f, 0.02070032f, -0.05017120f, -0.04992622f,
- 0.01570143f, -0.09906903f, 0.06456193f, 0.15329507f, 0.18820767f,
- 0.11689861f, -0.01178513f, -0.02225163f, -0.01905318f, 0.10271224f,
- -7.27029052e-003f, 0.11664233f, 0.14796902f, 0.07771893f, 0.02400013f,
- -0.05361797f, -0.01972888f, 0.01376177f, 0.06740040f, -0.06525395f,
- 0.05726178f, -0.02404981f, -0.14018567f, -0.02074987f, -0.04621970f,
- -0.04688627f, -0.01842059f, 0.07722727f, -0.04852883f, 0.01529004f,
- -0.19639495f, 0.10817073f, 0.03795860f, -0.09435206f, -0.07984378f,
- -0.03383440f, 0.11081333f, 0.02237366f, 0.12703256f, 0.21613893f,
- 0.02918790f, 4.66472283e-003f, -0.10274266f, -0.04854131f,
- -3.46305710e-003f, 0.08652268f, 0.02251546f, 0.09636052f, 0.17180754f,
- -0.09272388f, 4.59174305e-004f, -0.11723048f, -0.12210111f,
- -0.15547538f, 0.07218186f, -0.05297846f, 0.03779940f, 0.05150875f,
- -0.03802310f, 0.03870645f, -0.15250699f, -0.08696499f, -0.02021560f,
- 0.04118926f, -0.15177974f, 0.01577647f, 0.10249301f, 7.50041893e-003f,
- 0.01721806f, -0.06828983f, -0.02397596f, -0.06598977f, -0.04317593f,
- -0.08064980f, 6.66632550e-003f, 0.03333484f, 0.07093620f, 0.08231064f,
- -0.06577903f, -0.06698844f, -0.06984019f, -0.06508023f, -0.14145090f,
- -0.02393239f, 0.06485303f, 8.83263443e-003f, 0.09251080f, -0.07557579f,
- -0.05067699f, -0.09798748f, -0.06703258f, -0.14056294f, 0.03245994f,
- 0.12554143f, 0.01761621f, 0.12980327f, -0.04081950f, -0.11906909f,
- -0.14813015f, -0.08376863f, -0.12200681f, 0.04988137f, 0.05424247f,
- -3.90952639e-003f, 0.03255733f, -0.12717837f, -0.07461493f,
- -0.05703964f, -0.01736189f, -0.08026433f, -0.05433894f, -0.01719359f,
- 0.02886275f, 0.01772653f, -0.09163518f, 3.57789593e-003f, -0.10129993f,
- -0.02653764f, -0.08131415f, -0.03847986f, -7.62157550e-004f,
- 0.06486648f, 0.19675669f, -0.04919156f, -0.07059129f, -0.04857785f,
- -0.01042383f, -0.08328653f, 0.03660302f, -0.03696846f, 0.04969259f,
- 0.08241162f, -0.12514858f, -0.06122676f, -0.03750202f,
- 6.52989605e-003f, -0.10247213f, 0.02568346f, 4.51781414e-003f,
- -0.03734229f, -0.01131264f, -0.05412074f, 8.89345480e-004f,
- -0.12388977f, -0.05959237f, -0.12418608f, -0.06151643f, -0.07310260f,
- 0.02441575f, 0.07023528f, -0.07548289f, -7.57147965e-004f,
- -0.09061348f, -0.08112976f, -0.06920306f, 9.54394229e-003f,
- -0.01219902f, 1.21273217e-003f, -8.88989680e-003f, -0.08309301f,
- -0.04552661f, -0.10739882f, -0.05691034f, -0.13928030f, 0.09027749f,
- 0.15123098f, 0.03175976f, 0.17763577f, 3.29913251e-004f, 0.05151888f,
- -0.09844074f, -0.09475287f, -0.08571247f, 0.16241577f, 0.19336018f,
- 8.57454538e-003f, 0.11474732f, -0.01493934f, 0.03352379f, -0.08966240f,
- -0.02322310f, 0.02663568f, 0.05448750f, -0.03536883f, -0.07210463f,
- -0.06807277f, -0.03121621f, -0.05932408f, -0.17282860f, -0.15873498f,
- -0.04956378f, 0.01603377f, -0.12385946f, 0.13878587f, 0.21468069f,
- 0.13510075f, 0.20992437f, 0.08845878f, 0.08104013f, 0.03754176f,
- 0.12173114f, 0.11103114f, 0.10643122f, 0.13941477f, 0.11640384f,
- 0.14786847f, 0.01218238f, 0.01160753f, 0.03547940f, 0.08794311f,
- -0.01695384f, -0.07692261f, -0.08236158f, 6.79194089e-003f,
- -0.02458403f, 0.13022894f, 0.10953187f, 0.09857773f, 0.04735930f,
- -0.04353498f, -0.15173385f, -0.17904443f, -0.10450364f, -0.13418166f,
- -0.06633098f, -0.03170381f, -0.06839000f, -0.11350126f, -0.06983913f,
- 0.19083543f, 0.17604128f, 0.07730632f, 0.10022651f, 0.36428109f,
- 0.28291923f, 0.12688625f, 0.15942036f, 0.14064661f, -0.11201853f,
- -0.13969108f, -0.09088077f, -0.14107047f, 0.05117374f,
- -2.63348082e-003f, -0.10794610f, -0.09715455f, -0.05284977f,
- 0.01565668f, 0.05031200f, 0.07021113f, -0.02963028f, 0.01766960f,
- 0.08333644f, -0.03211382f, 4.90096770e-003f, 0.05186674f, -0.05045737f,
- -0.09624767f, -0.02525997f, 0.06916669f, 0.01213916f, 0.05333899f,
- -0.03443280f, -0.10055527f, -0.06291115f, 5.42851724e-003f,
- -6.30360236e-003f, 0.02270257f, -0.01769792f, 0.03273688f, 0.07746078f,
- 7.77099328e-003f, 0.05041346f, 0.01648103f, -0.02321534f, -0.09930186f,
- -0.02293853f, 0.02034990f, -0.08324204f, 0.08510064f, -0.03732836f,
- -0.06465405f, -0.06086946f, 0.13680504f, -0.11469388f, -0.03896406f,
- -0.07142810f, 2.67581246e-003f, -0.03639632f, -0.09849060f,
- -0.11014334f, 0.17489147f, 0.17610909f, -0.16091567f, -0.07248894f,
- 0.01567141f, 0.23742996f, 0.07552249f, -0.06270349f, -0.07303379f,
- 0.25442186f, 0.16903116f, -0.08168741f, -0.05913896f, -0.03954096f,
- 6.81776879e-003f, -0.05615319f, -0.07303037f, -0.12176382f,
- 0.12385108f, 0.22084464f, -0.05543206f, -0.03310431f, 0.05731593f,
- 0.19481890f, 0.04016430f, -0.06480758f, -0.12353460f, 0.18733442f,
- -0.09631214f, -0.11192076f, 0.12404587f, 0.15671748f, 0.19256128f,
- 0.10895617f, 0.03391477f, -0.13032004f, -0.05626907f, -0.09025607f,
- 0.23485197f, 0.27812332f, 0.26725492f, 0.07255980f, 0.16565137f,
- 0.22388470f, 0.07441066f, -0.21003133f, -0.08075339f, -0.15031935f,
- 0.07023834f, 0.10872041f, 0.18156518f, 0.20037253f, 0.13571967f,
- -0.11915682f, -0.11131983f, -0.18878011f, 0.06074620f, 0.20578890f,
- 0.12413109f, 0.03930207f, 0.29176015f, 0.29502738f, 0.27856228f,
- -0.01803601f, 0.16646385f, 0.19268319f, 0.01900682f, 0.06026287f,
- 2.35868432e-003f, 0.01558199f, 0.02707230f, 0.11383014f, 0.12103992f,
- 0.03907350f, 0.04637353f, 0.09020995f, 0.11919726f, -3.63007211e-003f,
- 0.02220155f, 0.10336831f, 0.17351882f, 0.12259731f, 0.18983354f,
- 0.15736865f, 0.01160725f, -0.01690723f, -9.69582412e-004f, 0.07213813f,
- 0.01161613f, 0.17864859f, 0.24486147f, 0.18208991f, 0.20177495f,
- 0.05972528f, -8.93934630e-003f, -0.02316955f, 0.14436610f, 0.14114498f,
- 0.05520950f, 0.06353590f, -0.19124921f, 0.10174713f, 0.29414919f,
- 0.26448128f, 0.09344960f, 0.15284036f, 0.19797507f, 0.11369792f,
- -0.12722753f, -0.21396367f, -0.02008235f, -0.06566695f, -0.01662150f,
- -0.03937003f, 0.04778343f, 0.05017274f, -0.02299062f, -0.20208496f,
- -0.06395898f, 0.13721776f, 0.22544557f, 0.14888357f, 0.08687132f,
- 0.27088094f, 0.32206613f, 0.09782200f, -0.18523243f, -0.17232181f,
- -0.01041531f, 0.04008654f, 0.04199702f, -0.08081299f, -0.03755421f,
- -0.04809646f, -0.05222081f, -0.21709201f, -0.06622940f, 0.02945281f,
- -0.04600435f, -0.05256077f, -0.08432942f, 0.02848100f, 0.03490564f,
- 8.28621630e-003f, -0.11051246f, -0.11210597f, -0.01998289f,
- -0.05369405f, -0.08869293f, -0.18799506f, -0.05436598f, -0.05011634f,
- -0.05419716f, -0.06151857f, -0.10827805f, 0.04346735f, 0.04016083f,
- 0.01520820f, -0.12173316f, -0.04880285f, -0.01101406f, 0.03250847f,
- -0.06009551f, -0.03082932f, -0.02295134f, -0.06856834f, -0.08775249f,
- -0.23793389f, -0.09174541f, -0.05538322f, -0.04321031f, -0.11874759f,
- -0.04221844f, -0.06070468f, 0.01194489f, 0.02608565f, -0.03892140f,
- -0.01643151f, -0.02602034f, -0.01305472f, 0.03920100f, -0.06514261f,
- 0.01126918f, -6.27710763e-003f, -0.02720047f, -0.11133634f,
- 0.03300330f, 0.02398472f, 0.04079665f, -0.10564448f, 0.05966159f,
- 0.01195221f, -0.03179441f, -0.01692590f, -0.06177841f, 0.01841576f,
- -5.51078189e-003f, -0.06821765f, -0.03191888f, -0.09545476f,
- 0.03030550f, -0.04896152f, -0.02914624f, -0.13283344f, -0.04783419f,
- 6.07836898e-003f, -0.01449538f, -0.13358212f, -0.09687774f,
- -0.02813793f, 0.01213498f, 0.06650011f, -0.02039067f, 0.13356198f,
- 0.05986415f, -9.12760664e-003f, -0.18780160f, -0.11992817f,
- -0.06342237f, 0.01229534f, 0.07143231f, 0.10713009f, 0.11085765f,
- 0.06569190f, -0.02956399f, -0.16288325f, -0.13993549f, -0.01292515f,
- 0.03833013f, 0.09130384f, -0.05086257f, 0.05617329f, -0.03896667f,
- -0.06282311f, -0.11490010f, -0.14264110f, -0.04530499f, 0.01598189f,
- 0.09167797f, 0.08663294f, 0.04885277f, -0.05741219f, -0.07565769f,
- -0.17136464f, -0.02619422f, -0.02477579f, 0.02679587f, 0.11621952f,
- 0.08788391f, 0.15520640f, 0.04709549f, 0.04504483f, -0.10214074f,
- -0.12293372f, -0.04820546f, -0.05484834f, 0.05473754f, 0.07346445f,
- 0.05577277f, -0.08209965f, 0.03462975f, -0.20962234f, -0.09324598f,
- 3.79481679e-003f, 0.03617633f, 0.16742408f, 0.07058107f, 0.10204960f,
- -0.06795346f, 3.22807301e-003f, -0.12589309f, -0.17496960f,
- 0.02078314f, -0.07694324f, 0.12184640f, 0.08997164f, 0.04793497f,
- -0.11383379f, -0.08046359f, -0.25716835f, -0.08080962f,
- 6.80711539e-003f, -0.02930280f, -3.04938294e-003f, -0.11106286f,
- -0.04628860f, -0.07821649f, 7.70127494e-003f, -0.10247706f,
- 1.21042714e-003f, 0.20573859f, -0.03241005f, 8.42972286e-003f,
- 0.01946464f, -0.01197973f, -0.14579976f, 0.04233614f,
- -4.14096704e-003f, -0.06866436f, -0.02431862f, -0.13529138f,
- 1.25891645e-003f, -0.11425111f, -0.04303651f, -0.01694815f,
- 0.05720210f, -0.16040207f, 0.02772896f, 0.05498345f, -0.15010567f,
- 0.01450866f, 0.02350303f, -0.04301004f, -0.04951802f, 0.21702233f,
- -0.03159155f, -0.01963303f, 0.18232647f, -0.03263875f,
- -2.88476888e-003f, 0.01587562f, -1.94303901e-003f, -0.07789494f,
- 0.04674156f, -6.25576358e-003f, 0.08925962f, 0.21353747f, 0.01254677f,
- -0.06999976f, -0.05931328f, -0.01884327f, -0.04306272f, 0.11794136f,
- 0.03842728f, -0.03907030f, 0.05636114f, -0.09766009f, -0.02104000f,
- 8.72711372e-003f, -0.02736877f, -0.05112274f, 0.16996814f, 0.02955785f,
- 0.02094014f, 0.08414304f, -0.03335762f, -0.03617457f, -0.05808248f,
- -0.08872101f, 0.02927705f, 0.27077839f, 0.06075108f, 0.07478261f,
- 0.15282831f, -0.03908454f, -0.05101782f, -9.51998029e-003f,
- -0.03272416f, -0.08735625f, 0.07633440f, -0.07185312f, 0.13841286f,
- 0.07812646f, -0.12901451f, -0.05488589f, -0.05644578f, -0.03290703f,
- -0.11184757f, 0.03751570f, -0.05978153f, -0.09155276f, 0.05657315f,
- -0.04328186f, -0.03047933f, -0.01413135f, -0.10181040f, -0.01384013f,
- 0.20132534f, -0.01536873f, -0.07641169f, 0.05906778f, -0.07833145f,
- -0.01523801f, -0.07502609f, -0.09461885f, -0.15013233f, 0.16050665f,
- 0.09021381f, 0.08473236f, 0.03386267f, -0.09147339f, -0.09170618f,
- -0.08498498f, -0.05119187f, -0.10431040f, 0.01041618f, -0.03064913f,
- 0.09340212f, 0.06448522f, -0.03881054f, -0.04985436f, -0.14794017f,
- -0.05200112f, -0.02144495f, 0.04000821f, 0.12420804f, -0.01851651f,
- -0.04116732f, -0.11951703f, -0.04879033f, -0.08722515f, -0.08454733f,
- -0.10549165f, 0.11251976f, 0.10766345f, 0.19201984f, 0.06128913f,
- -0.02734615f, -0.08834923f, -0.16999826f, -0.03548348f,
- -5.36092324e-003f, 0.08297954f, 0.07226378f, 0.04194529f, 0.04668673f,
- 8.73902347e-003f, 0.06980139f, 0.05652480f, 0.05879445f, 0.02477076f,
- 0.02451423f, 0.12433673f, 0.05600227f, 0.06886370f, 0.03863076f,
- 0.07459056f, 0.02264139f, 0.01495469f, 0.06344220f, 0.06945208f,
- 0.02931899f, 0.11719371f, 0.04527427f, 0.03248192f, 2.08271481e-003f,
- 0.02044626f, 0.11403449f, 0.04303892f, 0.06444661f, 0.04959024f,
- 0.08174094f, 0.09240247f, 0.04894639f, 0.02252937f, -0.01652530f,
- 0.07587013f, 0.06064249f, 0.13954395f, 0.02772832f, 0.07093039f,
- 0.08501238f, 0.01701301f, 0.09055722f, 0.33421436f, 0.20163782f,
- 0.09821030f, 0.07951369f, 0.08695120f, -0.12757730f, -0.13865978f,
- -0.06610068f, -0.10985506f, 0.03406816f, -0.01116336f, -0.07281768f,
- -0.13525715f, -0.12844718f, 0.08956250f, 0.09171610f, 0.10092317f,
- 0.23385370f, 0.34489515f, 0.09901748f, 0.02002922f, 0.12335990f,
- 0.07606190f, -0.14899330f, -0.15634622f, -0.06494618f, -0.01760547f,
- 0.03404277f, -0.13208845f, -0.12101169f, -0.18294574f, -0.16560709f,
- 0.02183887f, -0.02752613f, 0.01813638f, 0.02000757f, 0.01319924f,
- 0.08030242f, 0.01220535f, 2.98233377e-003f, -0.01307070f, 0.05970297f,
- -0.05345284f, -0.03381982f, -9.87543724e-003f, -0.06869387f,
- 0.03956730f, -0.03108176f, -0.05732809f, 0.02172386f, 0.04159765f,
- 2.62783933e-003f, 0.04813229f, 0.09358983f, -8.18389002e-003f,
- 0.01724574f, -0.02547474f, -0.04967288f, -0.02390376f, 0.06640504f,
- -0.06306566f, 0.01137518f, 0.05589378f, -0.08237787f, 0.02455001f,
- -0.03059422f, -0.08953978f, 0.06851497f, 0.07190268f, -0.07610799f,
- 7.87237938e-003f, -7.85830803e-003f, 0.06006952f, -0.01126728f,
- -2.85743061e-003f, -0.04772895f, 0.01884944f, 0.15005857f,
- -0.06268821f, -0.01989072f, 0.01138399f, 0.08760451f, 0.03879007f,
- -9.66926850e-003f, -0.08012961f, 0.06414555f, -0.01362950f,
- -0.09135523f, 0.01755159f, 0.04459474f, 0.09650917f, 0.05219948f,
- -2.19440833e-003f, -0.07037939f, -0.01599054f, 0.13103317f,
- -0.02492603f, -0.01032540f, -0.02903307f, 0.04489160f, 0.05148086f,
- 0.01858173f, -0.02919228f, 0.08299296f, -0.04590359f, -0.15745632f,
- -0.09068198f, -0.02972453f, 0.12985018f, 0.22320485f, 0.24261914f,
- 0.03642650f, -0.05506422f, 2.67413049e-003f, -0.03834032f, 0.06449424f,
- 0.03834866f, 0.03816991f, 0.25039271f, 0.34212017f, 0.32433882f,
- 0.18824573f, -0.08599839f, -0.17599408f, -0.15317015f, -0.09913155f,
- -0.02856072f, -0.05304699f, -1.06437842e-003f, -0.06641813f,
- -0.07509298f, 0.01463361f, -0.07551918f, -0.04510373f,
- -8.44620075e-003f, 0.01772176f, 0.04068235f, 0.20295307f, 0.15719447f,
- 0.05712103f, 0.26296997f, 0.14657754f, 0.01547317f, -0.05052776f,
- -0.03881342f, -0.01437883f, -0.04930177f, 0.11719568f, 0.24098417f,
- 0.26468599f, 0.31698579f, 0.10103608f, -0.01096375f, -0.01367013f,
- 0.17104232f, 0.20065314f, 2.67622480e-003f, -0.01190034f, 0.18301608f,
- 0.09459770f, -0.06357619f, -0.06473801f, 0.01377906f, -0.10032775f,
- -0.06388740f, 3.80393048e-003f, 0.06206078f, 0.10349120f, 0.26804337f,
- 8.17918684e-003f, -0.02314351f, 9.34422202e-003f, 0.09198381f,
- 0.03681326f, -8.77339672e-003f, -0.09662418f, -0.02715708f,
- 0.13503517f, 0.08962728f, -6.57071499e-003f, -0.03201199f, 0.28510824f,
- 0.32095715f, 0.18512695f, -0.14230858f, -0.14048551f, -0.07181299f,
- -0.08575408f, -0.08661680f, -0.17416079f, 7.54326640e-004f,
- 0.05601677f, 0.13585392f, -0.04960437f, -0.07708392f, 0.10676333f,
- -0.04407546f, -0.07209078f, 0.03663663f, 0.28949317f, 0.41127121f,
- 0.27431169f, -0.06900328f, -0.21474190f, -0.15578632f, -0.19555484f,
- -0.15209621f, -0.11269179f, 0.07416003f, 0.18991330f, 0.26858172f,
- 0.01952259f, 0.01017922f, 0.02159843f, -4.95165400e-003f, -0.04368168f,
- -0.12721671f, -0.06673957f, -0.11275250f, 0.04413409f, 0.05578312f,
- 0.03896771f, 0.03566417f, -0.05871816f, -0.07388090f, -0.17965563f,
- -0.08570268f, -0.15273231f, -0.06022318f, -0.06999847f,
- -6.81510568e-003f, 0.06294262f, -6.54901436e-004f, -0.01128654f,
- -0.02289657f, 0.04849290f, 0.04140804f, 0.23681939f, 0.14545733f,
- 0.01989965f, 0.12032662f, 3.87463090e-003f, -6.02597650e-003f,
- -0.05919775f, -0.03067224f, -0.07787777f, 0.10834727f, 0.02153730f,
- 0.02765649f, 0.03975543f, -0.12182906f, -0.04900113f, -0.09940100f,
- -0.06453611f, -0.13757215f, -0.03721382f, 0.02827376f, -0.04351249f,
- 0.01907038f, -0.10284120f, -0.05671160f, -0.10760647f, -0.09624009f,
- -0.09565596f, -0.01303654f, 0.03080539f, 0.01416511f, 0.05846142f,
- -5.42971538e-003f, 0.06221476f, -0.03320325f, -0.06791797f,
- -0.05791342f, 0.12851369f, 0.14990346f, 0.03634374f, 0.14262885f,
- 0.04330391f, 0.05032569f, -0.05631914f, 0.01606137f, 0.04387223f,
- 0.22344995f, 0.15722635f, -0.04693628f, 0.03006579f, -2.52882647e-003f,
- 0.05717621f, -0.07529724f, -0.02848588f, -0.06868757f,
- -4.51729307e-003f, 0.06466042f, -0.05935378f, -0.04704857f,
- -0.07363959f, 0.04843248f, -0.13421375f, -0.09789340f, -0.10255270f,
- 0.03509852f, 0.04751543f, -0.03822323f, 0.09740467f, 0.04762916f,
- 0.03940146f, -0.08283259f, 0.09552965f, 0.05038739f, 0.21258622f,
- 0.09646992f, 0.03241193f, 0.05167701f, 0.04614570f, 0.04330090f,
- -0.02671840f, -0.06259909f, -0.02301898f, 0.18829170f, 0.10522786f,
- 0.04313190f, 0.01670948f, -0.08421925f, 0.05911417f, -0.10582602f,
- -0.04855484f, -0.08373898f, 0.07775915f, 0.03723533f, -0.12047344f,
- 4.86345543e-003f, -0.10520902f, 0.06571782f, -0.07528137f,
- -0.03245651f, -0.09869066f, -0.02917477f, -0.18293270f, 0.14810945f,
- 9.24033765e-003f, -0.04354914f, 0.02266885f, -0.11872729f,
- -0.04016589f, 0.02830229f, 0.22539048f, 0.20565644f, 0.16701797f,
- 0.09019924f, 0.01300652f, 0.09760600f, -0.03675831f, -0.01935448f,
- -0.06894835f, 0.08077277f, 0.19047537f, 0.11312226f, 0.04106043f,
- -0.11187182f, 0.04312806f, -0.18548580f, -0.11287174f, -0.08794551f,
- 0.02078281f, -0.15295486f, 0.11806386f, -0.01103218f, -0.15971117f,
- 0.02153538f, -0.05232147f, -0.10835317f, -0.13910367f, 0.05920752f,
- -0.10122602f, 0.20174250f, 0.09105796f, -0.01881348f, 0.09559010f,
- -0.03725745f, -0.09442931f, -0.09763174f, 0.05854454f, 0.08287182f,
- 0.12919849f, 0.08594352f, -2.49806582e-003f, 0.02398440f,
- 5.67950122e-003f, -0.06296340f, -0.12993270f, 0.03855852f, 0.05186560f,
- 0.10839908f, -0.03380463f, -0.12654832f, -0.05399339f, -0.07456800f,
- -0.04736232f, -0.10164231f, 0.07496139f, 0.08125214f, 0.07656177f,
- -0.04999603f, -0.12823077f, -0.07692395f, -0.11317524f, -0.09118655f,
- -0.05695669f, 0.10477209f, 0.07468581f, 0.01630048f, -8.00961629e-003f,
- -0.06582128f, -0.04019095f, -0.04682907f, -0.01907842f, -0.10997720f,
- 0.04911406f, 0.02931030f, 0.04197735f, -0.05773980f, -0.09670641f,
- -0.03594951f, -0.03402121f, -0.07149299f, -0.10566200f, 0.10601286f,
- 0.06340689f, -0.01518632f, -5.96402306e-003f, -0.07628012f,
- -3.52779147e-003f, -0.02683854f, -0.10265494f, -0.02680815f,
- 0.16338381f, 0.03103515f, 0.02296976f, 0.01624348f, -0.10831620f,
- -0.02314233f, -0.04789969f, -0.05530700f, -0.06461314f, 0.10494506f,
- 0.04642856f, -0.07592955f, -0.06197905f, -0.09042154f, -0.01445521f,
- -0.04297818f, -0.11262015f, -0.11430512f, 0.03174541f, -0.03677487f,
- -0.02963996f, -0.06610169f, -0.13292049f, -0.07059067f, -0.08444111f,
- -0.02640536f, -0.07136250f, 0.04559967f, 0.01459980f, 0.17989251f,
- 0.04435328f, -0.12464730f, -0.02871115f, -0.10752209f, -0.03393742f,
- -0.03791408f, 0.02548251f, 0.01956050f, 0.19245651f, 0.13963254f,
- -0.05904696f, -0.07424626f, -0.10411884f, 1.54176133e-003f,
- 0.01797429f, 0.13025844f, 0.04547642f, -0.05710349f, -0.10697161f,
- -0.13489437f, -0.06515755f, -0.06406886f, -4.08572936e-003f,
- -0.01336483f, 0.04368737f, -0.11259720f, -0.05701635f, -0.06469971f,
- -0.08346602f, -0.04166770f, -0.05795543f, -0.08247511f, -0.05742628f,
- 0.08452254f, -0.03350224f, 0.13980860f, 0.13252275f, 0.07589617f,
- 0.07539988f, 0.12155797f, 0.19087289f, 0.15050751f, 0.21250245f,
- 0.14206800f, 0.01298489f, 0.07450245f, 0.06559097f, 0.01700557f,
- 0.04512971f, 0.16950700f, 0.10261577f, 0.16389982f, 0.05505059f,
- -0.03453077f, 0.08622462f, 0.07935954f, 0.03976260f, 0.02036091f,
- 3.95744899e-003f, 0.03267065f, 0.15235919f, 0.01297494f, -0.08109194f,
- 0.01407558f, 4.40693414e-003f, -0.15157418f, -0.11390478f,
- -0.07487597f, -7.81322457e-003f, -0.02749545f, -0.10181408f,
- 0.13755716f, 0.14007211f, 0.13482562f, 0.27517235f, 0.34251109f,
- 0.07639657f, 0.07268607f, 0.19823882f, 0.16135791f, -0.04186463f,
- -0.12784107f, -0.09846287f, 0.03169041f, 0.10974082f, -0.15051922f,
- -0.08916726f, -0.07138767f, -0.04153349f, 6.25418453e-003f,
- 0.01266654f, 0.10533249f, 0.12749144f, 0.15148053f, 0.01498513f,
- 0.06305949f, -0.01247123f, -0.08778401f, -0.08551880f, -0.11955146f,
- -0.08493572f, -0.02901620f, -0.02394859f, -0.13427313f, -0.11053200f,
- -0.14413260f, -0.15203285f, 0.03972760f, -3.72127310e-004f,
- -0.04200919f, 0.06105104f, 0.01904975f, -0.01106191f,
- -7.27445772e-003f, -0.01520341f, 1.10228511e-003f, -0.04949187f,
- -0.08013099f, 5.72071038e-003f, 0.08415454f, -0.06523152f, 0.03664081f,
- -0.02673042f, -0.12066154f, -0.03702074f, 0.06006580f, 0.01628682f,
- -6.17772620e-003f, 0.08192339f, -3.41629819e-003f, 0.02870512f,
- 0.05807141f, 0.04959986f, 0.04618251f, -0.04901629f, -0.10579574f,
- 0.02274442f, 0.12070961f, 2.23597488e-003f, 0.09831765f, -0.03019848f,
- -0.11181970f, -0.04961075f, 0.02498928f, -0.03714991f, -0.01619653f,
- 0.02643486f, -7.62964319e-003f, -0.02882290f, -0.06242594f,
- -0.08439861f, 0.07220893f, 0.07263952f, 0.01561574f, 0.03091968f,
- 0.01708712f, -0.03797151f, -3.18561122e-003f, 0.01624021f,
- -0.02828573f, 0.11284444f, -1.32280716e-003f, -0.07784860f,
- -0.07209100f, 0.03372242f, 0.12154529f, 0.02278104f, -0.05275500f,
- -0.01918484f, 0.12989293f, 0.05424401f, 0.02333086f, 0.04029022f,
- 0.12392918f, 0.09495489f, 0.09190340f, 0.07935889f, 8.76816828e-003f,
- 0.17148446f, -8.51302687e-003f, -0.08011249f, -0.06796283f,
- 0.04884845f, 0.01112272f, -0.07835306f, -1.14811445e-003f,
- -0.03440760f, 0.02845243f, 0.07695542f, -0.07069533f, -0.01151784f,
- -8.53884313e-003f, -0.01662786f, -0.04163864f, 0.05400505f,
- 0.02859163f, 0.02921852f, 0.05003135f, -6.85718050e-003f, -0.01632611f,
- 0.07780217f, 0.04042810f, -0.01216440f, 3.60914599e-003f, -0.06322435f,
- 0.09516726f, 0.12877031f, -9.69162490e-003f, 0.01031179f, 0.05180895f,
- -9.34659224e-003f, -0.01644533f, -0.04849347f, -0.04343236f,
- 0.10514783f, 0.08046635f, -0.04615205f, -0.03975486f, -0.01485525f,
- 0.13096830f, -0.01517950f, -0.06571898f, -0.04016372f, 0.01849786f,
- 0.02439670f, 0.08067258f, 1.74824719e-003f, 0.07053747f, 0.08819518f,
- -5.08352555e-003f, -0.06550863f, -0.08266170f, -0.07780605f,
- 0.01453450f, -0.08756890f, 0.01096501f, -8.71319138e-003f, 0.10110464f,
- 0.02420769f, -0.06708383f, 0.02007811f, 5.93133038e-003f, 0.05398923f,
- 0.07538138f, 0.02049227f, 0.02242589f, 0.04011070f, -1.44875818e-003f,
- -4.19115182e-003f, 0.06367654f, 0.02506934f, 0.02434536f, 0.05879405f,
- -8.22952855e-003f, -0.01242441f, 0.04224926f, -0.01754923f,
- 0.05958161f, 0.03818886f, -0.01830363f, -0.04308917f, -0.04422197f,
- -0.02432721f, 0.02264866f, 2.03751423e-003f, 0.01197031f, 0.04439203f,
- 0.12169247f, 0.03602713f, -0.02599251f, -1.98226492e-003f, 0.02046336f,
- -0.02639058f, -1.91242550e-003f, -0.09334669f, -0.03595153f,
- -9.88179818e-003f, -0.06848445f, -0.04666303f, -0.09955736f,
- -0.04206430f, 0.02609075f, 9.09005292e-003f, -0.07138551f,
- -4.22313227e-004f, 0.01766645f, 0.02756404f, 0.01308276f, 0.04052891f,
- 0.02387515f, 0.05337298f, 0.02500631f, -0.04970853f, -0.12467445f,
- 0.17604403f, 0.12256411f, -0.07512254f, 8.70451052e-003f, -0.05697548f,
- -0.03626474f, -8.76623299e-003f, -0.01210897f, -0.09451522f,
- 0.07490732f, -0.02008001f, -0.02681278f, -0.06463405f, -0.01517507f,
- 7.33757764e-003f, 6.07147906e-003f, -0.09316964f, -0.04575328f,
- 0.13261597f, 0.15424870f, -0.01655918f, -0.02772390f, -0.05243644f,
- -0.02356456f, -0.02351753f, -0.10211615f, -0.12873036f, 0.14549787f,
- 0.12519856f, 4.38762689e-003f, 0.02795992f, 0.05170322f, 0.09223596f,
- 0.05890015f, 0.02376701f, -0.02777346f, 0.09506908f, 0.02328936f,
- -0.02319928f, -0.03218696f, -0.01527841f, -0.01016694f, -0.02674719f,
- 0.05137179f, 0.01980666f, 0.06544447f, -0.01746171f, 0.01026380f,
- 0.01561806f, 7.97004555e-004f, 0.07601810f, 0.01907250f, -0.03083035f,
- -0.05987392f, 0.09242783f, 0.14555025f, 0.01035827f, 0.03092401f,
- -0.09562709f, -0.03802354f, 0.02531144f, 0.03079449f, -0.07100715f,
- 0.03330721f, -2.69116857e-003f, 0.03167490f, 0.05744999f, 0.03259895f,
- 1.91266940e-003f, 0.03194578f, 0.07389776f, 0.02198060f, 0.07633314f,
- 0.03293105f, -0.09103648f, 0.04718142f, 0.06102672f, -0.01003063f,
- 5.85481385e-003f, -0.01522574f, 0.02323526f, 0.10584345f,
- 4.35879454e-003f, 0.06107873f, 0.05868603f, -0.03115531f, 0.01214679f,
- 0.08567052f, 3.93926632e-003f, -0.02521488f, -1.88425183e-003f,
- 0.02038053f, -6.26854831e-004f, 0.04897438f, -0.04280585f,
- -0.04819689f, -0.04812867f, -0.01451186f, 0.05101469f,
- -9.01125465e-003f, -0.03333859f, 0.03917955f, 0.04196448f, 0.04292135f,
- 0.02809529f, 0.02999715f, 0.04081348f, 9.10039060e-003f, 0.09703232f,
- 0.10379741f, 0.02348725f, -4.72756615e-003f, 0.01027325f, 0.10402658f,
- 0.12071823f, 0.09817299f, -0.02612033f, 0.03638414f, 0.05896405f,
- 0.04865025f, 0.04793910f, -0.03882321f, -0.02962117f, -0.01222268f,
- 0.04071597f, 0.01922777f, -0.02287866f, 0.03328381f, 0.01859092f,
- 0.09024994f, 0.03804455f, -0.01424510f, 0.01953739f, 0.02509617f,
- -0.03390914f, -0.05663941f, -0.01641979f, 0.05848591f, 0.04639670f,
- 0.02092116f, 0.12911791f, 0.19918139f, 0.07739855f, -7.25806039e-003f,
- 0.04074838f, 0.03183993f, 1.39251316e-003f, -0.01428625f, 0.01865480f,
- 0.08529541f, 0.13547510f, 0.11189661f, 0.03998901f, 0.09575938f,
- -0.02631102f, -0.03458253f, -0.04749985f, -0.06070716f,
- 4.71884012e-003f, 0.06445789f, -0.02450038f, -0.05483776f,
- -0.04657237f, -0.02030717f, -0.03480766f, -0.09397731f, -0.06399718f,
- -0.01804585f, 5.62348310e-003f, -6.64811488e-003f, -0.06517869f,
- 6.96210237e-003f, -0.01860148f, -0.04245830f, -0.05850367f,
- -3.24417115e-003f, 0.07700698f, 0.11290991f, 0.09923030f, -0.02970599f,
- 0.05592411f, 0.04813979f, -0.09811195f, -0.09357996f, -0.03276114f,
- 0.05218338f, 0.04141375f, 3.92977800e-003f, -0.05047480f, 0.15960084f,
- 0.04612800f, -0.03114098f, -0.04650044f, -0.03249795f, -0.02425641f,
- -0.04311355f, 0.04307659f, -0.09401883f, -0.04742785f, -0.01254499f,
- -0.06598741f, 3.41369561e-003f, -0.05620445f, -7.28127593e-003f,
- -0.05998361f, -0.03274450f, -0.07376868f, 3.19015374e-003f,
- -0.07733069f, 0.05815864f, -0.02471071f, 0.03850617f, 0.13838784f,
- 0.15399861f, 0.01731321f, -0.01477586f, 0.10393341f, 0.05159833f,
- -0.01945555f, -0.03427503f, -0.04867341f, 0.09237480f, 0.10732719f,
- 0.06071450f, -0.01355071f, 0.01844356f, -0.03480803f, -0.03796671f,
- 2.15628621e-004f, -0.05440186f, 0.01889855f, -0.01443413f,
- -0.02607902f, -0.02938001f, 0.02720689f, -0.06228397f, -0.02970936f,
- -0.03426210f, -0.10280876f, -0.06739304f, -0.05227850f, 0.03360292f,
- -0.11278441f, -0.06966180f, -0.13937433f, 9.10932291e-003f,
- 2.52020749e-004f, -4.07359656e-003f, 0.12310639f, 0.09343060f,
- 0.07302511f, 0.03222093f, 0.07532879f, 0.03792387f, -0.04985180f,
- 0.01804602f, 0.02694195f, 0.13481498f, 0.04601225f, 0.04106982f,
- 0.08511057f, 0.12314661f, 0.01320830f, 0.05044121f, -5.52943908e-003f,
- -0.08992624f, -0.02249301f, -0.08181777f, 0.06165213f, -0.03256603f,
- -0.01068920f, -0.01323473f, -0.11970232f, -0.04616347f, -0.12088681f,
- -0.06762606f, -0.08676834f, -0.06434575f, 0.01772529f, 0.03469615f,
- -0.10926618f, 0.03013873f, 0.14030397f, 0.16130108f, 0.17985588f,
- 0.11281928f, 0.10530639f, 0.08905948f, 0.07733764f, 0.06695238f,
- 0.02142088f, 0.06438877f, 0.09794453f, 0.05745072f, 0.02788557f,
- 0.02632830f, 0.07985807f, 4.24902979e-003f, 8.47890321e-003f,
- -0.02679466f, -5.28812688e-003f, -0.02162580f, -0.07490715f,
- -0.08251337f, -0.02056576f, -0.01026194f, -1.15492963e-003f,
- -5.75720915e-004f, -0.07210591f, -0.07320981f, -0.04883312f,
- -0.10897151f, -0.07477258f, -0.08867134f, -0.09222437f, -0.10924666f,
- -0.10430276f, 0.07953499f, 0.02767959f, 0.11393359f, 0.18779543f,
- 0.03313421f, 0.02143700f, 0.05852016f, -2.12067598e-003f,
- -3.76984011e-003f, 0.02774167f, -0.03124610f, 0.01465141f, 0.01616004f,
- -0.01391913f, -0.04404102f, -0.05444227f, -0.14684731f, -0.15016587f,
- 0.04509468f, 1.29563001e-003f, 0.01398350f, 0.05610404f, -0.04868806f,
- -0.04776716f, -8.16873740e-003f, -2.30126386e-003f, -0.02286313f,
- 0.11983398f, -0.04703261f, -0.08814441f, -0.07585249f, -0.10799607f,
- -0.03232087f, 0.01509786f, -0.04843464f, -0.03967846f, 0.09589416f,
- 0.01352560f, -0.01458119f, 0.01050829f, -0.03038946f, 0.01608388f,
- 1.11975556e-003f, -0.01250656f, 2.86211423e-003f, 0.04333691f,
- -0.14603497f, -0.01946543f, -0.02327525f, -0.01973944f, 0.07944400f,
- -0.02224544f, -0.06701808f, 0.03476532f, 0.11505594f, -0.02712801f,
- -0.01665113f, 0.06315716f, -0.08205860f, 0.07431999f, 0.04915778f,
- -0.04468752f, -0.01490402f, 0.07400476f, -0.11650901f, 0.05102430f,
- 0.04559118f, -0.05916039f, 0.08840760f, -0.01587902f, -0.14890194f,
- 0.07857784f, 0.04710254f, -0.05381983f, -0.07331945f, -0.03604643f,
- 0.15611970f, 0.07649943f, -0.05959348f, -0.02776607f, 0.11098688f,
- 0.03758875f, -0.04446875f, 0.04933187f, 0.01345535f, 0.06921103f,
- 0.07364785f, 0.05518956f, 0.02899585f, 0.09375840f, 0.10518434f,
- -0.04420241f, 0.01915282f, -3.56386811e-003f, 0.14586878f, 0.10286101f,
- -0.04360626f, -0.12723237f, 0.09076386f, 0.11119842f, -0.06035013f,
- 0.09674817f, 0.08938243f, 0.07065924f, 0.02603180f, 5.84815582e-003f,
- -0.05922065f, 0.12360309f, 3.59695964e-003f, 2.99844006e-003f,
- 0.03697936f, 0.02043072f, 0.04168725f, 0.01025975f, -0.01359980f,
- -0.01600920f, 0.02581056f, 0.02329250f, 2.98100687e-003f, 0.01629762f,
- 0.06652115f, 0.05855627f, 0.01237463f, -0.01297135f, 0.01761587f,
- 0.05090865f, 0.06549342f, -0.04425945f, 2.43203156e-003f,
- 3.07327788e-003f, 0.06678630f, -0.04303836f, 0.01082393f, -0.06476044f,
- 0.04077786f, 0.12441979f, 0.08237778f, 0.07424165f, 0.04065890f,
- 0.06905543f, 0.09556347f, 0.12724875f, -0.02132082f, 0.08514154f,
- -0.04175328f, -0.02666954f, 0.01897836f, 0.03317382f, 9.45465732e-003f,
- -0.01238974f, -0.04242500f, -0.01419479f, -0.03545213f, -0.02440874f,
- 0.08684119f, 0.04212951f, 0.02462858f, -0.01104825f, -5.01706870e-003f,
- 0.02968982f, 0.02597476f, -0.01568939f, 0.04514892f, 0.06974549f,
- 0.08670278f, 0.06828108f, 0.10238872f, 0.05405957f, 0.06548470f,
- -0.03763957f, 0.01366090f, 0.07069602f, 0.05363748f, 0.04798120f,
- 0.11706422f, 0.05466456f, -0.01869259f, 0.06344382f, 0.03106543f,
- 0.08432506f, -0.02061096f, 0.03821088f, -6.92190882e-003f,
- 6.40467042e-003f, -0.01271779f, 6.89014705e-005f, 0.04541415f,
- -0.01899539f, -0.05020239f, 0.03000903f, 0.01090422f, 4.52452758e-003f,
- 0.02573632f, -0.02388454f, -0.04200457f, 1.72783900e-003f,
- -0.05978370f, -0.02720562f, 0.06573715f, 0.01154317f, 0.01265615f,
- 0.07375994f, -9.19828378e-003f, -0.04914120f, 0.02124831f, 0.06455322f,
- 0.04372910f, -0.03310043f, 0.03605788f, -6.78055827e-003f,
- 9.36202332e-003f, 0.01747596f, -0.06406314f, -0.06812935f, 0.08080816f,
- -0.02778088f, 0.02735260f, 0.06393493f, 0.06652229f, 0.05676993f,
- 0.08640018f, -7.59188086e-003f, -0.02012847f, -0.04741159f,
- -0.01657069f, -0.01624399f, 0.05547778f, -2.33309763e-003f,
- 0.01120033f, 0.06141156f, -0.06285004f, -0.08732341f, -0.09313398f,
- -0.04267832f, 5.57443965e-003f, 0.04809862f, 0.01773641f,
- 5.37361018e-003f, 0.14842421f, -0.06298012f, -0.02935147f, 0.11443478f,
- -0.05034208f, 5.65494271e-003f, 0.02076526f, -0.04577984f,
- -0.04735741f, 0.02961071f, -0.09307127f, -0.04417921f, -0.04990027f,
- -0.03940028f, 0.01306016f, 0.06267900f, 0.03758737f, 0.08460117f,
- 0.13858789f, 0.04862388f, -0.06319809f, -0.05655516f, 0.01885816f,
- -0.03285607f, 0.03371567f, -0.07040928f, -0.04514049f, 0.01392166f,
- 0.08184422f, -0.07230316f, 0.02386871f, 0.02184591f, 0.02605764f,
- -0.01033954f, 9.29878280e-003f, 7.67351175e-003f, 0.15189242f,
- 0.02069071f, -0.09738296f, -0.08894105f, -0.07768748f, 0.02332268f,
- -0.01778995f, -0.03258888f, -0.08180822f, -0.08492987f, 0.02290156f,
- -0.11368170f, -0.03554465f, -0.04533844f, -0.02861580f, 0.06782424f,
- 0.01113123f, 0.02453644f, 0.12721945f, 0.08084814f, -0.03607795f,
- 0.01109122f, 0.04803548f, -0.03489929f, 0.03399536f, -0.05682014f,
- 8.59533902e-003f, -4.27904585e-003f, 0.03230887f, -0.01300198f,
- -0.01038137f, -0.07930113f, 8.33097473e-003f, 0.02296994f,
- -0.01306500f, -0.01881626f, 0.04413369f, 0.05729880f, -0.03761553f,
- 0.01942326f, 1.64540811e-003f, -0.03811319f, 0.04190650f, -0.14978096f,
- -0.04514487f, 0.01209545f, -5.46460645e-003f, -0.01647195f,
- 7.63064111e-003f, -0.07494587f, 0.08415288f, 0.10020141f, -0.01228561f,
- 0.06553826f, 0.04554005f, 0.07890417f, 0.03041138f, 0.01752007f,
- 0.09208256f, -3.74419295e-004f, 0.10549527f, 0.04686913f, 0.01894833f,
- -0.02651412f, -4.34682379e-003f, 5.44942822e-003f, 0.01444484f,
- 0.05882156f, -0.03336544f, 0.04603891f, -0.10432546f, 0.01923928f,
- 0.01842845f, -0.01712168f, -0.02222766f, 0.04693324f, -0.06202956f,
- -0.01422159f, 0.08732220f, -0.07706107f, 0.02661049f, -0.04300238f,
- -0.03092422f, -0.03552184f, -0.01886088f, -0.04979934f, 0.03906401f,
- 0.04608644f, 0.04966111f, 0.04275464f, -0.04621769f, -0.02653212f,
- 8.57011229e-003f, 0.03839684f, 0.05818764f, 0.03880796f,
- -2.76100676e-004f, 0.03076511f, -0.03266929f, -0.05374557f,
- 0.04986527f, -9.45429131e-003f, 0.03582499f, -2.64564669e-003f,
- -1.07461517e-003f, 0.02962313f, -0.01483363f, 0.03060869f, 0.02448327f,
- 0.01845641f, 0.03282966f, -0.03534438f, -0.01084059f, -0.01119136f,
- -1.85360224e-003f, -5.94652840e-004f, -0.04451817f, 2.98327743e-003f,
- 0.06272484f, -0.02152076f, -3.05971340e-003f, -0.05070828f,
- 0.01531762f, 0.01282815f, 0.05167150f, 9.46266949e-003f,
- -3.34558333e-003f, 0.11442288f, -0.03906701f, -2.67325155e-003f,
- 0.03069184f, -0.01134165f, 0.02949462f, 0.02879886f, 0.03855566f,
- -0.03450781f, 0.09142872f, -0.02156654f, 0.06075062f, -0.06220816f,
- 0.01944680f, 6.68372354e-003f, -0.06656796f, 8.70784000e-003f,
- 0.03456013f, 0.02434320f, -0.13236357f, -0.04177035f, -0.02069627f,
- 0.01068112f, 0.01505432f, -0.07517391f, -3.83571628e-003f,
- -0.06298508f, -0.02881260f, -0.13101046f, -0.07221562f,
- -5.79945277e-003f, -8.57300125e-003f, 0.03782469f, 0.02762164f,
- 0.04942456f, -0.02936396f, 0.09597211f, 0.01921411f, 0.06101191f,
- -0.04787507f, -0.01379578f, -7.40224449e-003f, -0.02220136f,
- -0.01313756f, 7.77558051e-003f, 0.12296968f, 0.02939998f, 0.03594062f,
- -0.07788624f, -0.01133144f, 3.99316690e-004f, -0.06090347f,
- -0.01122066f, -4.68682544e-003f, 0.07633100f, -0.06748922f,
- -0.05640298f, -0.05265681f, -0.01139122f, -0.01624347f, -0.04715714f,
- -0.01099092f, 0.01048561f, 3.28499987e-003f, -0.05810167f,
- -0.07699911f, -0.03330683f, 0.04185145f, 0.03478536f, 0.02275165f,
- 0.02304766f, 6.66040834e-003f, 0.10968148f, -5.93013782e-003f,
- -0.04858336f, -0.04203213f, -0.09316786f, -6.13074889e-003f,
- -0.02544625f, 0.01366201f, 9.18555818e-003f, -0.01846578f,
- -0.05622401f, -0.03989377f, -0.07810296f, 6.91275718e-003f,
- 0.05957597f, -0.03901334f, 0.01572002f, -0.01193903f,
- -6.89400872e-003f, -0.03093356f, -0.04136098f, -0.01562869f,
- -0.04604580f, 0.02865234f, -0.08678447f, -0.03232484f, -0.05364593f,
- -0.01445016f, -0.07003860f, -0.08669746f, -0.04520775f, 0.04274122f,
- 0.03117515f, 0.08175703f, 0.01081109f, 0.06379741f, 0.06199206f,
- 0.02865988f, 0.02360346f, 0.06725410f, -0.03248780f, -9.37702879e-003f,
- 0.08265898f, -0.02245839f, 0.05125763f, -0.01862395f, 0.01973453f,
- -0.01994494f, -0.10770868f, 0.03180375f, 3.23935156e-003f,
- -0.02142080f, -0.04256190f, 0.04760900f, 0.04282863f, 0.05635953f,
- -0.01870849f, 0.05540622f, -0.03042666f, 0.01455277f, -0.06630179f,
- -0.05843807f, -0.03739681f, -0.09739155f, -0.03220233f, -0.05620182f,
- -0.10381401f, 0.07400211f, 4.20676917e-003f, 0.03258535f,
- 2.14308966e-003f, 0.05121966f, -0.01274337f, 0.02384761f, 0.06335578f,
- -0.07905591f, 0.08375625f, -0.07898903f, -0.06508528f, -0.02498444f,
- 0.06535810f, 0.03970535f, 0.04895468f, -0.01169566f, -0.03980601f,
- 0.05682293f, 0.05925463f, -0.01165808f, -0.07936699f, -0.04208954f,
- 0.01333987f, 0.09051196f, 0.10098671f, -0.03974256f, 0.01238771f,
- -0.07501741f, -0.03655440f, -0.04301528f, 0.09216860f,
- 4.63579083e-004f, 0.02851115f, 0.02142735f, 1.28244064e-004f,
- 0.02879687f, -0.08554889f, -0.04838862f, 0.08135369f, -0.05756533f,
- 0.01413900f, 0.03451880f, -0.06619488f, -0.03053130f, 0.02961676f,
- -0.07384635f, 0.01135692f, 0.05283910f, -0.07778034f, -0.02107482f,
- -0.05511716f, -0.13473752f, 0.03030157f, 0.06722020f, -0.06218817f,
- -0.05826827f, 0.06254654f, 0.02895772f, -0.01664000f, -0.03620280f,
- -0.01612278f, -1.46097376e-003f, 0.14013411f, -8.96181818e-003f,
- -0.03250246f, 3.38630192e-003f, 2.64779478e-003f, 0.03359732f,
- -0.02411991f, -0.04229729f, 0.10666174f, -6.66579151f };
- return vector<float>(detector, detector + sizeof(detector)/sizeof(detector[0]));
+ static const float detector[] =
+ {
+ 0.05359386f, -0.14721455f, -0.05532170f, 0.05077307f,
+ 0.11547081f, -0.04268804f, 0.04635834f, -0.05468199f, 0.08232084f,
+ 0.10424068f, -0.02294518f, 0.01108519f, 0.01378693f, 0.11193510f,
+ 0.01268418f, 0.08528346f, -0.06309239f, 0.13054633f, 0.08100729f,
+ -0.05209739f, -0.04315529f, 0.09341384f, 0.11035026f, -0.07596218f,
+ -0.05517511f, -0.04465296f, 0.02947334f, 0.04555536f,
+ -3.55954492e-003f, 0.07818956f, 0.07730991f, 0.07890715f, 0.06222893f,
+ 0.09001380f, -0.03574381f, 0.03414327f, 0.05677258f, -0.04773581f,
+ 0.03746637f, -0.03521175f, 0.06955440f, -0.03849038f, 0.01052293f,
+ 0.01736112f, 0.10867710f, 0.08748853f, 3.29739624e-003f, 0.10907028f,
+ 0.07913758f, 0.10393070f, 0.02091867f, 0.11594022f, 0.13182420f,
+ 0.09879354f, 0.05362710f, -0.06745391f, -7.01260753e-003f,
+ 5.24702156e-003f, 0.03236255f, 0.01407916f, 0.02207983f, 0.02537322f,
+ 0.04547948f, 0.07200756f, 0.03129894f, -0.06274468f, 0.02107014f,
+ 0.06035208f, 0.08636236f, 4.53164103e-003f, 0.02193363f, 0.02309801f,
+ 0.05568166f, -0.02645093f, 0.04448695f, 0.02837519f, 0.08975694f,
+ 0.04461516f, 0.08975355f, 0.07514391f, 0.02306982f, 0.10410084f,
+ 0.06368385f, 0.05943464f, 4.58420580e-003f, 0.05220337f, 0.06675851f,
+ 0.08358569f, 0.06712101f, 0.06559004f, -0.03930482f, -9.15936660e-003f,
+ -0.05897915f, 0.02816453f, 0.05032348f, 0.06780671f, 0.03377650f,
+ -6.09417039e-004f, -0.01795146f, -0.03083684f, -0.01302475f,
+ -0.02972313f, 7.88706727e-003f, -0.03525961f, -2.50397739e-003f,
+ 0.05245084f, 0.11791293f, -0.02167498f, 0.05299332f, 0.06640524f,
+ 0.05190265f, -8.27316567e-003f, 0.03033127f, 0.05842173f,
+ -4.01050318e-003f, -6.25105947e-003f, 0.05862958f, -0.02465461f,
+ 0.05546781f, -0.08228195f, -0.07234028f, 0.04640540f, -0.01308254f,
+ -0.02506191f, 0.03100746f, -0.04665651f, -0.04591486f, 0.02949927f,
+ 0.06035462f, 0.02244646f, -0.01698639f, 0.01040041f, 0.01131170f,
+ 0.05419579f, -0.02130277f, -0.04321722f, -0.03665198f, 0.01126490f,
+ -0.02606488f, -0.02228328f, -0.02255680f, -0.03427236f,
+ -7.75165204e-003f, -0.06195229f, 8.21638294e-003f, 0.09535975f,
+ -0.03709979f, -0.06942501f, 0.14579427f, -0.05448192f, -0.02055904f,
+ 0.05747357f, 0.02781788f, -0.07077577f, -0.05178314f, -0.10429011f,
+ -0.11235505f, 0.07529039f, -0.07559302f, -0.08786739f, 0.02983843f,
+ 0.02667585f, 0.01382199f, -0.01797496f, -0.03141199f, -0.02098101f,
+ 0.09029204f, 0.04955018f, 0.13718739f, 0.11379953f, 1.80019124e-003f,
+ -0.04577610f, -1.11108483e-003f, -0.09470536f, -0.11596080f,
+ 0.04489342f, 0.01784211f, 3.06850672e-003f, 0.10781866f,
+ 3.36498418e-003f, -0.10842580f, -0.07436839f, -0.10535070f,
+ -0.01866805f, 0.16057891f, -5.07316366e-003f, -0.04295658f,
+ -5.90488780e-003f, 8.82003549e-003f, -0.01492646f, -0.05029279f,
+ -0.12875880f, 8.78831954e-004f, -0.01297184f, -0.07592774f,
+ -0.02668831f, -6.93787413e-004f, 0.02406698f, -0.01773298f,
+ -0.03855745f, -0.05877856f, 0.03259695f, 0.12826584f, 0.06292590f,
+ -4.10733931e-003f, 0.10996531f, 0.01332991f, 0.02088735f, 0.04037504f,
+ -0.05210760f, 0.07760046f, 0.06399347f, -0.05751930f, -0.10053057f,
+ 0.07505023f, -0.02139782f, 0.01796176f, 2.34400877e-003f, -0.04208319f,
+ 0.07355055f, 0.05093350f, -0.02996780f, -0.02219072f, 0.03355330f,
+ 0.04418742f, -0.05580705f, -0.05037573f, -0.04548179f, 0.01379514f,
+ 0.02150671f, -0.02194211f, -0.13682702f, 0.05464972f, 0.01608082f,
+ 0.05309116f, 0.04701022f, 1.33690401e-003f, 0.07575664f, 0.09625306f,
+ 8.92647635e-003f, -0.02819123f, 0.10866830f, -0.03439325f,
+ -0.07092371f, -0.06004780f, -0.02712298f, -7.07467366e-003f,
+ -0.01637020f, 0.01336790f, -0.10313606f, 0.04906582f, -0.05732445f,
+ -0.02731079f, 0.01042235f, -0.08340668f, 0.03686501f, 0.06108340f,
+ 0.01322748f, -0.07809529f, 0.03774724f, -0.03413248f, -0.06096525f,
+ -0.04212124f, -0.07982176f, -1.25973229e-003f, -0.03045501f,
+ -0.01236493f, -0.06312395f, 0.04789570f, -0.04602066f, 0.08576570f,
+ 0.02521080f, 0.02988098f, 0.10314583f, 0.07060035f, 0.04520544f,
+ -0.04426654f, 0.13146530f, 0.08386490f, 0.02164590f, -2.12280243e-003f,
+ -0.03686353f, -0.02074944f, -0.03829959f, -0.01530596f, 0.02689708f,
+ 0.11867401f, -0.06043470f, -0.02785023f, -0.04775074f, 0.04878745f,
+ 0.06350956f, 0.03494788f, 0.01467400f, 1.17890188e-003f, 0.04379614f,
+ 2.03681854e-003f, -0.03958609f, -0.01072688f, 6.43705716e-003f,
+ 0.02996500f, -0.03418507f, -0.01960307f, -0.01219154f,
+ -4.37000440e-003f, -0.02549453f, 0.02646318f, -0.01632513f,
+ 6.46516960e-003f, -0.01929734f, 4.78711911e-003f, 0.04962371f,
+ 0.03809111f, 0.07265724f, 0.05758125f, -0.03741554f, 0.01648608f,
+ -8.45285598e-003f, 0.03996826f, -0.08185477f, 0.02638875f,
+ -0.04026615f, -0.02744674f, -0.04071517f, 1.05096330e-003f,
+ -0.04741232f, -0.06733172f, 8.70434940e-003f, -0.02192543f,
+ 1.35350740e-003f, -0.03056974f, -0.02975521f, -0.02887780f,
+ -0.01210713f, -0.04828526f, -0.09066251f, -0.09969629f, -0.03665164f,
+ -8.88111943e-004f, -0.06826669f, -0.01866150f, -0.03627640f,
+ -0.01408288f, 0.01874239f, -0.02075835f, 0.09145175f, -0.03547291f,
+ 0.05396780f, 0.04198981f, 0.01301925f, -0.03384354f, -0.12201976f,
+ 0.06830920f, -0.03715654f, 9.55848210e-003f, 5.05685573e-003f,
+ 0.05659294f, 3.90764466e-003f, 0.02808490f, -0.05518097f, -0.03711621f,
+ -0.02835565f, -0.04420464f, -0.01031947f, 0.01883466f,
+ -8.49525444e-003f, -0.09419250f, -0.01269387f, -0.02133371f,
+ -0.10190815f, -0.07844430f, 2.43644323e-003f, -4.09610150e-003f,
+ 0.01202551f, -0.06452291f, -0.10593818f, -0.02464746f, -0.02199699f,
+ -0.07401930f, 0.07285886f, 8.87513801e-004f, 9.97662079e-003f,
+ 8.46779719e-003f, 0.03730333f, -0.02905126f, 0.03573337f, -0.04393689f,
+ -0.12014472f, 0.03176554f, -2.76015815e-003f, 0.10824566f, 0.05090732f,
+ -3.30179278e-003f, -0.05123822f, 5.04784798e-003f, -0.05664124f,
+ -5.99415926e-003f, -0.05341901f, -0.01221393f, 0.01291318f,
+ 9.91760660e-003f, -7.56987557e-003f, -0.06193124f, -2.24549137e-003f,
+ 0.01987562f, -0.02018840f, -0.06975540f, -0.06601523f, -0.03349112f,
+ -0.08910118f, -0.03371435f, -0.07406893f, -0.02248047f, -0.06159951f,
+ 2.77751544e-003f, -0.05723337f, -0.04792468f, 0.07518548f,
+ 2.77279224e-003f, 0.04211938f, 0.03100502f, 0.05278448f, 0.03954679f,
+ -0.03006846f, -0.03851741f, -0.02792403f, -0.02875333f, 0.01531280f,
+ 0.02186953f, -0.01989829f, 2.50679464e-003f, -0.10258728f,
+ -0.04785743f, -0.02887216f, 3.85063468e-003f, 0.01112236f,
+ 8.29218887e-003f, -0.04822981f, -0.04503597f, -0.03713100f,
+ -0.06988008f, -0.11002295f, -2.69209221e-003f, 1.85383670e-003f,
+ -0.05921049f, -0.06105053f, -0.08458050f, -0.04527602f,
+ 8.90329306e-004f, -0.05875023f, -2.68602883e-003f, -0.01591195f,
+ 0.03631859f, 0.05493166f, 0.07300330f, 5.53333294e-003f, 0.06400407f,
+ 0.01847740f, -5.76280477e-003f, -0.03210877f, 4.25160583e-003f,
+ 0.01166520f, -1.44864211e-003f, 0.02253744f, -0.03367080f, 0.06983195f,
+ -4.22323542e-003f, -8.89401045e-003f, -0.07943393f, 0.05199728f,
+ 0.06065201f, 0.04133492f, 1.44032843e-003f, -0.09585235f, -0.03964731f,
+ 0.04232114f, 0.01750465f, -0.04487902f, -7.59733608e-003f, 0.02011171f,
+ 0.04673622f, 0.09011173f, -0.07869188f, -0.04682482f, -0.05080139f,
+ -3.99383716e-003f, -0.05346331f, 0.01085723f, -0.03599333f,
+ -0.07097908f, 0.03551549f, 0.02680387f, 0.03471529f, 0.01790393f,
+ 0.05471273f, 9.62048303e-003f, -0.03180215f, 0.05864431f, 0.02330614f,
+ 0.01633144f, -0.05616681f, -0.10245429f, -0.08302189f, 0.07291322f,
+ -0.01972590f, -0.02619633f, -0.02485327f, -0.04627592f,
+ 1.48853404e-003f, 0.05514185f, -0.01270860f, -0.01948900f, 0.06373586f,
+ 0.05002292f, -0.03009798f, 8.76216311e-003f, -0.02474238f,
+ -0.05504891f, 1.74034527e-003f, -0.03333667f, 0.01524987f, 0.11663762f,
+ -1.32344989e-003f, -0.06608453f, 0.05687166f, -6.89525274e-004f,
+ -0.04402352f, 0.09450210f, -0.04222684f, -0.05360983f, 0.01779531f,
+ 0.02561388f, -0.11075410f, -8.77790991e-003f, -0.01099504f,
+ -0.10380266f, 0.03103457f, -0.02105741f, -0.07371717f, 0.05146710f,
+ 0.10581432f, -0.08617968f, -0.02892107f, 0.01092199f, 0.14551543f,
+ -2.24320893e-003f, -0.05818033f, -0.07390742f, 0.05701261f,
+ 0.12937020f, -0.04986651f, 0.10182415f, 0.05028650f, 0.12515625f,
+ 0.09175041f, 0.06404983f, 0.01523394f, 0.09460562f, 0.06106631f,
+ -0.14266998f, -0.02926703f, 0.02762171f, 0.02164151f,
+ -9.58488265e-004f, -0.04231362f, -0.09866509f, 0.04322244f,
+ 0.05872034f, -0.04838847f, 0.06319253f, 0.02443798f, -0.03606876f,
+ 9.38737206e-003f, 0.04289991f, -0.01027411f, 0.08156885f, 0.08751175f,
+ -0.13191354f, 8.16054735e-003f, -0.01452161f, 0.02952677f, 0.03615945f,
+ -2.09128903e-003f, 0.02246693f, 0.09623287f, 0.09412123f, -0.02924758f,
+ -0.07815186f, -0.02203079f, -2.02566991e-003f, 0.01094733f,
+ -0.01442332f, 0.02838561f, 0.11882371f, 7.28798332e-003f, -0.10345965f,
+ 0.07561217f, -0.02049661f, 4.44177445e-003f, 0.01609347f, -0.04893158f,
+ -0.08758243f, -7.67420698e-003f, 0.08862378f, 0.06098121f, 0.06565887f,
+ 7.32981879e-003f, 0.03558407f, -0.03874352f, -0.02490055f,
+ -0.06771075f, 0.09939223f, -0.01066077f, 0.01382995f, -0.07289080f,
+ 7.47184316e-003f, 0.10621431f, -0.02878659f, 0.02383525f, -0.03274646f,
+ 0.02137008f, 0.03837290f, 0.02450992f, -0.04296818f, -0.02895143f,
+ 0.05327370f, 0.01499020f, 0.04998732f, 0.12938657f, 0.09391870f,
+ 0.04292390f, -0.03359194f, -0.06809492f, 0.01125796f, 0.17290455f,
+ -0.03430733f, -0.06255233f, -0.01813114f, 0.11726857f, -0.06127599f,
+ -0.08677909f, -0.03429872f, 0.04684938f, 0.08161420f, 0.03538774f,
+ 0.01833884f, 0.11321855f, 0.03261845f, -0.04826299f, 0.01752407f,
+ -0.01796414f, -0.10464549f, -3.30041884e-003f, 2.29343961e-004f,
+ 0.01457292f, -0.02132982f, -0.02602923f, -9.87351313e-003f,
+ 0.04273872f, -0.02103316f, -0.07994065f, 0.02614958f, -0.02111666f,
+ -0.06964913f, -0.13453490f, -0.06861878f, -6.09341264e-003f,
+ 0.08251446f, 0.15612499f, 2.46531400e-003f, 8.88424646e-003f,
+ -0.04152999f, 0.02054853f, 0.05277953f, -0.03087788f, 0.02817579f,
+ 0.13939077f, 0.07641046f, -0.03627627f, -0.03015098f, -0.04041540f,
+ -0.01360690f, -0.06227205f, -0.02738223f, 0.13577610f, 0.15235767f,
+ -0.05392922f, -0.11175954f, 0.02157129f, 0.01146481f, -0.05264937f,
+ -0.06595174f, -0.02749175f, 0.11812254f, 0.17404149f, -0.06137035f,
+ -0.11003478f, -0.01351621f, -0.01745916f, -0.08577441f, -0.04469909f,
+ -0.06106115f, 0.10559758f, 0.20806813f, -0.09174948f, 7.09621934e-004f,
+ 0.03579374f, 0.07215115f, 0.02221742f, 0.01827742f, -7.90785067e-003f,
+ 0.01489554f, 0.14519960f, -0.06425831f, 0.02990399f, -1.80181325e-003f,
+ -0.01401528f, -0.04171134f, -3.70530109e-003f, -0.09090481f,
+ 0.09520713f, 0.08845516f, -0.02651753f, -0.03016730f, 0.02562448f,
+ 0.03563816f, -0.03817881f, 0.01433385f, 0.02256983f, 0.02872120f,
+ 0.01001934f, -0.06332260f, 0.04338406f, 0.07001807f, -0.04705722f,
+ -0.07318907f, 0.02630457f, 0.03106382f, 0.06648342f, 0.10913180f,
+ -0.01630815f, 0.02910308f, 0.02895109f, 0.08040254f, 0.06969310f,
+ 0.06797734f, 6.08639978e-003f, 4.16588830e-003f, 0.08926726f,
+ -0.03123648f, 0.02700146f, 0.01168734f, -0.01631594f, 4.61015804e-003f,
+ 8.51359498e-003f, -0.03544224f, 0.03571994f, 4.29766066e-003f,
+ -0.01970077f, -8.79793242e-003f, 0.09607988f, 0.01544222f,
+ -0.03923707f, 0.07308586f, 0.06061262f, 1.31683104e-004f,
+ -7.98222050e-003f, 0.02399261f, -0.06084389f, -0.02743429f,
+ -0.05475523f, -0.04131311f, 0.03559756f, 0.03055342f, 0.02981433f,
+ 0.14860515f, 0.01766787f, 0.02945257f, 0.04898238f, 0.01026922f,
+ 0.02811658f, 0.08267091f, 0.02732154f, -0.01237693f, 0.11760156f,
+ 0.03802063f, -0.03309754f, 5.24957618e-003f, -0.02460510f, 0.02691451f,
+ 0.05399988f, -0.10133506f, 0.06385437f, -0.01818005f, 0.02259503f,
+ 0.03573135f, 0.01042848f, -0.04153402f, -0.04043029f, 0.01643575f,
+ 0.08326677f, 4.61383024e-004f, -0.05308095f, -0.08536223f,
+ -1.61011645e-003f, -0.02163720f, -0.01783352f, 0.03859637f,
+ 0.08498885f, -0.01725216f, 0.08625131f, 0.10995087f, 0.09177644f,
+ 0.08498347f, 0.07646490f, 0.05580502f, 0.02693516f, 0.09996913f,
+ 0.09070327f, 0.06667200f, 0.05873008f, -0.02247842f, 0.07772321f,
+ 0.12408436f, 0.12629253f, -8.41997913e-004f, 0.01477783f, 0.09165990f,
+ -2.98401713e-003f, -0.06466447f, -0.07057302f, 2.09516948e-004f,
+ 0.02210209f, -0.02158809f, -0.08602506f, -0.02284836f,
+ 4.01876355e-003f, 9.56660323e-003f, -0.02073978f, -0.04635138f,
+ -7.59423291e-003f, -0.01377393f, -0.04559359f, -0.13284740f,
+ -0.08671406f, -0.03654395f, 0.01142869f, 0.03287891f, -0.04392983f,
+ 0.06142959f, 0.17710890f, 0.10385257f, 0.01329137f, 0.10067633f,
+ 0.12450829f, -0.04476709f, 0.09049144f, 0.04589312f, 0.11167907f,
+ 0.08587538f, 0.04767583f, 1.67188141e-003f, 0.02359802f, -0.03808852f,
+ 0.03126272f, -0.01919029f, -0.05698918f, -0.02365112f, -0.06519032f,
+ -0.05599358f, -0.07097308f, -0.03301812f, -0.04719102f, -0.02566297f,
+ 0.01324074f, -0.09230672f, -0.05518232f, -0.04712864f, -0.03380903f,
+ -0.06719479f, 0.01183908f, -0.09326738f, 0.01642865f, 0.03789867f,
+ -6.61567831e-003f, 0.07796386f, 0.07246574f, 0.04706347f, -0.02523437f,
+ -0.01696830f, -0.08068866f, 0.06030888f, 0.10527060f, -0.06611756f,
+ 0.02977346f, 0.02621830f, 0.01913855f, -0.08479366f, -0.06322418f,
+ -0.13570616f, -0.07644490f, 9.31900274e-003f, -0.08095149f,
+ -0.10197903f, -0.05204025f, 0.01413151f, -0.07800411f, -0.01885122f,
+ -0.07509381f, -0.10136326f, -0.05212355f, -0.09944065f,
+ -1.33606605e-003f, -0.06342617f, -0.04178550f, -0.12373723f,
+ -0.02832736f, -0.06057501f, 0.05830070f, 0.07604282f, -0.06462587f,
+ 8.02447461e-003f, 0.11580125f, 0.12332212f, 0.01978462f,
+ -2.72378162e-003f, 0.05850752f, -0.04674481f, 0.05148062f,
+ -2.62542837e-003f, 0.11253355f, 0.09893716f, 0.09785093f, -0.04659257f,
+ -0.01102429f, -0.07002308f, 0.03088913f, -0.02565549f, -0.07671449f,
+ 3.17443861e-003f, -0.10783514f, -0.02314270f, -0.11089555f,
+ -0.01024768f, 0.03116021f, -0.04964825f, 0.02281825f, 5.50005678e-003f,
+ -0.08427856f, -0.14685495f, -0.07719755f, -0.13342668f, -0.04525511f,
+ -0.09914210f, 0.02588859f, 0.03469279f, 0.04664020f, 0.11688190f,
+ 0.09647275f, 0.10857815f, -0.01448726f, 0.04299758f, -0.06763151f,
+ 1.33257592e-003f, 0.14331576f, 0.07574340f, 0.09166205f, 0.05674926f,
+ 0.11325553f, -0.01106494f, 0.02062161f, -0.11484840f, -0.07492137f,
+ -0.02864293f, -0.01275638f, -0.06946032f, -0.10101652f, -0.04113498f,
+ -0.02214783f, -0.01273942f, -0.07480393f, -0.10556041f, -0.07622112f,
+ -0.09988393f, -0.11453961f, -0.12073903f, -0.09412795f, -0.07146588f,
+ -0.04054537f, -0.06127083f, 0.04221122f, 0.07688113f, 0.04099256f,
+ 0.12663734f, 0.14683802f, 0.21761774f, 0.12525328f, 0.18431792f,
+ -1.66402373e-003f, 2.37777247e-003f, 0.01445475f, 0.03509416f,
+ 0.02654697f, 0.01716739f, 0.05374011f, 0.02944174f, 0.11323927f,
+ -0.01485456f, -0.01611330f, -1.85554172e-003f, -0.01708549f,
+ -0.05435753f, -0.05302101f, 0.05260378f, -0.03582945f,
+ -3.42867890e-004f, 1.36076682e-003f, -0.04436073f, -0.04228432f,
+ 0.03281291f, -0.05480836f, -0.10197772f, -0.07206279f, -0.10741059f,
+ -0.02366946f, 0.10278475f, -2.74783419e-003f, -0.03242477f,
+ 0.02308955f, 0.02835869f, 0.10348799f, 0.19580358f, 0.10252027f,
+ 0.08039929f, 0.05525554f, -0.13250865f, -0.14395352f, 3.13586881e-003f,
+ -0.03387071f, 8.94669443e-003f, 0.05406157f, -4.97324532e-003f,
+ -0.01189114f, 2.82919413e-004f, -0.03901557f, -0.04898705f,
+ 0.02164520f, -0.01382906f, -0.01850416f, 0.01869347f, -0.02450060f,
+ 0.02291678f, 0.08196463f, 0.03309153f, -0.10629974f, 0.02473924f,
+ 0.05344394f, -0.02404823f, -0.03243643f, -5.55244600e-003f,
+ -0.08009996f, 0.02811539f, 0.04235742f, 0.01859004f, 0.04902123f,
+ -0.01438252f, -0.01526853f, 0.02044195f, -0.05008660f, 0.04244113f,
+ 0.07611816f, 0.04950470f, -0.06020549f, -4.26026015e-003f, 0.13133512f,
+ -0.01438738f, -0.01958807f, -0.04044152f, -0.12425045f,
+ 2.84353318e-003f, -0.05042776f, -0.09121484f, 7.34345755e-003f,
+ 0.09388847f, 0.11800314f, 4.72295098e-003f, 4.44378285e-003f,
+ -0.07984917f, -0.03613737f, 0.04490915f, -0.02246483f, 0.04681071f,
+ 0.05240871f, 0.02157206f, -0.04603431f, -0.01197929f, -0.02748779f,
+ 0.13621049f, 0.08812155f, -0.07802048f, 4.86458559e-003f, -0.01598836f,
+ 0.01024450f, -0.03463517f, -0.02304239f, -0.08692665f, 0.06655128f,
+ 0.05785803f, -0.12640759f, 0.02307472f, 0.07337402f, 0.07525434f,
+ 0.04943763f, -0.02241034f, -0.09978238f, 0.14487994f, -0.06570521f,
+ -0.07855482f, 0.02830222f, -5.29603509e-004f, -0.04669895f,
+ -0.11822784f, -0.12246452f, -0.15365660f, -0.02969127f, 0.08078201f,
+ 0.13512598f, 0.11505685f, 0.04740673f, 0.01376022f, -0.05852978f,
+ -0.01537809f, -0.05541119f, 0.02491065f, -0.02870786f, 0.02760978f,
+ 0.23836176f, 0.22347429f, 0.10306466f, -0.06919070f, -0.10132039f,
+ -0.20198342f, -0.05040560f, 0.27163076f, 0.36987007f, 0.34540465f,
+ 0.29095781f, 0.05649706f, 0.04125737f, 0.07505883f, -0.02737836f,
+ -8.43431335e-003f, 0.07368195f, 0.01653876f, -0.09402955f,
+ -0.09574359f, 0.01474337f, -0.07128561f, -0.03460737f, 0.11438941f,
+ 0.13752601f, -0.06385452f, -0.06310338f, 8.19548313e-003f, 0.11622470f,
+ 5.05133113e-003f, -0.07602754f, 0.06695660f, 0.25723928f, 0.09037900f,
+ 0.28826267f, 0.13165380f, -0.05312614f, -0.02137198f, -0.03442232f,
+ -0.06255679f, 0.03899667f, 0.18391028f, 0.26016650f, 0.03374462f,
+ 0.01860465f, 0.19077586f, 0.18160543f, 3.43634398e-003f, -0.03036782f,
+ 0.19683038f, 0.35378191f, 0.24968483f, -0.03222649f, 0.28972381f,
+ 0.43091634f, 0.30778357f, 0.02335266f, -0.09877399f, -6.85245218e-003f,
+ 0.08945240f, -0.08150686f, 0.02792493f, 0.24806842f, 0.17338486f,
+ 0.06231801f, -0.10432383f, -0.16653322f, -0.13197899f, -0.08531576f,
+ -0.19271527f, -0.13536365f, 0.22240199f, 0.39219588f, 0.26597717f,
+ -0.01231649f, 0.01016179f, 0.13379875f, 0.12018334f, -0.04852953f,
+ -0.07915270f, 0.07036012f, 3.87723115e-003f, -0.06126805f,
+ -0.15015170f, -0.11406515f, -0.08556531f, -0.07429333f, -0.16115491f,
+ 0.13214062f, 0.25691369f, 0.05697750f, 0.06861912f, -6.02903729e-003f,
+ -7.94562511e-003f, 0.04799571f, 0.06695165f, -0.01926842f, 0.06206308f,
+ 0.13450983f, -0.06381495f, -2.98370165e-003f, -0.03482971f,
+ 7.53991678e-003f, 0.03895611f, 0.11464261f, 0.01669971f,
+ 8.27818643e-003f, -7.49160210e-003f, -0.11712562f, -0.10650621f,
+ -0.10353880f, -0.04994106f, -7.65618810e-004f, 0.03023767f,
+ -0.04759270f, -0.07302686f, -0.05825012f, -0.13156348f, -0.10639747f,
+ -0.19393684f, -0.09973683f, -0.07918908f, 4.63177625e-004f,
+ -6.61382044e-004f, 0.15853868f, 0.08561199f, -0.07660093f,
+ -0.08015265f, -0.06164073f, 0.01882577f, -7.29908410e-004f,
+ 0.06840892f, 0.03843764f, 0.20274927f, 0.22028814f, -5.26101235e-003f,
+ 0.01452435f, -0.06331623f, 0.02865064f, 0.05673740f, 0.12171564f,
+ 0.03837196f, 0.03555467f, -0.02662914f, -0.10280123f, -0.06526285f,
+ -0.11066351f, -0.08988424f, -0.10103678f, 8.10526591e-003f,
+ 5.95238712e-003f, 0.02617721f, -0.01705742f, -0.10897956f,
+ -0.08004991f, -0.11271993f, -0.06185647f, -0.06103712f, 0.01597041f,
+ -0.05923606f, 0.09410726f, 0.22858568f, 0.03263380f, 0.06772990f,
+ -0.09003516f, 0.01017870f, 0.01931688f, 0.08628357f, -0.01430009f,
+ 0.10954945f, 0.16612452f, -0.02434544f, -0.03310068f, -0.04236627f,
+ 0.01212392f, -6.15046406e-003f, 0.06954194f, 0.03015283f, 0.01787957f,
+ 0.02781667f, -0.05561153f, -8.96244217e-003f, -0.04971489f,
+ 0.07510284f, 0.01775282f, 0.05889897f, -0.07981427f, 0.03647643f,
+ -3.73833324e-003f, -0.08894575f, -0.06429435f, -0.08068276f,
+ 0.03567704f, -0.07131936f, -7.21910037e-003f, -0.09566668f,
+ 0.17886090f, 0.14911725f, 0.02070032f, -0.05017120f, -0.04992622f,
+ 0.01570143f, -0.09906903f, 0.06456193f, 0.15329507f, 0.18820767f,
+ 0.11689861f, -0.01178513f, -0.02225163f, -0.01905318f, 0.10271224f,
+ -7.27029052e-003f, 0.11664233f, 0.14796902f, 0.07771893f, 0.02400013f,
+ -0.05361797f, -0.01972888f, 0.01376177f, 0.06740040f, -0.06525395f,
+ 0.05726178f, -0.02404981f, -0.14018567f, -0.02074987f, -0.04621970f,
+ -0.04688627f, -0.01842059f, 0.07722727f, -0.04852883f, 0.01529004f,
+ -0.19639495f, 0.10817073f, 0.03795860f, -0.09435206f, -0.07984378f,
+ -0.03383440f, 0.11081333f, 0.02237366f, 0.12703256f, 0.21613893f,
+ 0.02918790f, 4.66472283e-003f, -0.10274266f, -0.04854131f,
+ -3.46305710e-003f, 0.08652268f, 0.02251546f, 0.09636052f, 0.17180754f,
+ -0.09272388f, 4.59174305e-004f, -0.11723048f, -0.12210111f,
+ -0.15547538f, 0.07218186f, -0.05297846f, 0.03779940f, 0.05150875f,
+ -0.03802310f, 0.03870645f, -0.15250699f, -0.08696499f, -0.02021560f,
+ 0.04118926f, -0.15177974f, 0.01577647f, 0.10249301f, 7.50041893e-003f,
+ 0.01721806f, -0.06828983f, -0.02397596f, -0.06598977f, -0.04317593f,
+ -0.08064980f, 6.66632550e-003f, 0.03333484f, 0.07093620f, 0.08231064f,
+ -0.06577903f, -0.06698844f, -0.06984019f, -0.06508023f, -0.14145090f,
+ -0.02393239f, 0.06485303f, 8.83263443e-003f, 0.09251080f, -0.07557579f,
+ -0.05067699f, -0.09798748f, -0.06703258f, -0.14056294f, 0.03245994f,
+ 0.12554143f, 0.01761621f, 0.12980327f, -0.04081950f, -0.11906909f,
+ -0.14813015f, -0.08376863f, -0.12200681f, 0.04988137f, 0.05424247f,
+ -3.90952639e-003f, 0.03255733f, -0.12717837f, -0.07461493f,
+ -0.05703964f, -0.01736189f, -0.08026433f, -0.05433894f, -0.01719359f,
+ 0.02886275f, 0.01772653f, -0.09163518f, 3.57789593e-003f, -0.10129993f,
+ -0.02653764f, -0.08131415f, -0.03847986f, -7.62157550e-004f,
+ 0.06486648f, 0.19675669f, -0.04919156f, -0.07059129f, -0.04857785f,
+ -0.01042383f, -0.08328653f, 0.03660302f, -0.03696846f, 0.04969259f,
+ 0.08241162f, -0.12514858f, -0.06122676f, -0.03750202f,
+ 6.52989605e-003f, -0.10247213f, 0.02568346f, 4.51781414e-003f,
+ -0.03734229f, -0.01131264f, -0.05412074f, 8.89345480e-004f,
+ -0.12388977f, -0.05959237f, -0.12418608f, -0.06151643f, -0.07310260f,
+ 0.02441575f, 0.07023528f, -0.07548289f, -7.57147965e-004f,
+ -0.09061348f, -0.08112976f, -0.06920306f, 9.54394229e-003f,
+ -0.01219902f, 1.21273217e-003f, -8.88989680e-003f, -0.08309301f,
+ -0.04552661f, -0.10739882f, -0.05691034f, -0.13928030f, 0.09027749f,
+ 0.15123098f, 0.03175976f, 0.17763577f, 3.29913251e-004f, 0.05151888f,
+ -0.09844074f, -0.09475287f, -0.08571247f, 0.16241577f, 0.19336018f,
+ 8.57454538e-003f, 0.11474732f, -0.01493934f, 0.03352379f, -0.08966240f,
+ -0.02322310f, 0.02663568f, 0.05448750f, -0.03536883f, -0.07210463f,
+ -0.06807277f, -0.03121621f, -0.05932408f, -0.17282860f, -0.15873498f,
+ -0.04956378f, 0.01603377f, -0.12385946f, 0.13878587f, 0.21468069f,
+ 0.13510075f, 0.20992437f, 0.08845878f, 0.08104013f, 0.03754176f,
+ 0.12173114f, 0.11103114f, 0.10643122f, 0.13941477f, 0.11640384f,
+ 0.14786847f, 0.01218238f, 0.01160753f, 0.03547940f, 0.08794311f,
+ -0.01695384f, -0.07692261f, -0.08236158f, 6.79194089e-003f,
+ -0.02458403f, 0.13022894f, 0.10953187f, 0.09857773f, 0.04735930f,
+ -0.04353498f, -0.15173385f, -0.17904443f, -0.10450364f, -0.13418166f,
+ -0.06633098f, -0.03170381f, -0.06839000f, -0.11350126f, -0.06983913f,
+ 0.19083543f, 0.17604128f, 0.07730632f, 0.10022651f, 0.36428109f,
+ 0.28291923f, 0.12688625f, 0.15942036f, 0.14064661f, -0.11201853f,
+ -0.13969108f, -0.09088077f, -0.14107047f, 0.05117374f,
+ -2.63348082e-003f, -0.10794610f, -0.09715455f, -0.05284977f,
+ 0.01565668f, 0.05031200f, 0.07021113f, -0.02963028f, 0.01766960f,
+ 0.08333644f, -0.03211382f, 4.90096770e-003f, 0.05186674f, -0.05045737f,
+ -0.09624767f, -0.02525997f, 0.06916669f, 0.01213916f, 0.05333899f,
+ -0.03443280f, -0.10055527f, -0.06291115f, 5.42851724e-003f,
+ -6.30360236e-003f, 0.02270257f, -0.01769792f, 0.03273688f, 0.07746078f,
+ 7.77099328e-003f, 0.05041346f, 0.01648103f, -0.02321534f, -0.09930186f,
+ -0.02293853f, 0.02034990f, -0.08324204f, 0.08510064f, -0.03732836f,
+ -0.06465405f, -0.06086946f, 0.13680504f, -0.11469388f, -0.03896406f,
+ -0.07142810f, 2.67581246e-003f, -0.03639632f, -0.09849060f,
+ -0.11014334f, 0.17489147f, 0.17610909f, -0.16091567f, -0.07248894f,
+ 0.01567141f, 0.23742996f, 0.07552249f, -0.06270349f, -0.07303379f,
+ 0.25442186f, 0.16903116f, -0.08168741f, -0.05913896f, -0.03954096f,
+ 6.81776879e-003f, -0.05615319f, -0.07303037f, -0.12176382f,
+ 0.12385108f, 0.22084464f, -0.05543206f, -0.03310431f, 0.05731593f,
+ 0.19481890f, 0.04016430f, -0.06480758f, -0.12353460f, 0.18733442f,
+ -0.09631214f, -0.11192076f, 0.12404587f, 0.15671748f, 0.19256128f,
+ 0.10895617f, 0.03391477f, -0.13032004f, -0.05626907f, -0.09025607f,
+ 0.23485197f, 0.27812332f, 0.26725492f, 0.07255980f, 0.16565137f,
+ 0.22388470f, 0.07441066f, -0.21003133f, -0.08075339f, -0.15031935f,
+ 0.07023834f, 0.10872041f, 0.18156518f, 0.20037253f, 0.13571967f,
+ -0.11915682f, -0.11131983f, -0.18878011f, 0.06074620f, 0.20578890f,
+ 0.12413109f, 0.03930207f, 0.29176015f, 0.29502738f, 0.27856228f,
+ -0.01803601f, 0.16646385f, 0.19268319f, 0.01900682f, 0.06026287f,
+ 2.35868432e-003f, 0.01558199f, 0.02707230f, 0.11383014f, 0.12103992f,
+ 0.03907350f, 0.04637353f, 0.09020995f, 0.11919726f, -3.63007211e-003f,
+ 0.02220155f, 0.10336831f, 0.17351882f, 0.12259731f, 0.18983354f,
+ 0.15736865f, 0.01160725f, -0.01690723f, -9.69582412e-004f, 0.07213813f,
+ 0.01161613f, 0.17864859f, 0.24486147f, 0.18208991f, 0.20177495f,
+ 0.05972528f, -8.93934630e-003f, -0.02316955f, 0.14436610f, 0.14114498f,
+ 0.05520950f, 0.06353590f, -0.19124921f, 0.10174713f, 0.29414919f,
+ 0.26448128f, 0.09344960f, 0.15284036f, 0.19797507f, 0.11369792f,
+ -0.12722753f, -0.21396367f, -0.02008235f, -0.06566695f, -0.01662150f,
+ -0.03937003f, 0.04778343f, 0.05017274f, -0.02299062f, -0.20208496f,
+ -0.06395898f, 0.13721776f, 0.22544557f, 0.14888357f, 0.08687132f,
+ 0.27088094f, 0.32206613f, 0.09782200f, -0.18523243f, -0.17232181f,
+ -0.01041531f, 0.04008654f, 0.04199702f, -0.08081299f, -0.03755421f,
+ -0.04809646f, -0.05222081f, -0.21709201f, -0.06622940f, 0.02945281f,
+ -0.04600435f, -0.05256077f, -0.08432942f, 0.02848100f, 0.03490564f,
+ 8.28621630e-003f, -0.11051246f, -0.11210597f, -0.01998289f,
+ -0.05369405f, -0.08869293f, -0.18799506f, -0.05436598f, -0.05011634f,
+ -0.05419716f, -0.06151857f, -0.10827805f, 0.04346735f, 0.04016083f,
+ 0.01520820f, -0.12173316f, -0.04880285f, -0.01101406f, 0.03250847f,
+ -0.06009551f, -0.03082932f, -0.02295134f, -0.06856834f, -0.08775249f,
+ -0.23793389f, -0.09174541f, -0.05538322f, -0.04321031f, -0.11874759f,
+ -0.04221844f, -0.06070468f, 0.01194489f, 0.02608565f, -0.03892140f,
+ -0.01643151f, -0.02602034f, -0.01305472f, 0.03920100f, -0.06514261f,
+ 0.01126918f, -6.27710763e-003f, -0.02720047f, -0.11133634f,
+ 0.03300330f, 0.02398472f, 0.04079665f, -0.10564448f, 0.05966159f,
+ 0.01195221f, -0.03179441f, -0.01692590f, -0.06177841f, 0.01841576f,
+ -5.51078189e-003f, -0.06821765f, -0.03191888f, -0.09545476f,
+ 0.03030550f, -0.04896152f, -0.02914624f, -0.13283344f, -0.04783419f,
+ 6.07836898e-003f, -0.01449538f, -0.13358212f, -0.09687774f,
+ -0.02813793f, 0.01213498f, 0.06650011f, -0.02039067f, 0.13356198f,
+ 0.05986415f, -9.12760664e-003f, -0.18780160f, -0.11992817f,
+ -0.06342237f, 0.01229534f, 0.07143231f, 0.10713009f, 0.11085765f,
+ 0.06569190f, -0.02956399f, -0.16288325f, -0.13993549f, -0.01292515f,
+ 0.03833013f, 0.09130384f, -0.05086257f, 0.05617329f, -0.03896667f,
+ -0.06282311f, -0.11490010f, -0.14264110f, -0.04530499f, 0.01598189f,
+ 0.09167797f, 0.08663294f, 0.04885277f, -0.05741219f, -0.07565769f,
+ -0.17136464f, -0.02619422f, -0.02477579f, 0.02679587f, 0.11621952f,
+ 0.08788391f, 0.15520640f, 0.04709549f, 0.04504483f, -0.10214074f,
+ -0.12293372f, -0.04820546f, -0.05484834f, 0.05473754f, 0.07346445f,
+ 0.05577277f, -0.08209965f, 0.03462975f, -0.20962234f, -0.09324598f,
+ 3.79481679e-003f, 0.03617633f, 0.16742408f, 0.07058107f, 0.10204960f,
+ -0.06795346f, 3.22807301e-003f, -0.12589309f, -0.17496960f,
+ 0.02078314f, -0.07694324f, 0.12184640f, 0.08997164f, 0.04793497f,
+ -0.11383379f, -0.08046359f, -0.25716835f, -0.08080962f,
+ 6.80711539e-003f, -0.02930280f, -3.04938294e-003f, -0.11106286f,
+ -0.04628860f, -0.07821649f, 7.70127494e-003f, -0.10247706f,
+ 1.21042714e-003f, 0.20573859f, -0.03241005f, 8.42972286e-003f,
+ 0.01946464f, -0.01197973f, -0.14579976f, 0.04233614f,
+ -4.14096704e-003f, -0.06866436f, -0.02431862f, -0.13529138f,
+ 1.25891645e-003f, -0.11425111f, -0.04303651f, -0.01694815f,
+ 0.05720210f, -0.16040207f, 0.02772896f, 0.05498345f, -0.15010567f,
+ 0.01450866f, 0.02350303f, -0.04301004f, -0.04951802f, 0.21702233f,
+ -0.03159155f, -0.01963303f, 0.18232647f, -0.03263875f,
+ -2.88476888e-003f, 0.01587562f, -1.94303901e-003f, -0.07789494f,
+ 0.04674156f, -6.25576358e-003f, 0.08925962f, 0.21353747f, 0.01254677f,
+ -0.06999976f, -0.05931328f, -0.01884327f, -0.04306272f, 0.11794136f,
+ 0.03842728f, -0.03907030f, 0.05636114f, -0.09766009f, -0.02104000f,
+ 8.72711372e-003f, -0.02736877f, -0.05112274f, 0.16996814f, 0.02955785f,
+ 0.02094014f, 0.08414304f, -0.03335762f, -0.03617457f, -0.05808248f,
+ -0.08872101f, 0.02927705f, 0.27077839f, 0.06075108f, 0.07478261f,
+ 0.15282831f, -0.03908454f, -0.05101782f, -9.51998029e-003f,
+ -0.03272416f, -0.08735625f, 0.07633440f, -0.07185312f, 0.13841286f,
+ 0.07812646f, -0.12901451f, -0.05488589f, -0.05644578f, -0.03290703f,
+ -0.11184757f, 0.03751570f, -0.05978153f, -0.09155276f, 0.05657315f,
+ -0.04328186f, -0.03047933f, -0.01413135f, -0.10181040f, -0.01384013f,
+ 0.20132534f, -0.01536873f, -0.07641169f, 0.05906778f, -0.07833145f,
+ -0.01523801f, -0.07502609f, -0.09461885f, -0.15013233f, 0.16050665f,
+ 0.09021381f, 0.08473236f, 0.03386267f, -0.09147339f, -0.09170618f,
+ -0.08498498f, -0.05119187f, -0.10431040f, 0.01041618f, -0.03064913f,
+ 0.09340212f, 0.06448522f, -0.03881054f, -0.04985436f, -0.14794017f,
+ -0.05200112f, -0.02144495f, 0.04000821f, 0.12420804f, -0.01851651f,
+ -0.04116732f, -0.11951703f, -0.04879033f, -0.08722515f, -0.08454733f,
+ -0.10549165f, 0.11251976f, 0.10766345f, 0.19201984f, 0.06128913f,
+ -0.02734615f, -0.08834923f, -0.16999826f, -0.03548348f,
+ -5.36092324e-003f, 0.08297954f, 0.07226378f, 0.04194529f, 0.04668673f,
+ 8.73902347e-003f, 0.06980139f, 0.05652480f, 0.05879445f, 0.02477076f,
+ 0.02451423f, 0.12433673f, 0.05600227f, 0.06886370f, 0.03863076f,
+ 0.07459056f, 0.02264139f, 0.01495469f, 0.06344220f, 0.06945208f,
+ 0.02931899f, 0.11719371f, 0.04527427f, 0.03248192f, 2.08271481e-003f,
+ 0.02044626f, 0.11403449f, 0.04303892f, 0.06444661f, 0.04959024f,
+ 0.08174094f, 0.09240247f, 0.04894639f, 0.02252937f, -0.01652530f,
+ 0.07587013f, 0.06064249f, 0.13954395f, 0.02772832f, 0.07093039f,
+ 0.08501238f, 0.01701301f, 0.09055722f, 0.33421436f, 0.20163782f,
+ 0.09821030f, 0.07951369f, 0.08695120f, -0.12757730f, -0.13865978f,
+ -0.06610068f, -0.10985506f, 0.03406816f, -0.01116336f, -0.07281768f,
+ -0.13525715f, -0.12844718f, 0.08956250f, 0.09171610f, 0.10092317f,
+ 0.23385370f, 0.34489515f, 0.09901748f, 0.02002922f, 0.12335990f,
+ 0.07606190f, -0.14899330f, -0.15634622f, -0.06494618f, -0.01760547f,
+ 0.03404277f, -0.13208845f, -0.12101169f, -0.18294574f, -0.16560709f,
+ 0.02183887f, -0.02752613f, 0.01813638f, 0.02000757f, 0.01319924f,
+ 0.08030242f, 0.01220535f, 2.98233377e-003f, -0.01307070f, 0.05970297f,
+ -0.05345284f, -0.03381982f, -9.87543724e-003f, -0.06869387f,
+ 0.03956730f, -0.03108176f, -0.05732809f, 0.02172386f, 0.04159765f,
+ 2.62783933e-003f, 0.04813229f, 0.09358983f, -8.18389002e-003f,
+ 0.01724574f, -0.02547474f, -0.04967288f, -0.02390376f, 0.06640504f,
+ -0.06306566f, 0.01137518f, 0.05589378f, -0.08237787f, 0.02455001f,
+ -0.03059422f, -0.08953978f, 0.06851497f, 0.07190268f, -0.07610799f,
+ 7.87237938e-003f, -7.85830803e-003f, 0.06006952f, -0.01126728f,
+ -2.85743061e-003f, -0.04772895f, 0.01884944f, 0.15005857f,
+ -0.06268821f, -0.01989072f, 0.01138399f, 0.08760451f, 0.03879007f,
+ -9.66926850e-003f, -0.08012961f, 0.06414555f, -0.01362950f,
+ -0.09135523f, 0.01755159f, 0.04459474f, 0.09650917f, 0.05219948f,
+ -2.19440833e-003f, -0.07037939f, -0.01599054f, 0.13103317f,
+ -0.02492603f, -0.01032540f, -0.02903307f, 0.04489160f, 0.05148086f,
+ 0.01858173f, -0.02919228f, 0.08299296f, -0.04590359f, -0.15745632f,
+ -0.09068198f, -0.02972453f, 0.12985018f, 0.22320485f, 0.24261914f,
+ 0.03642650f, -0.05506422f, 2.67413049e-003f, -0.03834032f, 0.06449424f,
+ 0.03834866f, 0.03816991f, 0.25039271f, 0.34212017f, 0.32433882f,
+ 0.18824573f, -0.08599839f, -0.17599408f, -0.15317015f, -0.09913155f,
+ -0.02856072f, -0.05304699f, -1.06437842e-003f, -0.06641813f,
+ -0.07509298f, 0.01463361f, -0.07551918f, -0.04510373f,
+ -8.44620075e-003f, 0.01772176f, 0.04068235f, 0.20295307f, 0.15719447f,
+ 0.05712103f, 0.26296997f, 0.14657754f, 0.01547317f, -0.05052776f,
+ -0.03881342f, -0.01437883f, -0.04930177f, 0.11719568f, 0.24098417f,
+ 0.26468599f, 0.31698579f, 0.10103608f, -0.01096375f, -0.01367013f,
+ 0.17104232f, 0.20065314f, 2.67622480e-003f, -0.01190034f, 0.18301608f,
+ 0.09459770f, -0.06357619f, -0.06473801f, 0.01377906f, -0.10032775f,
+ -0.06388740f, 3.80393048e-003f, 0.06206078f, 0.10349120f, 0.26804337f,
+ 8.17918684e-003f, -0.02314351f, 9.34422202e-003f, 0.09198381f,
+ 0.03681326f, -8.77339672e-003f, -0.09662418f, -0.02715708f,
+ 0.13503517f, 0.08962728f, -6.57071499e-003f, -0.03201199f, 0.28510824f,
+ 0.32095715f, 0.18512695f, -0.14230858f, -0.14048551f, -0.07181299f,
+ -0.08575408f, -0.08661680f, -0.17416079f, 7.54326640e-004f,
+ 0.05601677f, 0.13585392f, -0.04960437f, -0.07708392f, 0.10676333f,
+ -0.04407546f, -0.07209078f, 0.03663663f, 0.28949317f, 0.41127121f,
+ 0.27431169f, -0.06900328f, -0.21474190f, -0.15578632f, -0.19555484f,
+ -0.15209621f, -0.11269179f, 0.07416003f, 0.18991330f, 0.26858172f,
+ 0.01952259f, 0.01017922f, 0.02159843f, -4.95165400e-003f, -0.04368168f,
+ -0.12721671f, -0.06673957f, -0.11275250f, 0.04413409f, 0.05578312f,
+ 0.03896771f, 0.03566417f, -0.05871816f, -0.07388090f, -0.17965563f,
+ -0.08570268f, -0.15273231f, -0.06022318f, -0.06999847f,
+ -6.81510568e-003f, 0.06294262f, -6.54901436e-004f, -0.01128654f,
+ -0.02289657f, 0.04849290f, 0.04140804f, 0.23681939f, 0.14545733f,
+ 0.01989965f, 0.12032662f, 3.87463090e-003f, -6.02597650e-003f,
+ -0.05919775f, -0.03067224f, -0.07787777f, 0.10834727f, 0.02153730f,
+ 0.02765649f, 0.03975543f, -0.12182906f, -0.04900113f, -0.09940100f,
+ -0.06453611f, -0.13757215f, -0.03721382f, 0.02827376f, -0.04351249f,
+ 0.01907038f, -0.10284120f, -0.05671160f, -0.10760647f, -0.09624009f,
+ -0.09565596f, -0.01303654f, 0.03080539f, 0.01416511f, 0.05846142f,
+ -5.42971538e-003f, 0.06221476f, -0.03320325f, -0.06791797f,
+ -0.05791342f, 0.12851369f, 0.14990346f, 0.03634374f, 0.14262885f,
+ 0.04330391f, 0.05032569f, -0.05631914f, 0.01606137f, 0.04387223f,
+ 0.22344995f, 0.15722635f, -0.04693628f, 0.03006579f, -2.52882647e-003f,
+ 0.05717621f, -0.07529724f, -0.02848588f, -0.06868757f,
+ -4.51729307e-003f, 0.06466042f, -0.05935378f, -0.04704857f,
+ -0.07363959f, 0.04843248f, -0.13421375f, -0.09789340f, -0.10255270f,
+ 0.03509852f, 0.04751543f, -0.03822323f, 0.09740467f, 0.04762916f,
+ 0.03940146f, -0.08283259f, 0.09552965f, 0.05038739f, 0.21258622f,
+ 0.09646992f, 0.03241193f, 0.05167701f, 0.04614570f, 0.04330090f,
+ -0.02671840f, -0.06259909f, -0.02301898f, 0.18829170f, 0.10522786f,
+ 0.04313190f, 0.01670948f, -0.08421925f, 0.05911417f, -0.10582602f,
+ -0.04855484f, -0.08373898f, 0.07775915f, 0.03723533f, -0.12047344f,
+ 4.86345543e-003f, -0.10520902f, 0.06571782f, -0.07528137f,
+ -0.03245651f, -0.09869066f, -0.02917477f, -0.18293270f, 0.14810945f,
+ 9.24033765e-003f, -0.04354914f, 0.02266885f, -0.11872729f,
+ -0.04016589f, 0.02830229f, 0.22539048f, 0.20565644f, 0.16701797f,
+ 0.09019924f, 0.01300652f, 0.09760600f, -0.03675831f, -0.01935448f,
+ -0.06894835f, 0.08077277f, 0.19047537f, 0.11312226f, 0.04106043f,
+ -0.11187182f, 0.04312806f, -0.18548580f, -0.11287174f, -0.08794551f,
+ 0.02078281f, -0.15295486f, 0.11806386f, -0.01103218f, -0.15971117f,
+ 0.02153538f, -0.05232147f, -0.10835317f, -0.13910367f, 0.05920752f,
+ -0.10122602f, 0.20174250f, 0.09105796f, -0.01881348f, 0.09559010f,
+ -0.03725745f, -0.09442931f, -0.09763174f, 0.05854454f, 0.08287182f,
+ 0.12919849f, 0.08594352f, -2.49806582e-003f, 0.02398440f,
+ 5.67950122e-003f, -0.06296340f, -0.12993270f, 0.03855852f, 0.05186560f,
+ 0.10839908f, -0.03380463f, -0.12654832f, -0.05399339f, -0.07456800f,
+ -0.04736232f, -0.10164231f, 0.07496139f, 0.08125214f, 0.07656177f,
+ -0.04999603f, -0.12823077f, -0.07692395f, -0.11317524f, -0.09118655f,
+ -0.05695669f, 0.10477209f, 0.07468581f, 0.01630048f, -8.00961629e-003f,
+ -0.06582128f, -0.04019095f, -0.04682907f, -0.01907842f, -0.10997720f,
+ 0.04911406f, 0.02931030f, 0.04197735f, -0.05773980f, -0.09670641f,
+ -0.03594951f, -0.03402121f, -0.07149299f, -0.10566200f, 0.10601286f,
+ 0.06340689f, -0.01518632f, -5.96402306e-003f, -0.07628012f,
+ -3.52779147e-003f, -0.02683854f, -0.10265494f, -0.02680815f,
+ 0.16338381f, 0.03103515f, 0.02296976f, 0.01624348f, -0.10831620f,
+ -0.02314233f, -0.04789969f, -0.05530700f, -0.06461314f, 0.10494506f,
+ 0.04642856f, -0.07592955f, -0.06197905f, -0.09042154f, -0.01445521f,
+ -0.04297818f, -0.11262015f, -0.11430512f, 0.03174541f, -0.03677487f,
+ -0.02963996f, -0.06610169f, -0.13292049f, -0.07059067f, -0.08444111f,
+ -0.02640536f, -0.07136250f, 0.04559967f, 0.01459980f, 0.17989251f,
+ 0.04435328f, -0.12464730f, -0.02871115f, -0.10752209f, -0.03393742f,
+ -0.03791408f, 0.02548251f, 0.01956050f, 0.19245651f, 0.13963254f,
+ -0.05904696f, -0.07424626f, -0.10411884f, 1.54176133e-003f,
+ 0.01797429f, 0.13025844f, 0.04547642f, -0.05710349f, -0.10697161f,
+ -0.13489437f, -0.06515755f, -0.06406886f, -4.08572936e-003f,
+ -0.01336483f, 0.04368737f, -0.11259720f, -0.05701635f, -0.06469971f,
+ -0.08346602f, -0.04166770f, -0.05795543f, -0.08247511f, -0.05742628f,
+ 0.08452254f, -0.03350224f, 0.13980860f, 0.13252275f, 0.07589617f,
+ 0.07539988f, 0.12155797f, 0.19087289f, 0.15050751f, 0.21250245f,
+ 0.14206800f, 0.01298489f, 0.07450245f, 0.06559097f, 0.01700557f,
+ 0.04512971f, 0.16950700f, 0.10261577f, 0.16389982f, 0.05505059f,
+ -0.03453077f, 0.08622462f, 0.07935954f, 0.03976260f, 0.02036091f,
+ 3.95744899e-003f, 0.03267065f, 0.15235919f, 0.01297494f, -0.08109194f,
+ 0.01407558f, 4.40693414e-003f, -0.15157418f, -0.11390478f,
+ -0.07487597f, -7.81322457e-003f, -0.02749545f, -0.10181408f,
+ 0.13755716f, 0.14007211f, 0.13482562f, 0.27517235f, 0.34251109f,
+ 0.07639657f, 0.07268607f, 0.19823882f, 0.16135791f, -0.04186463f,
+ -0.12784107f, -0.09846287f, 0.03169041f, 0.10974082f, -0.15051922f,
+ -0.08916726f, -0.07138767f, -0.04153349f, 6.25418453e-003f,
+ 0.01266654f, 0.10533249f, 0.12749144f, 0.15148053f, 0.01498513f,
+ 0.06305949f, -0.01247123f, -0.08778401f, -0.08551880f, -0.11955146f,
+ -0.08493572f, -0.02901620f, -0.02394859f, -0.13427313f, -0.11053200f,
+ -0.14413260f, -0.15203285f, 0.03972760f, -3.72127310e-004f,
+ -0.04200919f, 0.06105104f, 0.01904975f, -0.01106191f,
+ -7.27445772e-003f, -0.01520341f, 1.10228511e-003f, -0.04949187f,
+ -0.08013099f, 5.72071038e-003f, 0.08415454f, -0.06523152f, 0.03664081f,
+ -0.02673042f, -0.12066154f, -0.03702074f, 0.06006580f, 0.01628682f,
+ -6.17772620e-003f, 0.08192339f, -3.41629819e-003f, 0.02870512f,
+ 0.05807141f, 0.04959986f, 0.04618251f, -0.04901629f, -0.10579574f,
+ 0.02274442f, 0.12070961f, 2.23597488e-003f, 0.09831765f, -0.03019848f,
+ -0.11181970f, -0.04961075f, 0.02498928f, -0.03714991f, -0.01619653f,
+ 0.02643486f, -7.62964319e-003f, -0.02882290f, -0.06242594f,
+ -0.08439861f, 0.07220893f, 0.07263952f, 0.01561574f, 0.03091968f,
+ 0.01708712f, -0.03797151f, -3.18561122e-003f, 0.01624021f,
+ -0.02828573f, 0.11284444f, -1.32280716e-003f, -0.07784860f,
+ -0.07209100f, 0.03372242f, 0.12154529f, 0.02278104f, -0.05275500f,
+ -0.01918484f, 0.12989293f, 0.05424401f, 0.02333086f, 0.04029022f,
+ 0.12392918f, 0.09495489f, 0.09190340f, 0.07935889f, 8.76816828e-003f,
+ 0.17148446f, -8.51302687e-003f, -0.08011249f, -0.06796283f,
+ 0.04884845f, 0.01112272f, -0.07835306f, -1.14811445e-003f,
+ -0.03440760f, 0.02845243f, 0.07695542f, -0.07069533f, -0.01151784f,
+ -8.53884313e-003f, -0.01662786f, -0.04163864f, 0.05400505f,
+ 0.02859163f, 0.02921852f, 0.05003135f, -6.85718050e-003f, -0.01632611f,
+ 0.07780217f, 0.04042810f, -0.01216440f, 3.60914599e-003f, -0.06322435f,
+ 0.09516726f, 0.12877031f, -9.69162490e-003f, 0.01031179f, 0.05180895f,
+ -9.34659224e-003f, -0.01644533f, -0.04849347f, -0.04343236f,
+ 0.10514783f, 0.08046635f, -0.04615205f, -0.03975486f, -0.01485525f,
+ 0.13096830f, -0.01517950f, -0.06571898f, -0.04016372f, 0.01849786f,
+ 0.02439670f, 0.08067258f, 1.74824719e-003f, 0.07053747f, 0.08819518f,
+ -5.08352555e-003f, -0.06550863f, -0.08266170f, -0.07780605f,
+ 0.01453450f, -0.08756890f, 0.01096501f, -8.71319138e-003f, 0.10110464f,
+ 0.02420769f, -0.06708383f, 0.02007811f, 5.93133038e-003f, 0.05398923f,
+ 0.07538138f, 0.02049227f, 0.02242589f, 0.04011070f, -1.44875818e-003f,
+ -4.19115182e-003f, 0.06367654f, 0.02506934f, 0.02434536f, 0.05879405f,
+ -8.22952855e-003f, -0.01242441f, 0.04224926f, -0.01754923f,
+ 0.05958161f, 0.03818886f, -0.01830363f, -0.04308917f, -0.04422197f,
+ -0.02432721f, 0.02264866f, 2.03751423e-003f, 0.01197031f, 0.04439203f,
+ 0.12169247f, 0.03602713f, -0.02599251f, -1.98226492e-003f, 0.02046336f,
+ -0.02639058f, -1.91242550e-003f, -0.09334669f, -0.03595153f,
+ -9.88179818e-003f, -0.06848445f, -0.04666303f, -0.09955736f,
+ -0.04206430f, 0.02609075f, 9.09005292e-003f, -0.07138551f,
+ -4.22313227e-004f, 0.01766645f, 0.02756404f, 0.01308276f, 0.04052891f,
+ 0.02387515f, 0.05337298f, 0.02500631f, -0.04970853f, -0.12467445f,
+ 0.17604403f, 0.12256411f, -0.07512254f, 8.70451052e-003f, -0.05697548f,
+ -0.03626474f, -8.76623299e-003f, -0.01210897f, -0.09451522f,
+ 0.07490732f, -0.02008001f, -0.02681278f, -0.06463405f, -0.01517507f,
+ 7.33757764e-003f, 6.07147906e-003f, -0.09316964f, -0.04575328f,
+ 0.13261597f, 0.15424870f, -0.01655918f, -0.02772390f, -0.05243644f,
+ -0.02356456f, -0.02351753f, -0.10211615f, -0.12873036f, 0.14549787f,
+ 0.12519856f, 4.38762689e-003f, 0.02795992f, 0.05170322f, 0.09223596f,
+ 0.05890015f, 0.02376701f, -0.02777346f, 0.09506908f, 0.02328936f,
+ -0.02319928f, -0.03218696f, -0.01527841f, -0.01016694f, -0.02674719f,
+ 0.05137179f, 0.01980666f, 0.06544447f, -0.01746171f, 0.01026380f,
+ 0.01561806f, 7.97004555e-004f, 0.07601810f, 0.01907250f, -0.03083035f,
+ -0.05987392f, 0.09242783f, 0.14555025f, 0.01035827f, 0.03092401f,
+ -0.09562709f, -0.03802354f, 0.02531144f, 0.03079449f, -0.07100715f,
+ 0.03330721f, -2.69116857e-003f, 0.03167490f, 0.05744999f, 0.03259895f,
+ 1.91266940e-003f, 0.03194578f, 0.07389776f, 0.02198060f, 0.07633314f,
+ 0.03293105f, -0.09103648f, 0.04718142f, 0.06102672f, -0.01003063f,
+ 5.85481385e-003f, -0.01522574f, 0.02323526f, 0.10584345f,
+ 4.35879454e-003f, 0.06107873f, 0.05868603f, -0.03115531f, 0.01214679f,
+ 0.08567052f, 3.93926632e-003f, -0.02521488f, -1.88425183e-003f,
+ 0.02038053f, -6.26854831e-004f, 0.04897438f, -0.04280585f,
+ -0.04819689f, -0.04812867f, -0.01451186f, 0.05101469f,
+ -9.01125465e-003f, -0.03333859f, 0.03917955f, 0.04196448f, 0.04292135f,
+ 0.02809529f, 0.02999715f, 0.04081348f, 9.10039060e-003f, 0.09703232f,
+ 0.10379741f, 0.02348725f, -4.72756615e-003f, 0.01027325f, 0.10402658f,
+ 0.12071823f, 0.09817299f, -0.02612033f, 0.03638414f, 0.05896405f,
+ 0.04865025f, 0.04793910f, -0.03882321f, -0.02962117f, -0.01222268f,
+ 0.04071597f, 0.01922777f, -0.02287866f, 0.03328381f, 0.01859092f,
+ 0.09024994f, 0.03804455f, -0.01424510f, 0.01953739f, 0.02509617f,
+ -0.03390914f, -0.05663941f, -0.01641979f, 0.05848591f, 0.04639670f,
+ 0.02092116f, 0.12911791f, 0.19918139f, 0.07739855f, -7.25806039e-003f,
+ 0.04074838f, 0.03183993f, 1.39251316e-003f, -0.01428625f, 0.01865480f,
+ 0.08529541f, 0.13547510f, 0.11189661f, 0.03998901f, 0.09575938f,
+ -0.02631102f, -0.03458253f, -0.04749985f, -0.06070716f,
+ 4.71884012e-003f, 0.06445789f, -0.02450038f, -0.05483776f,
+ -0.04657237f, -0.02030717f, -0.03480766f, -0.09397731f, -0.06399718f,
+ -0.01804585f, 5.62348310e-003f, -6.64811488e-003f, -0.06517869f,
+ 6.96210237e-003f, -0.01860148f, -0.04245830f, -0.05850367f,
+ -3.24417115e-003f, 0.07700698f, 0.11290991f, 0.09923030f, -0.02970599f,
+ 0.05592411f, 0.04813979f, -0.09811195f, -0.09357996f, -0.03276114f,
+ 0.05218338f, 0.04141375f, 3.92977800e-003f, -0.05047480f, 0.15960084f,
+ 0.04612800f, -0.03114098f, -0.04650044f, -0.03249795f, -0.02425641f,
+ -0.04311355f, 0.04307659f, -0.09401883f, -0.04742785f, -0.01254499f,
+ -0.06598741f, 3.41369561e-003f, -0.05620445f, -7.28127593e-003f,
+ -0.05998361f, -0.03274450f, -0.07376868f, 3.19015374e-003f,
+ -0.07733069f, 0.05815864f, -0.02471071f, 0.03850617f, 0.13838784f,
+ 0.15399861f, 0.01731321f, -0.01477586f, 0.10393341f, 0.05159833f,
+ -0.01945555f, -0.03427503f, -0.04867341f, 0.09237480f, 0.10732719f,
+ 0.06071450f, -0.01355071f, 0.01844356f, -0.03480803f, -0.03796671f,
+ 2.15628621e-004f, -0.05440186f, 0.01889855f, -0.01443413f,
+ -0.02607902f, -0.02938001f, 0.02720689f, -0.06228397f, -0.02970936f,
+ -0.03426210f, -0.10280876f, -0.06739304f, -0.05227850f, 0.03360292f,
+ -0.11278441f, -0.06966180f, -0.13937433f, 9.10932291e-003f,
+ 2.52020749e-004f, -4.07359656e-003f, 0.12310639f, 0.09343060f,
+ 0.07302511f, 0.03222093f, 0.07532879f, 0.03792387f, -0.04985180f,
+ 0.01804602f, 0.02694195f, 0.13481498f, 0.04601225f, 0.04106982f,
+ 0.08511057f, 0.12314661f, 0.01320830f, 0.05044121f, -5.52943908e-003f,
+ -0.08992624f, -0.02249301f, -0.08181777f, 0.06165213f, -0.03256603f,
+ -0.01068920f, -0.01323473f, -0.11970232f, -0.04616347f, -0.12088681f,
+ -0.06762606f, -0.08676834f, -0.06434575f, 0.01772529f, 0.03469615f,
+ -0.10926618f, 0.03013873f, 0.14030397f, 0.16130108f, 0.17985588f,
+ 0.11281928f, 0.10530639f, 0.08905948f, 0.07733764f, 0.06695238f,
+ 0.02142088f, 0.06438877f, 0.09794453f, 0.05745072f, 0.02788557f,
+ 0.02632830f, 0.07985807f, 4.24902979e-003f, 8.47890321e-003f,
+ -0.02679466f, -5.28812688e-003f, -0.02162580f, -0.07490715f,
+ -0.08251337f, -0.02056576f, -0.01026194f, -1.15492963e-003f,
+ -5.75720915e-004f, -0.07210591f, -0.07320981f, -0.04883312f,
+ -0.10897151f, -0.07477258f, -0.08867134f, -0.09222437f, -0.10924666f,
+ -0.10430276f, 0.07953499f, 0.02767959f, 0.11393359f, 0.18779543f,
+ 0.03313421f, 0.02143700f, 0.05852016f, -2.12067598e-003f,
+ -3.76984011e-003f, 0.02774167f, -0.03124610f, 0.01465141f, 0.01616004f,
+ -0.01391913f, -0.04404102f, -0.05444227f, -0.14684731f, -0.15016587f,
+ 0.04509468f, 1.29563001e-003f, 0.01398350f, 0.05610404f, -0.04868806f,
+ -0.04776716f, -8.16873740e-003f, -2.30126386e-003f, -0.02286313f,
+ 0.11983398f, -0.04703261f, -0.08814441f, -0.07585249f, -0.10799607f,
+ -0.03232087f, 0.01509786f, -0.04843464f, -0.03967846f, 0.09589416f,
+ 0.01352560f, -0.01458119f, 0.01050829f, -0.03038946f, 0.01608388f,
+ 1.11975556e-003f, -0.01250656f, 2.86211423e-003f, 0.04333691f,
+ -0.14603497f, -0.01946543f, -0.02327525f, -0.01973944f, 0.07944400f,
+ -0.02224544f, -0.06701808f, 0.03476532f, 0.11505594f, -0.02712801f,
+ -0.01665113f, 0.06315716f, -0.08205860f, 0.07431999f, 0.04915778f,
+ -0.04468752f, -0.01490402f, 0.07400476f, -0.11650901f, 0.05102430f,
+ 0.04559118f, -0.05916039f, 0.08840760f, -0.01587902f, -0.14890194f,
+ 0.07857784f, 0.04710254f, -0.05381983f, -0.07331945f, -0.03604643f,
+ 0.15611970f, 0.07649943f, -0.05959348f, -0.02776607f, 0.11098688f,
+ 0.03758875f, -0.04446875f, 0.04933187f, 0.01345535f, 0.06921103f,
+ 0.07364785f, 0.05518956f, 0.02899585f, 0.09375840f, 0.10518434f,
+ -0.04420241f, 0.01915282f, -3.56386811e-003f, 0.14586878f, 0.10286101f,
+ -0.04360626f, -0.12723237f, 0.09076386f, 0.11119842f, -0.06035013f,
+ 0.09674817f, 0.08938243f, 0.07065924f, 0.02603180f, 5.84815582e-003f,
+ -0.05922065f, 0.12360309f, 3.59695964e-003f, 2.99844006e-003f,
+ 0.03697936f, 0.02043072f, 0.04168725f, 0.01025975f, -0.01359980f,
+ -0.01600920f, 0.02581056f, 0.02329250f, 2.98100687e-003f, 0.01629762f,
+ 0.06652115f, 0.05855627f, 0.01237463f, -0.01297135f, 0.01761587f,
+ 0.05090865f, 0.06549342f, -0.04425945f, 2.43203156e-003f,
+ 3.07327788e-003f, 0.06678630f, -0.04303836f, 0.01082393f, -0.06476044f,
+ 0.04077786f, 0.12441979f, 0.08237778f, 0.07424165f, 0.04065890f,
+ 0.06905543f, 0.09556347f, 0.12724875f, -0.02132082f, 0.08514154f,
+ -0.04175328f, -0.02666954f, 0.01897836f, 0.03317382f, 9.45465732e-003f,
+ -0.01238974f, -0.04242500f, -0.01419479f, -0.03545213f, -0.02440874f,
+ 0.08684119f, 0.04212951f, 0.02462858f, -0.01104825f, -5.01706870e-003f,
+ 0.02968982f, 0.02597476f, -0.01568939f, 0.04514892f, 0.06974549f,
+ 0.08670278f, 0.06828108f, 0.10238872f, 0.05405957f, 0.06548470f,
+ -0.03763957f, 0.01366090f, 0.07069602f, 0.05363748f, 0.04798120f,
+ 0.11706422f, 0.05466456f, -0.01869259f, 0.06344382f, 0.03106543f,
+ 0.08432506f, -0.02061096f, 0.03821088f, -6.92190882e-003f,
+ 6.40467042e-003f, -0.01271779f, 6.89014705e-005f, 0.04541415f,
+ -0.01899539f, -0.05020239f, 0.03000903f, 0.01090422f, 4.52452758e-003f,
+ 0.02573632f, -0.02388454f, -0.04200457f, 1.72783900e-003f,
+ -0.05978370f, -0.02720562f, 0.06573715f, 0.01154317f, 0.01265615f,
+ 0.07375994f, -9.19828378e-003f, -0.04914120f, 0.02124831f, 0.06455322f,
+ 0.04372910f, -0.03310043f, 0.03605788f, -6.78055827e-003f,
+ 9.36202332e-003f, 0.01747596f, -0.06406314f, -0.06812935f, 0.08080816f,
+ -0.02778088f, 0.02735260f, 0.06393493f, 0.06652229f, 0.05676993f,
+ 0.08640018f, -7.59188086e-003f, -0.02012847f, -0.04741159f,
+ -0.01657069f, -0.01624399f, 0.05547778f, -2.33309763e-003f,
+ 0.01120033f, 0.06141156f, -0.06285004f, -0.08732341f, -0.09313398f,
+ -0.04267832f, 5.57443965e-003f, 0.04809862f, 0.01773641f,
+ 5.37361018e-003f, 0.14842421f, -0.06298012f, -0.02935147f, 0.11443478f,
+ -0.05034208f, 5.65494271e-003f, 0.02076526f, -0.04577984f,
+ -0.04735741f, 0.02961071f, -0.09307127f, -0.04417921f, -0.04990027f,
+ -0.03940028f, 0.01306016f, 0.06267900f, 0.03758737f, 0.08460117f,
+ 0.13858789f, 0.04862388f, -0.06319809f, -0.05655516f, 0.01885816f,
+ -0.03285607f, 0.03371567f, -0.07040928f, -0.04514049f, 0.01392166f,
+ 0.08184422f, -0.07230316f, 0.02386871f, 0.02184591f, 0.02605764f,
+ -0.01033954f, 9.29878280e-003f, 7.67351175e-003f, 0.15189242f,
+ 0.02069071f, -0.09738296f, -0.08894105f, -0.07768748f, 0.02332268f,
+ -0.01778995f, -0.03258888f, -0.08180822f, -0.08492987f, 0.02290156f,
+ -0.11368170f, -0.03554465f, -0.04533844f, -0.02861580f, 0.06782424f,
+ 0.01113123f, 0.02453644f, 0.12721945f, 0.08084814f, -0.03607795f,
+ 0.01109122f, 0.04803548f, -0.03489929f, 0.03399536f, -0.05682014f,
+ 8.59533902e-003f, -4.27904585e-003f, 0.03230887f, -0.01300198f,
+ -0.01038137f, -0.07930113f, 8.33097473e-003f, 0.02296994f,
+ -0.01306500f, -0.01881626f, 0.04413369f, 0.05729880f, -0.03761553f,
+ 0.01942326f, 1.64540811e-003f, -0.03811319f, 0.04190650f, -0.14978096f,
+ -0.04514487f, 0.01209545f, -5.46460645e-003f, -0.01647195f,
+ 7.63064111e-003f, -0.07494587f, 0.08415288f, 0.10020141f, -0.01228561f,
+ 0.06553826f, 0.04554005f, 0.07890417f, 0.03041138f, 0.01752007f,
+ 0.09208256f, -3.74419295e-004f, 0.10549527f, 0.04686913f, 0.01894833f,
+ -0.02651412f, -4.34682379e-003f, 5.44942822e-003f, 0.01444484f,
+ 0.05882156f, -0.03336544f, 0.04603891f, -0.10432546f, 0.01923928f,
+ 0.01842845f, -0.01712168f, -0.02222766f, 0.04693324f, -0.06202956f,
+ -0.01422159f, 0.08732220f, -0.07706107f, 0.02661049f, -0.04300238f,
+ -0.03092422f, -0.03552184f, -0.01886088f, -0.04979934f, 0.03906401f,
+ 0.04608644f, 0.04966111f, 0.04275464f, -0.04621769f, -0.02653212f,
+ 8.57011229e-003f, 0.03839684f, 0.05818764f, 0.03880796f,
+ -2.76100676e-004f, 0.03076511f, -0.03266929f, -0.05374557f,
+ 0.04986527f, -9.45429131e-003f, 0.03582499f, -2.64564669e-003f,
+ -1.07461517e-003f, 0.02962313f, -0.01483363f, 0.03060869f, 0.02448327f,
+ 0.01845641f, 0.03282966f, -0.03534438f, -0.01084059f, -0.01119136f,
+ -1.85360224e-003f, -5.94652840e-004f, -0.04451817f, 2.98327743e-003f,
+ 0.06272484f, -0.02152076f, -3.05971340e-003f, -0.05070828f,
+ 0.01531762f, 0.01282815f, 0.05167150f, 9.46266949e-003f,
+ -3.34558333e-003f, 0.11442288f, -0.03906701f, -2.67325155e-003f,
+ 0.03069184f, -0.01134165f, 0.02949462f, 0.02879886f, 0.03855566f,
+ -0.03450781f, 0.09142872f, -0.02156654f, 0.06075062f, -0.06220816f,
+ 0.01944680f, 6.68372354e-003f, -0.06656796f, 8.70784000e-003f,
+ 0.03456013f, 0.02434320f, -0.13236357f, -0.04177035f, -0.02069627f,
+ 0.01068112f, 0.01505432f, -0.07517391f, -3.83571628e-003f,
+ -0.06298508f, -0.02881260f, -0.13101046f, -0.07221562f,
+ -5.79945277e-003f, -8.57300125e-003f, 0.03782469f, 0.02762164f,
+ 0.04942456f, -0.02936396f, 0.09597211f, 0.01921411f, 0.06101191f,
+ -0.04787507f, -0.01379578f, -7.40224449e-003f, -0.02220136f,
+ -0.01313756f, 7.77558051e-003f, 0.12296968f, 0.02939998f, 0.03594062f,
+ -0.07788624f, -0.01133144f, 3.99316690e-004f, -0.06090347f,
+ -0.01122066f, -4.68682544e-003f, 0.07633100f, -0.06748922f,
+ -0.05640298f, -0.05265681f, -0.01139122f, -0.01624347f, -0.04715714f,
+ -0.01099092f, 0.01048561f, 3.28499987e-003f, -0.05810167f,
+ -0.07699911f, -0.03330683f, 0.04185145f, 0.03478536f, 0.02275165f,
+ 0.02304766f, 6.66040834e-003f, 0.10968148f, -5.93013782e-003f,
+ -0.04858336f, -0.04203213f, -0.09316786f, -6.13074889e-003f,
+ -0.02544625f, 0.01366201f, 9.18555818e-003f, -0.01846578f,
+ -0.05622401f, -0.03989377f, -0.07810296f, 6.91275718e-003f,
+ 0.05957597f, -0.03901334f, 0.01572002f, -0.01193903f,
+ -6.89400872e-003f, -0.03093356f, -0.04136098f, -0.01562869f,
+ -0.04604580f, 0.02865234f, -0.08678447f, -0.03232484f, -0.05364593f,
+ -0.01445016f, -0.07003860f, -0.08669746f, -0.04520775f, 0.04274122f,
+ 0.03117515f, 0.08175703f, 0.01081109f, 0.06379741f, 0.06199206f,
+ 0.02865988f, 0.02360346f, 0.06725410f, -0.03248780f, -9.37702879e-003f,
+ 0.08265898f, -0.02245839f, 0.05125763f, -0.01862395f, 0.01973453f,
+ -0.01994494f, -0.10770868f, 0.03180375f, 3.23935156e-003f,
+ -0.02142080f, -0.04256190f, 0.04760900f, 0.04282863f, 0.05635953f,
+ -0.01870849f, 0.05540622f, -0.03042666f, 0.01455277f, -0.06630179f,
+ -0.05843807f, -0.03739681f, -0.09739155f, -0.03220233f, -0.05620182f,
+ -0.10381401f, 0.07400211f, 4.20676917e-003f, 0.03258535f,
+ 2.14308966e-003f, 0.05121966f, -0.01274337f, 0.02384761f, 0.06335578f,
+ -0.07905591f, 0.08375625f, -0.07898903f, -0.06508528f, -0.02498444f,
+ 0.06535810f, 0.03970535f, 0.04895468f, -0.01169566f, -0.03980601f,
+ 0.05682293f, 0.05925463f, -0.01165808f, -0.07936699f, -0.04208954f,
+ 0.01333987f, 0.09051196f, 0.10098671f, -0.03974256f, 0.01238771f,
+ -0.07501741f, -0.03655440f, -0.04301528f, 0.09216860f,
+ 4.63579083e-004f, 0.02851115f, 0.02142735f, 1.28244064e-004f,
+ 0.02879687f, -0.08554889f, -0.04838862f, 0.08135369f, -0.05756533f,
+ 0.01413900f, 0.03451880f, -0.06619488f, -0.03053130f, 0.02961676f,
+ -0.07384635f, 0.01135692f, 0.05283910f, -0.07778034f, -0.02107482f,
+ -0.05511716f, -0.13473752f, 0.03030157f, 0.06722020f, -0.06218817f,
+ -0.05826827f, 0.06254654f, 0.02895772f, -0.01664000f, -0.03620280f,
+ -0.01612278f, -1.46097376e-003f, 0.14013411f, -8.96181818e-003f,
+ -0.03250246f, 3.38630192e-003f, 2.64779478e-003f, 0.03359732f,
+ -0.02411991f, -0.04229729f, 0.10666174f, -6.66579151f
+ };
+ return vector<float>(detector, detector + sizeof(detector) / sizeof(detector[0]));
}
/* Returns the nearest upper power of two, works only for
}
void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int block_stride_y,
- int nblocks_win_x, int nblocks_win_y)
+ int nblocks_win_x, int nblocks_win_y)
{
cnbins = nbins;
cblock_stride_x = block_stride_x;
}
void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int block_stride_y,
- int height, int width, const cv::ocl::oclMat& grad,
- const cv::ocl::oclMat& qangle, float sigma, cv::ocl::oclMat& block_hists)
+ int height, int width, const cv::ocl::oclMat &grad,
+ const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists)
{
Context *clCxt = Context::getContext();
- string kernelName = "compute_hists_kernel";
- vector< pair<size_t, const void *> > args;
+ string kernelName = "compute_hists_kernel";
+ vector< pair<size_t, const void *> > args;
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y;
}
void cv::ocl::device::hog::normalize_hists(int nbins, int block_stride_x, int block_stride_y,
- int height, int width, cv::ocl::oclMat& block_hists, float threshold)
+ int height, int width, cv::ocl::oclMat &block_hists, float threshold)
{
Context *clCxt = Context::getContext();
- string kernelName = "normalize_hists_kernel";
- vector< pair<size_t, const void *> > args;
+ string kernelName = "normalize_hists_kernel";
+ vector< pair<size_t, const void *> > args;
int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
int nthreads = power_2up(block_hist_size);
}
void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int block_stride_y,
- int block_stride_x, int win_stride_y, int win_stride_x, int height,
- int width, const cv::ocl::oclMat& block_hists, const cv::ocl::oclMat& coefs, float free_coef,
- float threshold, cv::ocl::oclMat& labels)
+ int block_stride_x, int win_stride_y, int win_stride_x, int height,
+ int width, const cv::ocl::oclMat &block_hists, const cv::ocl::oclMat &coefs, float free_coef,
+ float threshold, cv::ocl::oclMat &labels)
{
Context *clCxt = Context::getContext();
- string kernelName = "classify_hists_kernel";
- vector< pair<size_t, const void *> > args;
+ string kernelName = "classify_hists_kernel";
+ vector< pair<size_t, const void *> > args;
int win_block_stride_x = win_stride_x / block_stride_x;
int win_block_stride_y = win_stride_y / block_stride_y;
}
void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
- int win_stride_y, int win_stride_x, int height, int width,
- const cv::ocl::oclMat& block_hists, cv::ocl::oclMat& descriptors)
+ int win_stride_y, int win_stride_x, int height, int width,
+ const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors)
{
Context *clCxt = Context::getContext();
- string kernelName = "extract_descrs_by_rows_kernel";
- vector< pair<size_t, const void *> > args;
+ string kernelName = "extract_descrs_by_rows_kernel";
+ vector< pair<size_t, const void *> > args;
int win_block_stride_x = win_stride_x / block_stride_x;
int win_block_stride_y = win_stride_y / block_stride_y;
}
void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
- int win_stride_y, int win_stride_x, int height, int width,
- const cv::ocl::oclMat& block_hists, cv::ocl::oclMat& descriptors)
+ int win_stride_y, int win_stride_x, int height, int width,
+ const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors)
{
Context *clCxt = Context::getContext();
- string kernelName = "extract_descrs_by_cols_kernel";
- vector< pair<size_t, const void *> > args;
+ string kernelName = "extract_descrs_by_cols_kernel";
+ vector< pair<size_t, const void *> > args;
int win_block_stride_x = win_stride_x / block_stride_x;
int win_block_stride_y = win_stride_y / block_stride_y;
return (total + grain - 1) / grain;
}
-void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat& img,
- float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma)
+void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img,
+ float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma)
{
Context *clCxt = Context::getContext();
- string kernelName = "compute_gradients_8UC1_kernel";
- vector< pair<size_t, const void *> > args;
+ string kernelName = "compute_gradients_8UC1_kernel";
+ vector< pair<size_t, const void *> > args;
size_t localThreads[3] = { NTHREADS, 1, 1 };
size_t globalThreads[3] = { width, height, 1 };
openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1);
}
-void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat& img,
- float angle_scale, cv::ocl::oclMat& grad, cv::ocl::oclMat& qangle, bool correct_gamma)
+void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img,
+ float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma)
{
Context *clCxt = Context::getContext();
- string kernelName = "compute_gradients_8UC4_kernel";
- vector< pair<size_t, const void *> > args;
+ string kernelName = "compute_gradients_8UC4_kernel";
+ vector< pair<size_t, const void *> > args;
size_t localThreads[3] = { NTHREADS, 1, 1 };
size_t globalThreads[3] = { width, height, 1 };
-
+
char correctGamma = (correct_gamma) ? 1 : 0;
int img_step = img.step >> 2;
int grad_quadstep = grad.step >> 3;
{
throw_nogpu();
}
-void cv::ocl::remap(const oclMat&, oclMat&, oclMat&, oclMat&, int, int ,const Scalar&) { throw_nogpu(); }
+void cv::ocl::remap(const oclMat &, oclMat &, oclMat &, oclMat &, int, int , const Scalar &)
+{
+ throw_nogpu();
+}
void cv::ocl::copyMakeBorder(const oclMat &, oclMat &, int, int, int, int, const Scalar &)
{
{
throw_nogpu();
}
-void cv::ocl::convolve(const oclMat&, const oclMat&, oclMat&)
+void cv::ocl::convolve(const oclMat &, const oclMat &, oclMat &)
{
throw_nogpu();
}
extern const char *imgproc_bilateral;
extern const char *imgproc_calcHarris;
extern const char *imgproc_calcMinEigenVal;
- extern const char *imgproc_convolve;
+ extern const char *imgproc_convolve;
////////////////////////////////////OpenCL call wrappers////////////////////////////
template <typename T> struct index_and_sizeof;
args.push_back( make_pair(sizeof(cl_uchar), (void *)&thresh_uchar));
args.push_back( make_pair(sizeof(cl_uchar), (void *)&max_val));
args.push_back( make_pair(sizeof(cl_int), (void *)&type));
- openCLExecuteKernel(clCxt, &imgproc_threshold, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
+ openCLExecuteKernel(clCxt, &imgproc_threshold, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
}
void threshold_32f(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type)
args.push_back( make_pair(sizeof(cl_float), (void *)&thresh_f));
args.push_back( make_pair(sizeof(cl_float), (void *)&max_val));
args.push_back( make_pair(sizeof(cl_int), (void *)&type));
- openCLExecuteKernel(clCxt, &imgproc_threshold, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
+ openCLExecuteKernel(clCxt, &imgproc_threshold, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
}
return thresh;
}
- ////////////////////////////////////////////////////////////////////////////////////////////
- /////////////////////////////// remap //////////////////////////////////////////////////
- ////////////////////////////////////////////////////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////////////////////
+ /////////////////////////////// remap //////////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////////////////////
- void remap( const oclMat& src, oclMat& dst, oclMat& map1, oclMat& map2, int interpolation, int borderType, const Scalar& borderValue )
+ void remap( const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int borderType, const Scalar &borderValue )
{
Context *clCxt = src.clCxt;
- CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST
- || interpolation == INTER_CUBIC || interpolation== INTER_LANCZOS4);
- CV_Assert((map1.type() == CV_16SC2 && !map2.data) || (map1.type()== CV_32FC2 && !map2.data) || (map1.type() == CV_32FC1 && map2.type() == CV_32FC1));
- CV_Assert(!map2.data || map2.size()== map1.size());
+ CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST
+ || interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4);
+ CV_Assert((map1.type() == CV_16SC2 && !map2.data) || (map1.type() == CV_32FC2 && !map2.data) || (map1.type() == CV_32FC1 && map2.type() == CV_32FC1));
+ CV_Assert(!map2.data || map2.size() == map1.size());
CV_Assert(dst.size() == map1.size());
dst.create(map1.size(), src.type());
kernelName = "remapNNSConstant";
}
- else if(map1.type() == CV_32FC1 && map2.type() == CV_32FC1)
+ else if(map1.type() == CV_32FC1 && map2.type() == CV_32FC1)
{
if(interpolation == INTER_LINEAR && borderType == BORDER_CONSTANT)
kernelName = "remapLNF1Constant";
kernelName = "remapNNF1Constant";
}
- int channels = dst.channels();
+ int channels = dst.oclchannels();
int depth = dst.depth();
- int type = src.type();
- size_t blkSizeX = 16, blkSizeY = 16;
- size_t glbSizeX;
+ int type = src.type();
+ size_t blkSizeX = 16, blkSizeY = 16;
+ size_t glbSizeX;
int cols = dst.cols;
- if(src.type() == CV_8UC1)
+ if(src.type() == CV_8UC1)
{
- cols = (dst.cols + dst.offset%4 + 3)/4;
- glbSizeX = cols %blkSizeX==0 ? cols : (cols/blkSizeX+1)*blkSizeX;
-
+ cols = (dst.cols + dst.offset % 4 + 3) / 4;
+ glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX;
+
}
- else if(src.type() == CV_8UC4 || src.type() == CV_32FC1)
+ else if(src.type() == CV_8UC3 || src.type() == CV_8UC4 || src.type() == CV_32FC1)
{
- cols = (dst.cols + (dst.offset>>2)%4 + 3)/4;
- glbSizeX = cols %blkSizeX==0 ? cols : (cols/blkSizeX+1)*blkSizeX;
+ cols = (dst.cols + (dst.offset >> 2) % 4 + 3) / 4;
+ glbSizeX = cols % blkSizeX == 0 ? cols : (cols / blkSizeX + 1) * blkSizeX;
}
else
{
- glbSizeX = dst.cols%blkSizeX==0 ? dst.cols : (dst.cols/blkSizeX+1)*blkSizeX;
-
+ glbSizeX = dst.cols % blkSizeX == 0 ? dst.cols : (dst.cols / blkSizeX + 1) * blkSizeX;
+
}
- size_t glbSizeY = dst.rows%blkSizeY==0 ? dst.rows : (dst.rows/blkSizeY+1)*blkSizeY;
- size_t globalThreads[3] = {glbSizeX,glbSizeY,1};
- size_t localThreads[3] = {blkSizeX,blkSizeY,1};
+ size_t glbSizeY = dst.rows % blkSizeY == 0 ? dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY;
+ size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
+ size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
/*
/////////////////////////////
//using the image buffer
/////////////////////////////
-
+
size_t image_row_pitch = 0;
cl_int err1, err2, err3;
cl_mem_flags flags1 = CL_MEM_READ_ONLY;
printf("Error code %d \n", err3);
return;
}
- // clWaitForEvents(1, &BtoI_event);
-
+ // clWaitForEvents(1, &BtoI_event);
+
cl_int ret;
Mat test(src.rows, src.cols, CV_8UC1);
memset(test.data, 0, src.rows*src.cols);
vector< pair<size_t, const void *> > args;
if(map1.channels() == 2)
{
- args.push_back( make_pair(sizeof(cl_mem),(void*)&dst.data));
- args.push_back( make_pair(sizeof(cl_mem),(void*)&src.data));
+ args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
+ args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
// args.push_back( make_pair(sizeof(cl_mem),(void*)&srcImage)); //imageBuffer
- args.push_back( make_pair(sizeof(cl_mem),(void*)&map1.data));
- args.push_back( make_pair(sizeof(cl_int),(void*)&dst.offset));
- args.push_back( make_pair(sizeof(cl_int),(void*)&src.offset));
- args.push_back( make_pair(sizeof(cl_int),(void*)&map1.offset));
- args.push_back( make_pair(sizeof(cl_int),(void*)&dst.step));
- args.push_back( make_pair(sizeof(cl_int),(void*)&src.step));
- args.push_back( make_pair(sizeof(cl_int),(void*)&map1.step));
- args.push_back( make_pair(sizeof(cl_int),(void*)&src.cols));
- args.push_back( make_pair(sizeof(cl_int),(void*)&src.rows));
- args.push_back( make_pair(sizeof(cl_int),(void*)&dst.cols));
- args.push_back( make_pair(sizeof(cl_int),(void*)&dst.rows));
- args.push_back( make_pair(sizeof(cl_int),(void*)&map1.cols));
- args.push_back( make_pair(sizeof(cl_int),(void*)&map1.rows));
+ args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&map1.offset));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&src.step));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&map1.step));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
if(src.clCxt -> impl -> double_support != 0)
{
- args.push_back( make_pair(sizeof(cl_double4),(void*)&borderValue));
+ args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
}
else
{
- float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
- args.push_back( make_pair(sizeof(cl_float4),(void*)&borderFloat));
+ float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
+ args.push_back( make_pair(sizeof(cl_float4), (void *)&borderFloat));
}
}
if(map1.channels() == 1)
{
- args.push_back( make_pair(sizeof(cl_mem),(void*)&dst.data));
- args.push_back( make_pair(sizeof(cl_mem),(void*)&src.data));
+ args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
+ args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
// args.push_back( make_pair(sizeof(cl_mem),(void*)&srcImage)); //imageBuffer
- args.push_back( make_pair(sizeof(cl_mem),(void*)&map1.data));
- args.push_back( make_pair(sizeof(cl_mem),(void*)&map2.data));
- args.push_back( make_pair(sizeof(cl_int),(void*)&dst.offset));
- args.push_back( make_pair(sizeof(cl_int),(void*)&src.offset));
- args.push_back( make_pair(sizeof(cl_int),(void*)&map1.offset));
- args.push_back( make_pair(sizeof(cl_int),(void*)&dst.step));
- args.push_back( make_pair(sizeof(cl_int),(void*)&src.step));
- args.push_back( make_pair(sizeof(cl_int),(void*)&map1.step));
- args.push_back( make_pair(sizeof(cl_int),(void*)&src.cols));
- args.push_back( make_pair(sizeof(cl_int),(void*)&src.rows));
- args.push_back( make_pair(sizeof(cl_int),(void*)&dst.cols));
- args.push_back( make_pair(sizeof(cl_int),(void*)&dst.rows));
- args.push_back( make_pair(sizeof(cl_int),(void*)&map1.cols));
- args.push_back( make_pair(sizeof(cl_int),(void*)&map1.rows));
+ args.push_back( make_pair(sizeof(cl_mem), (void *)&map1.data));
+ args.push_back( make_pair(sizeof(cl_mem), (void *)&map2.data));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&map1.offset));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&dst.step));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&src.step));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&map1.step));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols));
+ args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
if(src.clCxt -> impl -> double_support != 0)
{
- args.push_back( make_pair(sizeof(cl_double4),(void*)&borderValue));
+ args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
}
else
{
- float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
- args.push_back( make_pair(sizeof(cl_float4),(void*)&borderFloat));
+ float borderFloat[4] = {(float)borderValue[0], (float)borderValue[1], (float)borderValue[2], (float)borderValue[3]};
+ args.push_back( make_pair(sizeof(cl_float4), (void *)&borderFloat));
}
}
- openCLExecuteKernel(clCxt,&imgproc_remap,kernelName,globalThreads,localThreads,args,src.channels(),src.depth());
- }
-
+ openCLExecuteKernel(clCxt, &imgproc_remap, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
+ }
+
////////////////////////////////////////////////////////////////////////////////////////////
// resize
float ify = 1. / fy;
double ifx_d = 1. / fx;
double ify_d = 1. / fy;
- int srcStep_in_pixel = src.step1() / src.channels();
- int srcoffset_in_pixel = src.offset / src.elemSize();
- int dstStep_in_pixel = dst.step1() / dst.channels();
- int dstoffset_in_pixel = dst.offset / dst.elemSize();
- //printf("%d %d\n",src.step1() , dst.elemSize());
+ int srcStep_in_pixel = src.step1() / src.oclchannels();
+ int srcoffset_in_pixel = src.offset / src.elemSize();
+ int dstStep_in_pixel = dst.step1() / dst.oclchannels();
+ int dstoffset_in_pixel = dst.offset / dst.elemSize();
+ //printf("%d %d\n",src.step1() , dst.elemSize());
string kernelName;
if(interpolation == INTER_LINEAR)
kernelName = "resizeLN";
if(src.type() == CV_8UC1)
{
size_t cols = (dst.cols + dst.offset % 4 + 3) / 4;
- glbSizeX = cols % blkSizeX == 0 && cols != 0? cols : (cols / blkSizeX + 1) * blkSizeX;
+ glbSizeX = cols % blkSizeX == 0 && cols != 0 ? cols : (cols / blkSizeX + 1) * blkSizeX;
}
else
{
- glbSizeX = dst.cols % blkSizeX == 0 && dst.cols !=0? dst.cols : (dst.cols / blkSizeX + 1) * blkSizeX;
+ glbSizeX = dst.cols % blkSizeX == 0 && dst.cols != 0 ? dst.cols : (dst.cols / blkSizeX + 1) * blkSizeX;
}
- size_t glbSizeY = dst.rows % blkSizeY == 0 && dst.rows != 0? dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY;
+ size_t glbSizeY = dst.rows % blkSizeY == 0 && dst.rows != 0 ? dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY;
size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
if(src.clCxt -> impl -> double_support != 0)
{
- args.push_back( make_pair(sizeof(cl_double), (void *)&ifx_d));
- args.push_back( make_pair(sizeof(cl_double), (void *)&ify_d));
+ args.push_back( make_pair(sizeof(cl_double), (void *)&ifx_d));
+ args.push_back( make_pair(sizeof(cl_double), (void *)&ify_d));
}
else
{
- args.push_back( make_pair(sizeof(cl_float), (void *)&ifx));
- args.push_back( make_pair(sizeof(cl_float), (void *)&ify));
+ args.push_back( make_pair(sizeof(cl_float), (void *)&ifx));
+ args.push_back( make_pair(sizeof(cl_float), (void *)&ify));
}
}
else
args.push_back( make_pair(sizeof(cl_float), (void *)&ify));
}
- openCLExecuteKernel(clCxt, &imgproc_resize, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
+ openCLExecuteKernel(clCxt, &imgproc_resize, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
}
void resize(const oclMat &src, oclMat &dst, Size dsize,
double fx, double fy, int interpolation)
{
- CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4
- || src.type() == CV_32FC1 || src.type() == CV_32FC4);
+ CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3 || src.type() == CV_8UC4
+ || src.type() == CV_32FC1 || src.type() == CV_32FC3 || src.type() == CV_32FC4);
CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST);
CV_Assert( src.size().area() > 0 );
CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );
{
if(dsize.width != (int)(src.cols * fx) || dsize.height != (int)(src.rows * fy))
{
- CV_Error(CV_StsUnmatchedSizes,"invalid dsize and fx, fy!");
+ CV_Error(CV_StsUnmatchedSizes, "invalid dsize and fx, fy!");
}
}
if( dsize == Size() )
return medianFilter(src1, dst, m);
}
- int srcStep = src.step1() / src.channels();
- int dstStep = dst.step1() / dst.channels();
- int srcOffset = src.offset / src.channels() / src.elemSize1();
- int dstOffset = dst.offset / dst.channels() / dst.elemSize1();
+ int srcStep = src.step1() / src.oclchannels();
+ int dstStep = dst.step1() / dst.oclchannels();
+ int srcOffset = src.offset / src.oclchannels() / src.elemSize1();
+ int dstOffset = dst.offset / dst.oclchannels() / dst.elemSize1();
Context *clCxt = src.clCxt;
string kernelName = "medianFilter";
if(m == 3)
{
string kernelName = "medianFilter3";
- openCLExecuteKernel(clCxt, &imgproc_median, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
+ openCLExecuteKernel(clCxt, &imgproc_median, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
}
else if(m == 5)
{
string kernelName = "medianFilter5";
- openCLExecuteKernel(clCxt, &imgproc_median, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
+ openCLExecuteKernel(clCxt, &imgproc_median, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
}
else
{
//string kernelName = "medianFilter";
//args.push_back( make_pair( sizeof(cl_int),(void*)&m));
- //openCLExecuteKernel(clCxt,&imgproc_median,kernelName,globalThreads,localThreads,args,src.channels(),-1);
+ //openCLExecuteKernel(clCxt,&imgproc_median,kernelName,globalThreads,localThreads,args,src.oclchannels(),-1);
}
}
// copyMakeBorder
void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int bordertype, const Scalar &scalar)
{
- //CV_Assert(src.channels() != 2);
+ //CV_Assert(src.oclchannels() != 2);
CV_Assert(top >= 0 && bottom >= 0 && left >= 0 && right >= 0);
- if((dst.cols!=dst.wholecols) || (dst.rows!=dst.wholerows))//has roi
- {
- if(((bordertype & cv::BORDER_ISOLATED) == 0) &&
- (bordertype != cv::BORDER_CONSTANT) &&
- (bordertype != cv::BORDER_REPLICATE))
- {
- CV_Error(CV_StsBadArg,"unsupported border type");
- }
- }
- bordertype &= ~cv::BORDER_ISOLATED;
- if((bordertype == cv::BORDER_REFLECT) || (bordertype == cv::BORDER_WRAP))
- {
- CV_Assert((src.cols>=left) && (src.cols>=right) && (src.rows >= top) && (src.rows >= bottom));
- }
- if(bordertype == cv::BORDER_REFLECT_101)
- {
- CV_Assert((src.cols>left) && (src.cols>right) && (src.rows > top) && (src.rows > bottom));
- }
+ if((dst.cols != dst.wholecols) || (dst.rows != dst.wholerows)) //has roi
+ {
+ if(((bordertype & cv::BORDER_ISOLATED) == 0) &&
+ (bordertype != cv::BORDER_CONSTANT) &&
+ (bordertype != cv::BORDER_REPLICATE))
+ {
+ CV_Error(CV_StsBadArg, "unsupported border type");
+ }
+ }
+ bordertype &= ~cv::BORDER_ISOLATED;
+ if((bordertype == cv::BORDER_REFLECT) || (bordertype == cv::BORDER_WRAP))
+ {
+ CV_Assert((src.cols >= left) && (src.cols >= right) && (src.rows >= top) && (src.rows >= bottom));
+ }
+ if(bordertype == cv::BORDER_REFLECT_101)
+ {
+ CV_Assert((src.cols > left) && (src.cols > right) && (src.rows > top) && (src.rows > bottom));
+ }
dst.create(src.rows + top + bottom, src.cols + left + right, src.type());
- int srcStep = src.step1() / src.channels();
- int dstStep = dst.step1() / dst.channels();
+ int srcStep = src.step1() / src.oclchannels();
+ int dstStep = dst.step1() / dst.oclchannels();
int srcOffset = src.offset / src.elemSize();
int dstOffset = dst.offset / dst.elemSize();
- int __bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101};
- const char* borderstr[]={"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"};
- int bordertype_index;
- for(bordertype_index=0;bordertype_index<sizeof(__bordertype) / sizeof(int); bordertype_index++)
- {
- if(__bordertype[bordertype_index]==bordertype)
- break;
- }
- if(bordertype_index==sizeof(__bordertype) / sizeof(int))
- {
- CV_Error(CV_StsBadArg,"unsupported border type");
- }
+ int __bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, BORDER_REFLECT, BORDER_WRAP, BORDER_REFLECT_101};
+ const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};
+ int bordertype_index;
+ for(bordertype_index = 0; bordertype_index < sizeof(__bordertype) / sizeof(int); bordertype_index++)
+ {
+ if(__bordertype[bordertype_index] == bordertype)
+ break;
+ }
+ if(bordertype_index == sizeof(__bordertype) / sizeof(int))
+ {
+ CV_Error(CV_StsBadArg, "unsupported border type");
+ }
string kernelName = "copymakeborder";
- size_t localThreads[3] = {16, 16, 1};
- size_t globalThreads[3] = {(dst.cols + localThreads[0]-1) / localThreads[0] * localThreads[0],
- (dst.rows + localThreads[1]-1) / localThreads[1] * localThreads[1], 1};
-
+ size_t localThreads[3] = {16, 16, 1};
+ size_t globalThreads[3] = {(dst.cols + localThreads[0] - 1) / localThreads[0] *localThreads[0],
+ (dst.rows + localThreads[1] - 1) / localThreads[1] *localThreads[1], 1
+ };
+
vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep));
args.push_back( make_pair( sizeof(cl_int), (void *)&srcOffset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep));
args.push_back( make_pair( sizeof(cl_int), (void *)&dstOffset));
args.push_back( make_pair( sizeof(cl_int), (void *)&top));
args.push_back( make_pair( sizeof(cl_int), (void *)&left));
- char compile_option[64];
- union sc
- {
- cl_uchar4 uval;
- cl_char4 cval;
- cl_ushort4 usval;
- cl_short4 shval;
- cl_int4 ival;
- cl_float4 fval;
- cl_double4 dval;
- }val;
- switch(dst.depth())
- {
- case CV_8U:
- val.uval.s[0] = saturate_cast<uchar>(scalar.val[0]);
- val.uval.s[1] = saturate_cast<uchar>(scalar.val[1]);
- val.uval.s[2] = saturate_cast<uchar>(scalar.val[2]);
- val.uval.s[3] = saturate_cast<uchar>(scalar.val[3]);
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=uchar -D %s",borderstr[bordertype_index]);
- args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] ));
- if(((dst.offset & 3) ==0) && ((dst.cols & 3) == 0))
- {
- kernelName = "copymakeborder_C1_D0";
- globalThreads[0] = (dst.cols/4 + localThreads[0]-1) / localThreads[0] * localThreads[0];
- }
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=uchar4 -D %s",borderstr[bordertype_index]);
- args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
- break;
- case CV_8S:
- val.cval.s[0] = saturate_cast<char>(scalar.val[0]);
- val.cval.s[1] = saturate_cast<char>(scalar.val[1]);
- val.cval.s[2] = saturate_cast<char>(scalar.val[2]);
- val.cval.s[3] = saturate_cast<char>(scalar.val[3]);
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=char -D %s",borderstr[bordertype_index]);
- args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=char4 -D %s",borderstr[bordertype_index]);
- args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
- break;
- case CV_16U:
- val.usval.s[0] = saturate_cast<ushort>(scalar.val[0]);
- val.usval.s[1] = saturate_cast<ushort>(scalar.val[1]);
- val.usval.s[2] = saturate_cast<ushort>(scalar.val[2]);
- val.usval.s[3] = saturate_cast<ushort>(scalar.val[3]);
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=ushort -D %s",borderstr[bordertype_index]);
- args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=ushort4 -D %s",borderstr[bordertype_index]);
- args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
- break;
- case CV_16S:
- val.shval.s[0] = saturate_cast<short>(scalar.val[0]);
- val.shval.s[1] = saturate_cast<short>(scalar.val[1]);
- val.shval.s[2] = saturate_cast<short>(scalar.val[2]);
- val.shval.s[3] = saturate_cast<short>(scalar.val[3]);
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=short -D %s",borderstr[bordertype_index]);
- args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=short4 -D %s",borderstr[bordertype_index]);
- args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
- break;
- case CV_32S:
- val.ival.s[0] = saturate_cast<int>(scalar.val[0]);
- val.ival.s[1] = saturate_cast<int>(scalar.val[1]);
- val.ival.s[2] = saturate_cast<int>(scalar.val[2]);
- val.ival.s[3] = saturate_cast<int>(scalar.val[3]);
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=int -D %s",borderstr[bordertype_index]);
- args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] ));
- break;
- case 2:
- sprintf(compile_option, "-D GENTYPE=int2 -D %s",borderstr[bordertype_index]);
- cl_int2 i2val;
- i2val.s[0] = val.ival.s[0];
- i2val.s[1] = val.ival.s[1];
- args.push_back( make_pair( sizeof(cl_int2) , (void *)&i2val ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=int4 -D %s",borderstr[bordertype_index]);
- args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
- break;
- case CV_32F:
- val.fval.s[0] = scalar.val[0];
- val.fval.s[1] = scalar.val[1];
- val.fval.s[2] = scalar.val[2];
- val.fval.s[3] = scalar.val[3];
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=float -D %s",borderstr[bordertype_index]);
- args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=float4 -D %s",borderstr[bordertype_index]);
- args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
- break;
- case CV_64F:
- val.dval.s[0] = scalar.val[0];
- val.dval.s[1] = scalar.val[1];
- val.dval.s[2] = scalar.val[2];
- val.dval.s[3] = scalar.val[3];
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=double -D %s",borderstr[bordertype_index]);
- args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=double4 -D %s",borderstr[bordertype_index]);
- args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unknown depth");
- }
-
- openCLExecuteKernel(src.clCxt, &imgproc_copymakeboder, kernelName, globalThreads, localThreads, args, -1, -1,compile_option);
- //uchar* cputemp=new uchar[32*dst.wholerows];
- ////int* cpudata=new int[this->step*this->wholerows/sizeof(int)];
- //openCLSafeCall(clEnqueueReadBuffer(src.clCxt->impl->clCmdQueue, (cl_mem)dst.data, CL_TRUE,
- // 0, 32*dst.wholerows, cputemp, 0, NULL, NULL));
- //for(int i=0;i<dst.wholerows;i++)
- //{
- // for(int j=0;j<dst.wholecols;j++)
- // {
- // cout<< (int)cputemp[i*32+j]<<" ";
- // }
- // cout<<endl;
- //}
- //delete []cputemp;
+ char compile_option[64];
+ union sc
+ {
+ cl_uchar4 uval;
+ cl_char4 cval;
+ cl_ushort4 usval;
+ cl_short4 shval;
+ cl_int4 ival;
+ cl_float4 fval;
+ cl_double4 dval;
+ } val;
+ switch(dst.depth())
+ {
+ case CV_8U:
+ val.uval.s[0] = saturate_cast<uchar>(scalar.val[0]);
+ val.uval.s[1] = saturate_cast<uchar>(scalar.val[1]);
+ val.uval.s[2] = saturate_cast<uchar>(scalar.val[2]);
+ val.uval.s[3] = saturate_cast<uchar>(scalar.val[3]);
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=uchar -D %s", borderstr[bordertype_index]);
+ args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] ));
+ if(((dst.offset & 3) == 0) && ((dst.cols & 3) == 0))
+ {
+ kernelName = "copymakeborder_C1_D0";
+ globalThreads[0] = (dst.cols / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
+ }
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=uchar4 -D %s", borderstr[bordertype_index]);
+ args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
+ break;
+ case CV_8S:
+ val.cval.s[0] = saturate_cast<char>(scalar.val[0]);
+ val.cval.s[1] = saturate_cast<char>(scalar.val[1]);
+ val.cval.s[2] = saturate_cast<char>(scalar.val[2]);
+ val.cval.s[3] = saturate_cast<char>(scalar.val[3]);
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=char -D %s", borderstr[bordertype_index]);
+ args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=char4 -D %s", borderstr[bordertype_index]);
+ args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
+ break;
+ case CV_16U:
+ val.usval.s[0] = saturate_cast<ushort>(scalar.val[0]);
+ val.usval.s[1] = saturate_cast<ushort>(scalar.val[1]);
+ val.usval.s[2] = saturate_cast<ushort>(scalar.val[2]);
+ val.usval.s[3] = saturate_cast<ushort>(scalar.val[3]);
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=ushort -D %s", borderstr[bordertype_index]);
+ args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=ushort4 -D %s", borderstr[bordertype_index]);
+ args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
+ break;
+ case CV_16S:
+ val.shval.s[0] = saturate_cast<short>(scalar.val[0]);
+ val.shval.s[1] = saturate_cast<short>(scalar.val[1]);
+ val.shval.s[2] = saturate_cast<short>(scalar.val[2]);
+ val.shval.s[3] = saturate_cast<short>(scalar.val[3]);
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=short -D %s", borderstr[bordertype_index]);
+ args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=short4 -D %s", borderstr[bordertype_index]);
+ args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
+ break;
+ case CV_32S:
+ val.ival.s[0] = saturate_cast<int>(scalar.val[0]);
+ val.ival.s[1] = saturate_cast<int>(scalar.val[1]);
+ val.ival.s[2] = saturate_cast<int>(scalar.val[2]);
+ val.ival.s[3] = saturate_cast<int>(scalar.val[3]);
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=int -D %s", borderstr[bordertype_index]);
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] ));
+ break;
+ case 2:
+ sprintf(compile_option, "-D GENTYPE=int2 -D %s", borderstr[bordertype_index]);
+ cl_int2 i2val;
+ i2val.s[0] = val.ival.s[0];
+ i2val.s[1] = val.ival.s[1];
+ args.push_back( make_pair( sizeof(cl_int2) , (void *)&i2val ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=int4 -D %s", borderstr[bordertype_index]);
+ args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
+ break;
+ case CV_32F:
+ val.fval.s[0] = scalar.val[0];
+ val.fval.s[1] = scalar.val[1];
+ val.fval.s[2] = scalar.val[2];
+ val.fval.s[3] = scalar.val[3];
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=float -D %s", borderstr[bordertype_index]);
+ args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=float4 -D %s", borderstr[bordertype_index]);
+ args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
+ break;
+ case CV_64F:
+ val.dval.s[0] = scalar.val[0];
+ val.dval.s[1] = scalar.val[1];
+ val.dval.s[2] = scalar.val[2];
+ val.dval.s[3] = scalar.val[3];
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=double -D %s", borderstr[bordertype_index]);
+ args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=double4 -D %s", borderstr[bordertype_index]);
+ args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unknown depth");
+ }
+
+ openCLExecuteKernel(src.clCxt, &imgproc_copymakeboder, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
+ //uchar* cputemp=new uchar[32*dst.wholerows];
+ ////int* cpudata=new int[this->step*this->wholerows/sizeof(int)];
+ //openCLSafeCall(clEnqueueReadBuffer(src.clCxt->impl->clCmdQueue, (cl_mem)dst.data, CL_TRUE,
+ // 0, 32*dst.wholerows, cputemp, 0, NULL, NULL));
+ //for(int i=0;i<dst.wholerows;i++)
+ //{
+ // for(int j=0;j<dst.wholecols;j++)
+ // {
+ // cout<< (int)cputemp[i*32+j]<<" ";
+ // }
+ // cout<<endl;
+ //}
+ //delete []cputemp;
}
////////////////////////////////////////////////////////////////////////
void warpAffine_gpu(const oclMat &src, oclMat &dst, F coeffs[2][3], int interpolation)
{
- CV_Assert( (src.channels() == dst.channels()) );
+ CV_Assert( (src.oclchannels() == dst.oclchannels()) );
int srcStep = src.step1();
int dstStep = dst.step1();
- float float_coeffs[2][3];
- cl_mem coeffs_cm;
+ float float_coeffs[2][3];
+ cl_mem coeffs_cm;
Context *clCxt = src.clCxt;
string s[3] = {"NN", "Linear", "Cubic"};
string kernelName = "warpAffine" + s[interpolation];
- if(src.clCxt -> impl -> double_support != 0)
- {
- cl_int st;
- coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st );
- openCLVerifyCall(st);
- openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0));
- }else{
- cl_int st;
- for(int m=0;m<2;m++)
- for(int n=0;n<3;n++)
- {
- float_coeffs[m][n]=coeffs[m][n];
- }
- coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st );
- openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0));
-
- }
+ if(src.clCxt -> impl -> double_support != 0)
+ {
+ cl_int st;
+ coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st );
+ openCLVerifyCall(st);
+ openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0));
+ }
+ else
+ {
+ cl_int st;
+ for(int m = 0; m < 2; m++)
+ for(int n = 0; n < 3; n++)
+ {
+ float_coeffs[m][n] = coeffs[m][n];
+ }
+ coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st );
+ openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0));
+
+ }
//TODO: improve this kernel
size_t blkSizeX = 16, blkSizeY = 16;
size_t glbSizeX;
args.push_back(make_pair(sizeof(cl_mem), (void *)&coeffs_cm));
args.push_back(make_pair(sizeof(cl_int), (void *)&cols));
- openCLExecuteKernel(clCxt, &imgproc_warpAffine, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
+ openCLExecuteKernel(clCxt, &imgproc_warpAffine, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
openCLSafeCall(clReleaseMemObject(coeffs_cm));
}
void warpPerspective_gpu(const oclMat &src, oclMat &dst, double coeffs[3][3], int interpolation)
{
- CV_Assert( (src.channels() == dst.channels()) );
+ CV_Assert( (src.oclchannels() == dst.oclchannels()) );
int srcStep = src.step1();
int dstStep = dst.step1();
- float float_coeffs[3][3];
- cl_mem coeffs_cm;
+ float float_coeffs[3][3];
+ cl_mem coeffs_cm;
Context *clCxt = src.clCxt;
string s[3] = {"NN", "Linear", "Cubic"};
string kernelName = "warpPerspective" + s[interpolation];
- if(src.clCxt -> impl -> double_support != 0)
- {
- cl_int st;
- coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st );
- openCLVerifyCall(st);
- openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0));
- }else{
- cl_int st;
- for(int m=0;m<3;m++)
- for(int n=0;n<3;n++)
- float_coeffs[m][n]=coeffs[m][n];
-
- coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st );
- openCLVerifyCall(st);
- openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0));
- }
+ if(src.clCxt -> impl -> double_support != 0)
+ {
+ cl_int st;
+ coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st );
+ openCLVerifyCall(st);
+ openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0));
+ }
+ else
+ {
+ cl_int st;
+ for(int m = 0; m < 3; m++)
+ for(int n = 0; n < 3; n++)
+ float_coeffs[m][n] = coeffs[m][n];
+
+ coeffs_cm = clCreateBuffer( clCxt->impl->clContext, CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st );
+ openCLVerifyCall(st);
+ openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0));
+ }
//TODO: improve this kernel
size_t blkSizeX = 16, blkSizeY = 16;
size_t glbSizeX;
args.push_back(make_pair(sizeof(cl_mem), (void *)&coeffs_cm));
args.push_back(make_pair(sizeof(cl_int), (void *)&cols));
- openCLExecuteKernel(clCxt, &imgproc_warpPerspective, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
+ openCLExecuteKernel(clCxt, &imgproc_warpPerspective, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
openCLSafeCall(clReleaseMemObject(coeffs_cm));
}
}
{
int interpolation = flags & INTER_MAX;
- CV_Assert((src.depth() == CV_8U || src.depth() == CV_32F) && src.channels() != 2 && src.channels() != 3);
+ CV_Assert((src.depth() == CV_8U || src.depth() == CV_32F) && src.oclchannels() != 2 && src.oclchannels() != 3);
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
dst.create(dsize, src.type());
{
int interpolation = flags & INTER_MAX;
- CV_Assert((src.depth() == CV_8U || src.depth() == CV_32F) && src.channels() != 2 && src.channels() != 3);
+ CV_Assert((src.depth() == CV_8U || src.depth() == CV_32F) && src.oclchannels() != 2 && src.oclchannels() != 3);
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
dst.create(dsize, src.type());
void integral(const oclMat &src, oclMat &sum, oclMat &sqsum)
{
CV_Assert(src.type() == CV_8UC1);
- if(src.clCxt->impl->double_support == 0 && src.depth() ==CV_64F)
+ if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"select device don't support double");
+ CV_Error(CV_GpuNotSupported, "select device don't support double");
}
int vlen = 4;
int offset = src.offset / vlen;
if (ksize < 0)
scale *= 2.;
- if (src.depth() == CV_8U){
+ if (src.depth() == CV_8U)
+ {
scale *= 255.;
scale = 1. / scale;
- }else{
+ }
+ else
+ {
scale = 1. / scale;
}
if (ksize > 0)
void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize,
double k, int borderType)
{
- if(src.clCxt->impl->double_support == 0 && src.depth() ==CV_64F)
+ if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"select device don't support double");
+ CV_Error(CV_GpuNotSupported, "select device don't support double");
}
- CV_Assert(src.cols >= blockSize/2 && src.rows >= blockSize/2);
+ CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2);
oclMat Dx, Dy;
CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
extractCovData(src, Dx, Dy, blockSize, ksize, borderType);
void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int borderType)
{
- if(src.clCxt->impl->double_support == 0 && src.depth() ==CV_64F)
+ if(src.clCxt->impl->double_support == 0 && src.depth() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"select device don't support double");
+ CV_Error(CV_GpuNotSupported, "select device don't support double");
}
- CV_Assert(src.cols >= blockSize/2 && src.rows >= blockSize/2);
+ CV_Assert(src.cols >= blockSize / 2 && src.rows >= blockSize / 2);
oclMat Dx, Dy;
CV_Assert(borderType == cv::BORDER_CONSTANT || borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
extractCovData(src, Dx, Dy, blockSize, ksize, borderType);
if( src.empty() )
CV_Error( CV_StsBadArg, "The input image is empty" );
- if( src.depth() != CV_8U || src.channels() != 4 )
+ if( src.depth() != CV_8U || src.oclchannels() != 4 )
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
if(src.clCxt->impl->double_support == 0)
if( src.empty() )
CV_Error( CV_StsBadArg, "The input image is empty" );
- if( src.depth() != CV_8U || src.channels() != 4 )
+ if( src.depth() != CV_8U || src.oclchannels() != 4 )
CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
if(src.clCxt->impl->double_support == 0)
int dataWidth_bits = 4;
int mask = dataWidth - 1;
- int cols = mat_src.cols * mat_src.channels();
+ int cols = mat_src.cols * mat_src.oclchannels();
int src_offset = mat_src.offset;
int hist_step = mat_sub_hist.step >> 2;
int left_col = 0, right_col = 0;
- if(cols >= dataWidth*2 -1)
+ if(cols >= dataWidth * 2 - 1)
{
- left_col = dataWidth - (src_offset & mask);
- left_col &= mask;
- src_offset += left_col;
- cols -= left_col;
- right_col = cols & mask;
- cols -= right_col;
+ left_col = dataWidth - (src_offset & mask);
+ left_col &= mask;
+ src_offset += left_col;
+ cols -= left_col;
+ right_col = cols & mask;
+ cols -= right_col;
}
else
{
- left_col = cols;
- right_col = 0;
- cols = 0;
- globalThreads[0] = 0;
+ left_col = cols;
+ right_col = 0;
+ cols = 0;
+ globalThreads[0] = 0;
}
vector<pair<size_t , const void *> > args;
if(globalThreads[0] != 0)
{
- int tempcols = cols >> dataWidth_bits;
- int inc_x = globalThreads[0] % tempcols;
- int inc_y = globalThreads[0] / tempcols;
- src_offset >>= dataWidth_bits;
- int src_step = mat_src.step >> dataWidth_bits;
- int datacount = tempcols * mat_src.rows;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src_step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_sub_hist.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&datacount));
- args.push_back( make_pair( sizeof(cl_int), (void *)&tempcols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&inc_x));
- args.push_back( make_pair( sizeof(cl_int), (void *)&inc_y));
- args.push_back( make_pair( sizeof(cl_int), (void *)&hist_step));
- openCLExecuteKernel(clCxt, &imgproc_histogram, kernelName, globalThreads, localThreads, args, -1, depth);
+ int tempcols = cols >> dataWidth_bits;
+ int inc_x = globalThreads[0] % tempcols;
+ int inc_y = globalThreads[0] / tempcols;
+ src_offset >>= dataWidth_bits;
+ int src_step = mat_src.step >> dataWidth_bits;
+ int datacount = tempcols * mat_src.rows;
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src.data));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&src_step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_sub_hist.data));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&datacount));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&tempcols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&inc_x));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&inc_y));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&hist_step));
+ openCLExecuteKernel(clCxt, &imgproc_histogram, kernelName, globalThreads, localThreads, args, -1, depth);
}
if(left_col != 0 || right_col != 0)
{
localThreads[1] = 256;
globalThreads[0] = left_col + right_col;
globalThreads[1] = (mat_src.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
-
+
args.clear();
args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src.data));
args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src.step));
LUT(mat_src, lut, mat_dst);
}
//////////////////////////////////bilateralFilter////////////////////////////////////////////////////
-static void
-oclbilateralFilter_8u( const oclMat& src, oclMat& dst, int d,
- double sigma_color, double sigma_space,
- int borderType )
-{
- int cn = src.channels();
- int i, j, k, maxk, radius;
- Size size = src.size();
-
- CV_Assert( (src.type() == CV_8UC1 || src.download_channels == 3) &&
- src.type() == dst.type() && src.size() == dst.size() &&
- src.data != dst.data );
-
- if( sigma_color <= 0 )
- sigma_color = 1;
- if( sigma_space <= 0 )
- sigma_space = 1;
-
- double gauss_color_coeff = -0.5/(sigma_color*sigma_color);
- double gauss_space_coeff = -0.5/(sigma_space*sigma_space);
-
- if( d <= 0 )
- radius = cvRound(sigma_space*1.5);
- else
- radius = d/2;
- radius = MAX(radius, 1);
- d = radius*2 + 1;
-
- oclMat temp;
- copyMakeBorder( src, temp, radius, radius, radius, radius, borderType );
-
- vector<float> _color_weight(cn*256);
- vector<float> _space_weight(d*d);
- vector<int> _space_ofs(d*d);
- float* color_weight = &_color_weight[0];
- float* space_weight = &_space_weight[0];
- int* space_ofs = &_space_ofs[0];
-
- // initialize color-related bilateral filter coefficients
- for( i = 0; i < 256*cn; i++ )
- color_weight[i] = (float)std::exp(i*i*gauss_color_coeff);
-
- // initialize space-related bilateral filter coefficients
- for( i = -radius, maxk = 0; i <= radius; i++ )
- for( j = -radius; j <= radius; j++ )
+ static void
+ oclbilateralFilter_8u( const oclMat &src, oclMat &dst, int d,
+ double sigma_color, double sigma_space,
+ int borderType )
{
- double r = std::sqrt((double)i*i + (double)j*j);
- if( r > radius )
- continue;
- space_weight[maxk] = (float)std::exp(r*r*gauss_space_coeff);
- space_ofs[maxk++] = (int)(i*temp.step + j*cn);
+ int cn = src.channels();
+ int i, j, k, maxk, radius;
+ Size size = src.size();
+
+ CV_Assert( (src.channels() == 1 || src.channels() == 3) &&
+ src.type() == dst.type() && src.size() == dst.size() &&
+ src.data != dst.data );
+
+ if( sigma_color <= 0 )
+ sigma_color = 1;
+ if( sigma_space <= 0 )
+ sigma_space = 1;
+
+ double gauss_color_coeff = -0.5 / (sigma_color * sigma_color);
+ double gauss_space_coeff = -0.5 / (sigma_space * sigma_space);
+
+ if( d <= 0 )
+ radius = cvRound(sigma_space * 1.5);
+ else
+ radius = d / 2;
+ radius = MAX(radius, 1);
+ d = radius * 2 + 1;
+
+ oclMat temp;
+ copyMakeBorder( src, temp, radius, radius, radius, radius, borderType );
+
+ vector<float> _color_weight(cn * 256);
+ vector<float> _space_weight(d * d);
+ vector<int> _space_ofs(d * d);
+ float *color_weight = &_color_weight[0];
+ float *space_weight = &_space_weight[0];
+ int *space_ofs = &_space_ofs[0];
+ int dst_step_in_pixel = dst.step / dst.elemSize();
+ int dst_offset_in_pixel = dst.offset / dst.elemSize();
+ int temp_step_in_pixel = temp.step / temp.elemSize();
+ // initialize color-related bilateral filter coefficients
+ for( i = 0; i < 256 * cn; i++ )
+ color_weight[i] = (float)std::exp(i * i * gauss_color_coeff);
+
+ // initialize space-related bilateral filter coefficients
+ for( i = -radius, maxk = 0; i <= radius; i++ )
+ for( j = -radius; j <= radius; j++ )
+ {
+ double r = std::sqrt((double)i * i + (double)j * j);
+ if( r > radius )
+ continue;
+ space_weight[maxk] = (float)std::exp(r * r * gauss_space_coeff);
+ space_ofs[maxk++] = (int)(i * temp_step_in_pixel + j);
+ }
+ oclMat oclcolor_weight(1, cn * 256, CV_32FC1, color_weight);
+ oclMat oclspace_weight(1, d * d, CV_32FC1, space_weight);
+ oclMat oclspace_ofs(1, d * d, CV_32SC1, space_ofs);
+
+ string kernelName = "bilateral";
+ size_t localThreads[3] = { 16, 16, 1 };
+ size_t globalThreads[3] = { (dst.cols + localThreads[0] - 1) / localThreads[0] *localThreads[0],
+ (dst.rows + localThreads[1] - 1) / localThreads[1] *localThreads[1],
+ 1
+ };
+ if((dst.type() == CV_8UC1) && ((dst.offset & 3) == 0) && ((dst.cols & 3) == 0))
+ {
+ kernelName = "bilateral2";
+ globalThreads[0] = (dst.cols / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
+ }
+ vector<pair<size_t , const void *> > args;
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&temp.data ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&maxk ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&radius ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step_in_pixel ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset_in_pixel ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&temp_step_in_pixel ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&temp.rows ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&temp.cols ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&oclcolor_weight.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&oclspace_weight.data ));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&oclspace_ofs.data ));
+ openCLExecuteKernel(src.clCxt, &imgproc_bilateral, kernelName, globalThreads, localThreads, args, dst.oclchannels(), dst.depth());
}
- oclMat oclcolor_weight(1,cn*256,CV_32FC1,color_weight);
- oclMat oclspace_weight(1,d*d,CV_32FC1,space_weight);
- oclMat oclspace_ofs(1,d*d,CV_32SC1,space_ofs);
-
- string kernelName = "bilateral";
- size_t localThreads[3] = { 16, 16, 1 };
- size_t globalThreads[3] = { (dst.cols+ localThreads[0]-1)/localThreads[0] * localThreads[0],
- (dst.rows+ localThreads[1]-1)/localThreads[1]* localThreads[1],
- 1};
- vector<pair<size_t ,const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&temp.data ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&maxk ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&radius ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&temp.step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&temp.rows ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&temp.cols ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&oclcolor_weight.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&oclspace_weight.data ));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&oclspace_ofs.data ));
- openCLExecuteKernel(src.clCxt, &imgproc_bilateral, kernelName, globalThreads, localThreads, args, -1, -1);
-}
void bilateralFilter(const oclMat &src, oclMat &dst, int radius, double sigmaclr, double sigmaspc, int borderType)
{
- dst.create( src.size(), src.type() );
- if( src.depth() == CV_8U )
- oclbilateralFilter_8u( src, dst, radius, sigmaclr, sigmaspc, borderType );
- else
- CV_Error( CV_StsUnsupportedFormat,
- "Bilateral filtering is only implemented for 8uimages" );
+ dst.create( src.size(), src.type() );
+ if( src.depth() == CV_8U )
+ oclbilateralFilter_8u( src, dst, radius, sigmaclr, sigmaspc, borderType );
+ else
+ CV_Error( CV_StsUnsupportedFormat,
+ "Bilateral filtering is only implemented for 8uimages" );
}
}
{
return (total + grain - 1) / grain;
}
-void convolve_run(const oclMat &src, const oclMat &temp1,oclMat &dst,string kernelName,const char** kernelString)
+void convolve_run(const oclMat &src, const oclMat &temp1, oclMat &dst, string kernelName, const char **kernelString)
{
CV_Assert(src.depth() == CV_32FC1);
CV_Assert(temp1.depth() == CV_32F);
- CV_Assert(temp1.cols <= 17 && temp1.rows <=17);
+ CV_Assert(temp1.cols <= 17 && temp1.rows <= 17);
- dst.create(src.size(),src.type());
+ dst.create(src.size(), src.type());
CV_Assert(src.cols == dst.cols && src.rows == dst.rows);
CV_Assert(src.type() == dst.type());
Context *clCxt = src.clCxt;
- int channels = dst.channels();
+ int channels = dst.oclchannels();
int depth = dst.depth();
- size_t vector_length =1;
- int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length-1);
+ size_t vector_length = 1;
+ int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1);
int cols = divUp(dst.cols * channels + offset_cols, vector_length);
int rows = dst.rows;
size_t localThreads[3] = { 16, 16, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(rows, localThreads[1]) * localThreads[1],
- 1};
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(rows, localThreads[1]) *localThreads[1],
+ 1
+ };
- vector<pair<size_t ,const void *> > args;
+ vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&temp1.data ));
args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
}
-void cv::ocl::convolve(const oclMat& x, const oclMat& t, oclMat& y)
+void cv::ocl::convolve(const oclMat &x, const oclMat &t, oclMat &y)
{
CV_Assert(x.depth() == CV_32F);
CV_Assert(t.depth() == CV_32F);
CV_Assert(x.type() == y.type() && x.size() == y.size());
- y.create(x.size(),x.type());
+ y.create(x.size(), x.type());
string kernelName = "convolve";
-
+
convolve_run(x, t, y, kernelName, &imgproc_convolve);
}
#endif /* !defined (HAVE_OPENCL) */
}
void openCLMallocPitch(Context * /*clCxt*/, void ** /*dev_ptr*/, size_t * /*pitch*/,
- size_t /*widthInBytes*/, size_t /*height*/)
+ size_t /*widthInBytes*/, size_t /*height*/)
{
throw_nogpu();
}
void openCLMemcpy2D(Context * /*clCxt*/, void * /*dst*/, size_t /*dpitch*/,
- const void * /*src*/, size_t /*spitch*/,
- size_t /*width*/, size_t /*height*/, enum openCLMemcpyKind /*kind*/)
+ const void * /*src*/, size_t /*spitch*/,
+ size_t /*width*/, size_t /*height*/, enum openCLMemcpyKind /*kind*/)
{
throw_nogpu();
}
void openCLCopyBuffer2D(Context * /*clCxt*/, void * /*dst*/, size_t /*dpitch*/,
- const void * /*src*/, size_t /*spitch*/,
- size_t /*width*/, size_t /*height*/, enum openCLMemcpyKind /*kind*/)
+ const void * /*src*/, size_t /*spitch*/,
+ size_t /*width*/, size_t /*height*/, enum openCLMemcpyKind /*kind*/)
{
throw_nogpu();
}
- cl_mem openCLCreateBuffer(Context *,size_t, size_t)
+ cl_mem openCLCreateBuffer(Context *, size_t, size_t)
{
throw_nogpu();
}
- void openCLReadBuffer(Context *, cl_mem, void*, size_t)
+ void openCLReadBuffer(Context *, cl_mem, void *, size_t)
{
throw_nogpu();
}
}
cl_kernel openCLGetKernelFromSource(const Context * /*clCxt*/,
- const char ** /*fileName*/, string /*kernelName*/)
+ const char ** /*fileName*/, string /*kernelName*/)
{
throw_nogpu();
}
void openCLVerifyKernel(const Context * /*clCxt*/, cl_kernel /*kernel*/, size_t * /*blockSize*/,
- size_t * /*globalThreads*/, size_t * /*localThreads*/)
+ size_t * /*globalThreads*/, size_t * /*localThreads*/)
{
throw_nogpu();
}
cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
- const size_t size)
+ const size_t size)
{
throw_nogpu();
}
int double_support;
Impl()
{
- memset(extra_options,0,512);
+ memset(extra_options, 0, 512);
}
};
cl_device_type _devicetype;
switch(devicetype)
{
- case CVCL_DEVICE_TYPE_DEFAULT:
- _devicetype = CL_DEVICE_TYPE_DEFAULT;
- break;
- case CVCL_DEVICE_TYPE_CPU:
- _devicetype = CL_DEVICE_TYPE_CPU;
- break;
- case CVCL_DEVICE_TYPE_GPU:
- _devicetype = CL_DEVICE_TYPE_GPU;
- break;
- case CVCL_DEVICE_TYPE_ACCELERATOR:
- _devicetype = CL_DEVICE_TYPE_ACCELERATOR;
- break;
- case CVCL_DEVICE_TYPE_ALL:
- _devicetype = CL_DEVICE_TYPE_ALL;
- break;
- default:
- CV_Error(CV_GpuApiCallError,"Unkown device type");
+ case CVCL_DEVICE_TYPE_DEFAULT:
+ _devicetype = CL_DEVICE_TYPE_DEFAULT;
+ break;
+ case CVCL_DEVICE_TYPE_CPU:
+ _devicetype = CL_DEVICE_TYPE_CPU;
+ break;
+ case CVCL_DEVICE_TYPE_GPU:
+ _devicetype = CL_DEVICE_TYPE_GPU;
+ break;
+ case CVCL_DEVICE_TYPE_ACCELERATOR:
+ _devicetype = CL_DEVICE_TYPE_ACCELERATOR;
+ break;
+ case CVCL_DEVICE_TYPE_ALL:
+ _devicetype = CL_DEVICE_TYPE_ALL;
+ break;
+ default:
+ CV_Error(CV_GpuApiCallError, "Unkown device type");
}
int devcienums = 0;
// Platform info
ocltmpinfo.impl->devices.push_back(devices[j]);
openCLSafeCall(clGetDeviceInfo(devices[j], CL_DEVICE_NAME, 256, deviceName, NULL));
ocltmpinfo.impl->devName.push_back(std::string(deviceName));
+ ocltmpinfo.DeviceName.push_back(std::string(deviceName));
}
delete[] devices;
oclinfo.push_back(ocltmpinfo);
openCLVerifyCall(status);
//create the command queue using the first device of the list
oclinfo.impl->clCmdQueue = clCreateCommandQueue(oclinfo.impl->oclcontext, oclinfo.impl->devices[devnum],
- CL_QUEUE_PROFILING_ENABLE, &status);
+ CL_QUEUE_PROFILING_ENABLE, &status);
openCLVerifyCall(status);
//get device information
openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_WORK_GROUP_SIZE,
- sizeof(size_t), (void *)&oclinfo.impl->maxWorkGroupSize, NULL));
+ sizeof(size_t), (void *)&oclinfo.impl->maxWorkGroupSize, NULL));
openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
- sizeof(cl_uint), (void *)&oclinfo.impl->maxDimensions, NULL));
+ sizeof(cl_uint), (void *)&oclinfo.impl->maxDimensions, NULL));
oclinfo.impl->maxWorkItemSizes = new size_t[oclinfo.impl->maxDimensions];
openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_WORK_ITEM_SIZES,
- sizeof(size_t)*oclinfo.impl->maxDimensions, (void *)oclinfo.impl->maxWorkItemSizes, NULL));
+ sizeof(size_t)*oclinfo.impl->maxDimensions, (void *)oclinfo.impl->maxWorkItemSizes, NULL));
openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_MAX_COMPUTE_UNITS,
- sizeof(cl_uint), (void *)&oclinfo.impl->maxComputeUnits, NULL));
+ sizeof(cl_uint), (void *)&oclinfo.impl->maxComputeUnits, NULL));
//initialize extra options for compilation. Currently only fp64 is included.
//Assume 4KB is enough to store all possible extensions.
char extends_set[EXT_LEN];
size_t extends_size;
openCLSafeCall(clGetDeviceInfo(oclinfo.impl->devices[devnum], CL_DEVICE_EXTENSIONS,
- EXT_LEN, (void *)extends_set, &extends_size));
+ EXT_LEN, (void *)extends_set, &extends_size));
CV_Assert(extends_size < EXT_LEN);
- extends_set[EXT_LEN-1] = 0;
+ extends_set[EXT_LEN - 1] = 0;
//oclinfo.extra_options = NULL;
int fp64_khr = string(extends_set).find("cl_khr_fp64");
}
Context::setContext(oclinfo);
}
- void* getoclContext()\r
- {\r
- return &(Context::getContext()->impl->clContext);\r
- }\r
- void* getoclCommandQueue()
- {
- return &(Context::getContext()->impl->clCmdQueue);
- }
+ void *getoclContext()
+
+ {
+
+ return &(Context::getContext()->impl->clContext);
+
+ }
+
+ void *getoclCommandQueue()
+ {
+ return &(Context::getContext()->impl->clCmdQueue);
+ }
void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size)
{
cl_int status;
status = clEnqueueReadBuffer(clCxt->impl->clCmdQueue, dst_buffer, CL_TRUE, 0,
- size, host_buffer, 0, NULL, NULL);
+ size, host_buffer, 0, NULL, NULL);
openCLVerifyCall(status);
}
cl_mem openCLCreateBuffer(Context *clCxt, size_t flag , size_t size)
{
cl_int status;
- cl_mem buffer = clCreateBuffer(clCxt->impl->clContext,(cl_mem_flags)flag, size, NULL, &status);
+ cl_mem buffer = clCreateBuffer(clCxt->impl->clContext, (cl_mem_flags)flag, size, NULL, &status);
openCLVerifyCall(status);
return buffer;
}
void openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch,
- size_t widthInBytes, size_t height)
+ size_t widthInBytes, size_t height)
{
cl_int status;
*dev_ptr = clCreateBuffer(clCxt->impl->clContext, CL_MEM_READ_WRITE,
- widthInBytes * height, 0, &status);
+ widthInBytes * height, 0, &status);
openCLVerifyCall(status);
*pitch = widthInBytes;
}
void openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
- const void *src, size_t spitch,
- size_t width, size_t height, enum openCLMemcpyKind kind, int channels)
+ const void *src, size_t spitch,
+ size_t width, size_t height, enum openCLMemcpyKind kind, int channels)
{
size_t buffer_origin[3] = {0, 0, 0};
size_t host_origin[3] = {0, 0, 0};
size_t region[3] = {width, height, 1};
if(kind == clMemcpyHostToDevice)
{
- if(dpitch == width || channels==3 || height == 1)
- {
- openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)dst, CL_TRUE,
- 0, width*height, src, 0, NULL, NULL));
- }
- else
- {
- openCLSafeCall(clEnqueueWriteBufferRect(clCxt->impl->clCmdQueue, (cl_mem)dst, CL_TRUE,
- buffer_origin, host_origin, region, dpitch, 0, spitch, 0, src, 0, 0, 0));
- }
+ if(dpitch == width || channels == 3 || height == 1)
+ {
+ openCLSafeCall(clEnqueueWriteBuffer(clCxt->impl->clCmdQueue, (cl_mem)dst, CL_TRUE,
+ 0, width * height, src, 0, NULL, NULL));
+ }
+ else
+ {
+ openCLSafeCall(clEnqueueWriteBufferRect(clCxt->impl->clCmdQueue, (cl_mem)dst, CL_TRUE,
+ buffer_origin, host_origin, region, dpitch, 0, spitch, 0, src, 0, 0, 0));
+ }
}
else if(kind == clMemcpyDeviceToHost)
{
- if(spitch == width || channels==3 || height == 1)
- {
- openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)src, CL_TRUE,
- 0, width*height, dst, 0, NULL, NULL));
- }
- else
- {
- openCLSafeCall(clEnqueueReadBufferRect(clCxt->impl->clCmdQueue, (cl_mem)src, CL_TRUE,
- buffer_origin, host_origin, region, spitch, 0, dpitch, 0, dst, 0, 0, 0));
- }
+ if(spitch == width || channels == 3 || height == 1)
+ {
+ openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)src, CL_TRUE,
+ 0, width * height, dst, 0, NULL, NULL));
+ }
+ else
+ {
+ openCLSafeCall(clEnqueueReadBufferRect(clCxt->impl->clCmdQueue, (cl_mem)src, CL_TRUE,
+ buffer_origin, host_origin, region, spitch, 0, dpitch, 0, dst, 0, 0, 0));
+ }
}
}
void openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
- const void *src, size_t spitch,
- size_t width, size_t height, int src_offset, enum openCLMemcpyKind kind)
+ const void *src, size_t spitch,
+ size_t width, size_t height, int src_offset, enum openCLMemcpyKind kind)
{
size_t src_origin[3] = {src_offset % spitch, src_offset / spitch, 0};
size_t dst_origin[3] = {dst_offset % dpitch, dst_offset / dpitch, 0};
size_t region[3] = {width, height, 1};
openCLSafeCall(clEnqueueCopyBufferRect(clCxt->impl->clCmdQueue, (cl_mem)src, (cl_mem)dst, src_origin, dst_origin,
- region, spitch, 0, dpitch, 0, 0, 0, 0));
+ region, spitch, 0, dpitch, 0, 0, 0, 0));
}
void openCLFree(void *devPtr)
return openCLGetKernelFromSource(clCxt, source, kernelName, NULL);
}
-
+
void setBinpath(const char *path)
{
- Context *clcxt = Context::getContext();
- clcxt->impl->Binpath = path;
+ Context *clcxt = Context::getContext();
+ clcxt->impl->Binpath = path;
}
int savetofile(const Context *clcxt, cl_program &program, const char *fileName)
{
size_t *binarySizes = (size_t *)malloc( sizeof(size_t) * numDevices );
openCLSafeCall(clGetProgramInfo(program,
- CL_PROGRAM_BINARY_SIZES,
- sizeof(size_t) * numDevices,
- binarySizes, NULL));
+ CL_PROGRAM_BINARY_SIZES,
+ sizeof(size_t) * numDevices,
+ binarySizes, NULL));
size_t i = 0;
//copy over all of the generated binaries.
char **binaries = (char **)malloc( sizeof(char *) * numDevices );
if(binaries == NULL)
{
- CV_Error(CV_StsNoMem,"Failed to allocate host memory.(binaries)\r\n");
+ CV_Error(CV_StsNoMem, "Failed to allocate host memory.(binaries)\r\n");
}
for(i = 0; i < numDevices; i++)
binaries[i] = (char *)malloc( sizeof(char) * binarySizes[i]);
if(binaries[i] == NULL)
{
- CV_Error(CV_StsNoMem,"Failed to allocate host memory.(binaries[i])\r\n");
+ CV_Error(CV_StsNoMem, "Failed to allocate host memory.(binaries[i])\r\n");
}
}
else
}
}
openCLSafeCall(clGetProgramInfo(program,
- CL_PROGRAM_BINARIES,
- sizeof(char *) * numDevices,
- binaries,
- NULL));
+ CL_PROGRAM_BINARIES,
+ sizeof(char *) * numDevices,
+ binaries,
+ NULL));
//dump out each binary into its own separate file.
for(i = 0; i < numDevices; i++)
{
char deviceName[1024];
openCLSafeCall(clGetDeviceInfo(devices[i],
- CL_DEVICE_NAME,
- sizeof(deviceName),
- deviceName,
- NULL));
+ CL_DEVICE_NAME,
+ sizeof(deviceName),
+ deviceName,
+ NULL));
printf( "%s binary kernel: %s\n", deviceName, fileName);
FILE *fp = fopen(fileName, "wb+");
else
{
printf("Skipping %s since there is no binary data to write!\n",
- fileName);
+ fileName);
}
}
free(binarySizes);
cl_kernel openCLGetKernelFromSource(const Context *clCxt, const char **source, string kernelName,
- const char *build_options)
+ const char *build_options)
{
cl_kernel kernel;
cl_program program ;
cl_int status = 0;
stringstream src_sign;
string srcsign;
- string filename;
+ string filename;
CV_Assert(programCache != NULL);
if(NULL != build_options)
- {
+ {
src_sign << (int64)(*source) << clCxt->impl->clContext << "_" << build_options;
- }
+ }
else
- {
- src_sign << (int64)(*source) << clCxt->impl->clContext;
- }
+ {
+ src_sign << (int64)(*source) << clCxt->impl->clContext;
+ }
srcsign = src_sign.str();
program = NULL;
//config build programs
char all_build_options[1024];
memset(all_build_options, 0, 1024);
- char zeromem[512]={0};
- if(0!=memcmp(clCxt -> impl->extra_options, zeromem,512))
+ char zeromem[512] = {0};
+ if(0 != memcmp(clCxt -> impl->extra_options, zeromem, 512))
strcat(all_build_options, clCxt -> impl->extra_options);
strcat(all_build_options, " ");
if(build_options != NULL)
strcat(all_build_options, build_options);
- if(all_build_options != NULL)
- {
- filename = clCxt->impl->Binpath + kernelName + "_" + clCxt->impl->devName + all_build_options + ".clb";
- }
- else
- {
- filename = clCxt->impl->Binpath + kernelName + "_" + clCxt->impl->devName + ".clb";
- }
+ if(all_build_options != NULL)
+ {
+ filename = clCxt->impl->Binpath + kernelName + "_" + clCxt->impl->devName + all_build_options + ".clb";
+ }
+ else
+ {
+ filename = clCxt->impl->Binpath + kernelName + "_" + clCxt->impl->devName + ".clb";
+ }
FILE *fp;
fp = fopen(filename.c_str(), "rb");
if(fp == NULL || clCxt->impl->Binpath.size() == 0) //we should genetate a binary file for the first time.
{
program = clCreateProgramWithSource(
- clCxt->impl->clContext, 1, source, NULL, &status);
+ clCxt->impl->clContext, 1, source, NULL, &status);
openCLVerifyCall(status);
status = clBuildProgram(program, 1, &(clCxt->impl->devices[0]), all_build_options, NULL, NULL);
- if(status == CL_SUCCESS && clCxt->impl->Binpath.size())
- savetofile(clCxt, program, filename.c_str());
+ if(status == CL_SUCCESS && clCxt->impl->Binpath.size())
+ savetofile(clCxt, program, filename.c_str());
}
else
{
fclose(fp);
cl_int status = 0;
program = clCreateProgramWithBinary(clCxt->impl->clContext,
- 1,
- &(clCxt->impl->devices[0]),
- (const size_t *)&binarySize,
- (const unsigned char **)&binary,
- NULL,
- &status);
+ 1,
+ &(clCxt->impl->devices[0]),
+ (const size_t *)&binarySize,
+ (const unsigned char **)&binary,
+ NULL,
+ &status);
openCLVerifyCall(status);
status = clBuildProgram(program, 1, &(clCxt->impl->devices[0]), all_build_options, NULL, NULL);
}
char *buildLog = NULL;
size_t buildLogSize = 0;
logStatus = clGetProgramBuildInfo(program,
- clCxt->impl->devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize,
- buildLog, &buildLogSize);
+ clCxt->impl->devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize,
+ buildLog, &buildLogSize);
if(logStatus != CL_SUCCESS)
cout << "Failed to build the program and get the build info." << endl;
buildLog = new char[buildLogSize];
CV_DbgAssert(!!buildLog);
memset(buildLog, 0, buildLogSize);
openCLSafeCall(clGetProgramBuildInfo(program, clCxt->impl->devices[0],
- CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL));
+ CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL));
cout << "\n\t\t\tBUILD LOG\n";
cout << buildLog << endl;
delete buildLog;
//Cache the binary for future use if build_options is null
if( (programCache->cacheSize += 1) < programCache->MAX_PROG_CACHE_SIZE)
programCache->addProgram(srcsign, program);
- else
- cout << "Warning: code cache has been full.\n";
+ else
+ cout << "Warning: code cache has been full.\n";
}
kernel = clCreateKernel(program, kernelName.c_str(), &status);
openCLVerifyCall(status);
}
void openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *blockSize,
- size_t *globalThreads, size_t *localThreads)
+ size_t *globalThreads, size_t *localThreads)
{
size_t kernelWorkGroupSize;
openCLSafeCall(clGetKernelWorkGroupInfo(kernel, clCxt->impl->devices[0],
- CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0));
+ CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0));
CV_DbgAssert( (localThreads[0] <= clCxt->impl->maxWorkItemSizes[0]) &&
- (localThreads[1] <= clCxt->impl->maxWorkItemSizes[1]) &&
- (localThreads[2] <= clCxt->impl->maxWorkItemSizes[2]) &&
- ((localThreads[0] * localThreads[1] * localThreads[2]) <= kernelWorkGroupSize) &&
- (localThreads[0] * localThreads[1] * localThreads[2]) <= clCxt->impl->maxWorkGroupSize);
+ (localThreads[1] <= clCxt->impl->maxWorkItemSizes[1]) &&
+ (localThreads[2] <= clCxt->impl->maxWorkItemSizes[2]) &&
+ ((localThreads[0] * localThreads[1] * localThreads[2]) <= kernelWorkGroupSize) &&
+ (localThreads[0] * localThreads[1] * localThreads[2]) <= clCxt->impl->maxWorkGroupSize);
}
#ifdef PRINT_KERNEL_RUN_TIME
static double total_kernel_time = 0;
#endif
void openCLExecuteKernel_(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
- size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
- int depth, const char *build_options)
+ size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
+ int depth, const char *build_options)
{
//construct kernel name
//The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
cl_kernel kernel;
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options);
-
+
if ( localThreads != NULL)
- {
+ {
globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0];
globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
-
+
size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
cv::ocl::openCLVerifyKernel(clCxt, kernel, &blockSize, globalThreads, localThreads);
}
#ifndef PRINT_KERNEL_RUN_TIME
openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
- localThreads, 0, NULL, NULL));
+ localThreads, 0, NULL, NULL));
#else
cl_event event = NULL;
openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
- localThreads, 0, NULL, &event));
+ localThreads, 0, NULL, &event));
cl_ulong start_time, end_time, queue_time;
double execute_time = 0;
openCLSafeCall(clWaitForEvents(1, &event));
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
- sizeof(cl_ulong), &start_time, 0));
+ sizeof(cl_ulong), &start_time, 0));
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
- sizeof(cl_ulong), &end_time, 0));
+ sizeof(cl_ulong), &end_time, 0));
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED,
- sizeof(cl_ulong), &queue_time, 0));
+ sizeof(cl_ulong), &queue_time, 0));
execute_time = (double)(end_time - start_time) / (1000 * 1000);
total_time = (double)(end_time - queue_time) / (1000 * 1000);
}
void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName,
- size_t globalThreads[3], size_t localThreads[3],
- vector< pair<size_t, const void *> > &args, int channels, int depth)
+ size_t globalThreads[3], size_t localThreads[3],
+ vector< pair<size_t, const void *> > &args, int channels, int depth)
{
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args,
- channels, depth, NULL);
+ channels, depth, NULL);
}
void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName,
- size_t globalThreads[3], size_t localThreads[3],
- vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options)
+ size_t globalThreads[3], size_t localThreads[3],
+ vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options)
{
#ifndef PRINT_KERNEL_RUN_TIME
openCLExecuteKernel_(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
- build_options);
+ build_options);
#else
string data_type[] = { "uchar", "char", "ushort", "short", "int", "float", "double"};
cout << endl;
int i = 0;
for(i = 0; i < RUN_TIMES; i++)
openCLExecuteKernel_(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
- build_options);
+ build_options);
cout << "average kernel excute time: " << total_execute_time / RUN_TIMES << endl; // "ms" << endl;
cout << "average kernel total time: " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl;
}
cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
- const size_t size)
+ const size_t size)
{
int status;
cl_mem con_struct;
openCLSafeCall(status);
openCLSafeCall(clEnqueueWriteBuffer(command_queue, con_struct, 1, 0, size,
- value, 0, 0, 0));
+ value, 0, 0, 0));
return con_struct;
clcxt->impl->clContext = oclinfo.impl->oclcontext;
clcxt->impl->clCmdQueue = oclinfo.impl->clCmdQueue;
clcxt->impl->devices = &oclinfo.impl->devices[oclinfo.impl->devnum];
- clcxt->impl->devName = oclinfo.impl->devName[oclinfo.impl->devnum];
+ clcxt->impl->devName = oclinfo.impl->devName[oclinfo.impl->devnum];
clcxt->impl->maxDimensions = oclinfo.impl->maxDimensions;
clcxt->impl->maxWorkGroupSize = oclinfo.impl->maxWorkGroupSize;
clcxt->impl->maxWorkItemSizes = oclinfo.impl->maxWorkItemSizes;
//}
impl->devices.clear();
impl->devName.clear();
+ DeviceName.clear();
}
Info::~Info()
{
{
impl->devices.push_back(m.impl->devices[i]);
impl->devName.push_back(m.impl->devName[i]);
+ DeviceName.push_back(m.DeviceName[i]);
}
return *this;
}
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////\r
+//\r
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
+//\r
+// By downloading, copying, installing or using the software you agree to this license.\r
+// If you do not agree to this license, do not download, install,\r
+// copy or use the software.\r
+//\r
+//\r
+// License Agreement\r
+// For Open Source Comuter Vision Library\r
+//\r
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.\r
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.\r
+// Third party copyrights are property of their respective owners.\r
+//\r
+// @Authors\r
+// Peng Xiao, pengxiao@multicorewareinc.com\r
+//\r
+// Redistribution and use in source and binary forms, with or without modification,\r
+// are permitted provided that the following conditions are met:\r
+//\r
+// * Redistribution's of source code must retain the above copyright notice,\r
+// this list of conditions and the following disclaimer.\r
+//\r
+// * Redistribution's in binary form must reproduce the above copyright notice,\r
+// this list of conditions and the following disclaimer in the documentation\r
+// and/or other oclMaterials provided with the distribution.\r
+//\r
+// * The name of the copyright holders may not be used to endorse or promote products\r
+// derived from this software without specific prior written permission.\r
+//\r
+// This software is provided by the copyright holders and contributors as is and\r
+// any express or implied warranties, including, but not limited to, the implied\r
+// warranties of merchantability and fitness for a particular urpose are disclaimed.\r
+// In no event shall the Intel Corporation or contributors be liable for any direct,\r
+// indirect, incidental, special, exemplary, or consequential damages\r
+// (including, but not limited to, procurement of substitute goods or services;\r
+// loss of use, data, or profits; or business interruption) however caused\r
+// and on any theory of liability, whether in contract, strict liability,\r
+// or tort (including negligence or otherwise) arising in any way out of\r
+// the use of this software, even if advised of the possibility of such damage.\r
+//\r
+//M*/\r
+\r
+#include <iomanip>\r
+#include "precomp.hpp"\r
+\r
+using namespace std;\r
+using namespace cv;\r
+using namespace cv::ocl;\r
+\r
+\r
+#if !defined (HAVE_OPENCL)\r
+void cv::ocl::interpolateFrames(const oclMat &frame0, const oclMat &frame1,\r
+ const oclMat &fu, const oclMat &fv,\r
+ const oclMat &bu, const oclMat &bv,\r
+ float pos, oclMat &newFrame, oclMat &buf)\r
+{\r
+ throw_nogpu();\r
+}\r
+#else\r
+\r
+namespace cv\r
+{\r
+ namespace ocl\r
+ {\r
+ ///////////////////////////OpenCL kernel strings///////////////////////////\r
+ extern const char *interpolate_frames;\r
+\r
+ namespace interpolate\r
+ {\r
+ //The following are ported from NPP_staging.cu\r
+ // As it is not valid to do pointer offset operations on host for default oclMat's native cl_mem pointer,\r
+ // we may have to do this on kernel\r
+ void memsetKernel(float val, oclMat &img, int height, int offset);\r
+ void normalizeKernel(oclMat &buffer, int height, int factor_offset, int dst_offset);\r
+ void forwardWarpKernel(const oclMat &src, oclMat &buffer, const oclMat &u, const oclMat &v, const float time_scale,\r
+ int b_offset, int d_offset); // buffer, dst offset\r
+\r
+ //OpenCL conversion of nppiStVectorWarp_PSF2x2_32f_C1\r
+ void vectorWarp(const oclMat &src, const oclMat &u, const oclMat &v,\r
+ oclMat &buffer, int buf_offset, float timeScale, int dst_offset);\r
+ //OpenCL conversion of BlendFrames\r
+ void blendFrames(const oclMat &frame0, const oclMat &frame1, const oclMat &buffer,\r
+ float pos, oclMat &newFrame, cl_mem &, cl_mem &);\r
+\r
+ // bind a buffer to an image\r
+ void bindImgTex(const oclMat &img, cl_mem &tex);\r
+ }\r
+ }\r
+}\r
+\r
+void cv::ocl::interpolateFrames(const oclMat &frame0, const oclMat &frame1,\r
+ const oclMat &fu, const oclMat &fv,\r
+ const oclMat &bu, const oclMat &bv,\r
+ float pos, oclMat &newFrame, oclMat &buf)\r
+{\r
+ CV_Assert(frame0.type() == CV_32FC1);\r
+ CV_Assert(frame1.size() == frame0.size() && frame1.type() == frame0.type());\r
+ CV_Assert(fu.size() == frame0.size() && fu.type() == frame0.type());\r
+ CV_Assert(fv.size() == frame0.size() && fv.type() == frame0.type());\r
+ CV_Assert(bu.size() == frame0.size() && bu.type() == frame0.type());\r
+ CV_Assert(bv.size() == frame0.size() && bv.type() == frame0.type());\r
+\r
+ newFrame.create(frame0.size(), frame0.type());\r
+\r
+ buf.create(6 * frame0.rows, frame0.cols, CV_32FC1);\r
+ buf.setTo(Scalar::all(0));\r
+\r
+ size_t step = frame0.step;\r
+\r
+ CV_Assert(frame1.step == step && fu.step == step && fv.step == step && bu.step == step && bv.step == step && newFrame.step == step && buf.step == step);\r
+ cl_mem tex_src0 = 0, tex_src1 = 0;\r
+\r
+ // warp flow\r
+ using namespace interpolate;\r
+\r
+ bindImgTex(frame0, tex_src0);\r
+ bindImgTex(frame1, tex_src1);\r
+\r
+ // CUDA Offsets\r
+ enum\r
+ {\r
+ cov0 = 0,\r
+ cov1,\r
+ fwdU,\r
+ fwdV,\r
+ bwdU,\r
+ bwdV\r
+ };\r
+\r
+ vectorWarp(fu, fu, fv, buf, cov0, pos, fwdU);\r
+ vectorWarp(fv, fu, fv, buf, cov0, pos, fwdV);\r
+ vectorWarp(bu, bu, bv, buf, cov1, 1.0f - pos, bwdU);\r
+ vectorWarp(bv, bu, bv, buf, cov1, 1.0f - pos, bwdU);\r
+\r
+ blendFrames(frame0, frame1, buf, pos, newFrame, tex_src0, tex_src1);\r
+\r
+ openCLFree(tex_src0);\r
+ openCLFree(tex_src1);\r
+}\r
+\r
+void interpolate::memsetKernel(float val, oclMat &img, int height, int offset)\r
+{\r
+ Context *clCxt = Context::getContext();\r
+ string kernelName = "memsetKernel";\r
+ vector< pair<size_t, const void *> > args;\r
+ int step = img.step / sizeof(float);\r
+ offset = step * height * offset;\r
+\r
+ args.push_back( make_pair( sizeof(cl_float), (void *)&val));\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&img.data));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&img.cols));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&height));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&step));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&offset));\r
+\r
+ size_t globalThreads[3] = {img.cols, height, 1};\r
+ size_t localThreads[3] = {16, 16, 1};\r
+ openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1);\r
+}\r
+void interpolate::normalizeKernel(oclMat &buffer, int height, int factor_offset, int dst_offset)\r
+{\r
+ Context *clCxt = Context::getContext();\r
+ string kernelName = "normalizeKernel";\r
+ vector< pair<size_t, const void *> > args;\r
+ int step = buffer.step / sizeof(float);\r
+ factor_offset = step * height * factor_offset;\r
+ dst_offset = step * height * dst_offset;\r
+\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buffer.data));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buffer.cols));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&height));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&step));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&factor_offset));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset));\r
+\r
+ size_t globalThreads[3] = {buffer.cols, height, 1};\r
+ size_t localThreads[3] = {16, 16, 1};\r
+ openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1);\r
+}\r
+\r
+void interpolate::forwardWarpKernel(const oclMat &src, oclMat &buffer, const oclMat &u, const oclMat &v, const float time_scale,\r
+ int b_offset, int d_offset)\r
+{\r
+ Context *clCxt = Context::getContext();\r
+ string kernelName = "forwardWarpKernel";\r
+ vector< pair<size_t, const void *> > args;\r
+ int f_step = u.step / sizeof(float); // flow step\r
+ int b_step = buffer.step / sizeof(float);\r
+\r
+ b_offset = b_step * src.rows * b_offset;\r
+ d_offset = b_step * src.rows * d_offset;\r
+\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buffer.data));\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&u.data));\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&v.data));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&f_step));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&b_step));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&b_offset));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&d_offset));\r
+ args.push_back( make_pair( sizeof(cl_float), (void *)&time_scale));\r
+\r
+ size_t globalThreads[3] = {src.cols, src.rows, 1};\r
+ size_t localThreads[3] = {16, 16, 1};\r
+ openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1);\r
+}\r
+\r
+void interpolate::vectorWarp(const oclMat &src, const oclMat &u, const oclMat &v,\r
+ oclMat &buffer, int b_offset, float timeScale, int d_offset)\r
+{\r
+ memsetKernel(0, buffer, src.rows, b_offset);\r
+ forwardWarpKernel(src, buffer, u, v, timeScale, b_offset, d_offset);\r
+ normalizeKernel(buffer, src.rows, b_offset, d_offset);\r
+}\r
+\r
+void interpolate::blendFrames(const oclMat &frame0, const oclMat &frame1, const oclMat &buffer, float pos, oclMat &newFrame, cl_mem &tex_src0, cl_mem &tex_src1)\r
+{\r
+ int step = buffer.step / sizeof(float);\r
+\r
+ Context *clCxt = Context::getContext();\r
+ string kernelName = "blendFramesKernel";\r
+ vector< pair<size_t, const void *> > args;\r
+\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&tex_src0));\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&tex_src1));\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buffer.data));\r
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&newFrame.data));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&frame0.cols));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&frame0.rows));\r
+ args.push_back( make_pair( sizeof(cl_int), (void *)&step));\r
+ args.push_back( make_pair( sizeof(cl_float), (void *)&pos));\r
+\r
+ size_t globalThreads[3] = {frame0.cols, frame0.rows, 1};\r
+ size_t localThreads[3] = {16, 16, 1};\r
+ openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1);\r
+}\r
+\r
+void interpolate::bindImgTex(const oclMat &img, cl_mem &texture)\r
+{\r
+ cl_image_format format;\r
+ int err;\r
+ int depth = img.depth();\r
+ int channels = img.channels();\r
+\r
+ switch(depth)\r
+ {\r
+ case CV_8U:\r
+ format.image_channel_data_type = CL_UNSIGNED_INT8;\r
+ break;\r
+ case CV_32S:\r
+ format.image_channel_data_type = CL_UNSIGNED_INT32;\r
+ break;\r
+ case CV_32F:\r
+ format.image_channel_data_type = CL_FLOAT;\r
+ break;\r
+ default:\r
+ throw std::exception();\r
+ break;\r
+ }\r
+ switch(channels)\r
+ {\r
+ case 1:\r
+ format.image_channel_order = CL_R;\r
+ break;\r
+ case 3:\r
+ format.image_channel_order = CL_RGB;\r
+ break;\r
+ case 4:\r
+ format.image_channel_order = CL_RGBA;\r
+ break;\r
+ default:\r
+ throw std::exception();\r
+ break;\r
+ }\r
+ if(texture)\r
+ {\r
+ openCLFree(texture);\r
+ }\r
+\r
+#if CL_VERSION_1_2\r
+ cl_image_desc desc;\r
+ desc.image_type = CL_MEM_OBJECT_IMAGE2D;\r
+ desc.image_width = img.step / img.elemSize();\r
+ desc.image_height = img.rows;\r
+ desc.image_depth = 0;\r
+ desc.image_array_size = 1;\r
+ desc.image_row_pitch = 0;\r
+ desc.image_slice_pitch = 0;\r
+ desc.buffer = NULL;\r
+ desc.num_mip_levels = 0;\r
+ desc.num_samples = 0;\r
+ texture = clCreateImage(Context::getContext()->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);\r
+#else\r
+ texture = clCreateImage2D(\r
+ Context::getContext()->impl->clContext,\r
+ CL_MEM_READ_WRITE,\r
+ &format,\r
+ img.step / img.elemSize(),\r
+ img.rows,\r
+ 0,\r
+ NULL,\r
+ &err);\r
+#endif\r
+ size_t origin[] = { 0, 0, 0 };\r
+ size_t region[] = { img.step / img.elemSize(), img.rows, 1 };\r
+ clEnqueueCopyBufferToImage(img.clCxt->impl->clCmdQueue, (cl_mem)img.data, texture, 0, origin, region, 0, NULL, 0);\r
+ openCLSafeCall(err);\r
+}\r
+#endif//(HAVE_OPENCL)\r
+\r
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- uchar4 src2_data = vload4(0, src2 + src2_index);
+ int src1_index_fix = src1_index < 0 ? 0 : src1_index;
+ int src2_index_fix = src2_index < 0 ? 0 : src2_index;
+ uchar4 src1_data = vload4(0, src1 + src1_index_fix);
+ uchar4 src2_data = vload4(0, src2 + src2_index_fix);
+ if(src1_index < 0)
+ {
+ uchar4 tmp;
+ tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
+ src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
+ }
+ if(src2_index < 0)
+ {
+ uchar4 tmp;
+ tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
+ src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
+ }
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data = abs_diff(src1_data, src2_data);
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
+ int src1_index_fix = src1_index < 0 ? 0 : src1_index;
+ uchar4 src1_data = vload4(0, src1 + src1_index_fix);
int4 src2_data = (int4)(src2.x, src2.x, src2.x, src2.x);
+ if(src1_index < 0)
+ {
+ uchar4 tmp;
+ tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
+ src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
+ }
uchar4 data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data = convert_uchar4_sat(abs_diff(convert_int4_sat(src1_data), src2_data));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- uchar4 src2_data = vload4(0, src2 + src2_index);
-
+ int src1_index_fix = src1_index < 0 ? 0 : src1_index;
+ int src2_index_fix = src2_index < 0 ? 0 : src2_index;
+ uchar4 src1_data = vload4(0, src1 + src1_index_fix);
+ uchar4 src2_data = vload4(0, src2 + src2_index_fix);
+ if(src1_index < 0)
+ {
+ uchar4 tmp;
+ tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
+ src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
+ }
+ if(src2_index < 0)
+ {
+ uchar4 tmp;
+ tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
+ src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
+ }
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
short4 tmp = convert_short4_sat(src1_data) + convert_short4_sat(src2_data);
uchar4 tmp_data = convert_uchar4_sat(tmp);
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
- uchar4 src2_data = vload4(0, src2 + src2_index);
- uchar4 mask_data = vload4(0, mask + mask_index);
-
+ int src1_index_fix = src1_index < 0 ? 0 : src1_index;
+ int src2_index_fix = src2_index < 0 ? 0 : src2_index;
+ int mask_index_fix = mask_index < 0 ? 0 : mask_index;
+ uchar4 src1_data = vload4(0, src1 + src1_index_fix);
+ uchar4 src2_data = vload4(0, src2 + src2_index_fix);
+ uchar4 mask_data = vload4(0, mask + mask_index_fix);
+ if(src1_index < 0)
+ {
+ uchar4 tmp;
+ tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
+ src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
+ }
+ if(src2_index < 0)
+ {
+ uchar4 tmp;
+ tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx;
+ src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw;
+ }
+ if(mask_index < 0)
+ {
+ uchar4 tmp;
+ tmp.xyzw = (mask_index == -2) ? mask_data.zwxy:mask_data.yzwx;
+ mask_data.xyzw = (mask_index == -1) ? mask_data.wxyz:tmp.xyzw;
+ }
+
uchar4 data = *((__global uchar4 *)(dst + dst_index));
short4 tmp = convert_short4_sat(src1_data) + convert_short4_sat(src2_data);
uchar4 tmp_data = convert_uchar4_sat(tmp);
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
+ int src1_index_fix = src1_index < 0 ? 0 : src1_index;
+ uchar4 src1_data = vload4(0, src1 + src1_index_fix);
int4 src2_data = (int4)(src2.x, src2.x, src2.x, src2.x);
-
+ if(src1_index < 0)
+ {
+ uchar4 tmp;
+ tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
+ src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
+ }
+
uchar4 data = *((__global uchar4 *)(dst + dst_index));
int4 tmp = convert_int4_sat(src1_data) + src2_data;
uchar4 tmp_data = convert_uchar4_sat(tmp);
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
-
- uchar4 src1_data = vload4(0, src1 + src1_index);
+ int src1_index_fix = src1_index < 0 ? 0 : src1_index;
+ int mask_index_fix = mask_index < 0 ? 0 : mask_index;
+ uchar4 src1_data = vload4(0, src1 + src1_index_fix);
int4 src2_data = (int4)(src2.x, src2.x, src2.x, src2.x);
- uchar4 mask_data = vload4(0, mask + mask_index);
+ uchar4 mask_data = vload4(0, mask + mask_index_fix);
+ if(src1_index < 0)
+ {
+ uchar4 tmp;
+ tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx;
+ src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw;
+ }
+ if(mask_index < 0)
+ {
+ uchar4 tmp;
+ tmp.xyzw = (mask_index == -2) ? mask_data.zwxy:mask_data.yzwx;
+ mask_data.xyzw = (mask_index == -1) ? mask_data.wxyz:tmp.xyzw;
+ }
uchar4 data = *((__global uchar4 *)(dst + dst_index));
int4 tmp = convert_int4_sat(src1_data) + src2_data;
int dst_end_1 = mad24(rows - y - 1, dst_step, dst_offset + dst_step1);
int dst_index_0 = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc);
int dst_index_1 = mad24(rows - y - 1, dst_step, dst_offset + x & (int)0xfffffffc);
-
- uchar4 src_data_0 = vload4(0, src + src_index_0);
- uchar4 src_data_1 = vload4(0, src + src_index_1);
+ int src1_index_fix = src_index_0 < 0 ? 0 : src_index_0;
+ int src2_index_fix = src_index_1 < 0 ? 0 : src_index_1;
+ uchar4 src_data_0 = vload4(0, src + src1_index_fix);
+ uchar4 src_data_1 = vload4(0, src + src2_index_fix);
+ if(src_index_0 < 0)
+ {
+ uchar4 tmp;
+ tmp.xyzw = (src_index_0 == -2) ? src_data_0.zwxy:src_data_0.yzwx;
+ src_data_0.xyzw = (src_index_0 == -1) ? src_data_0.wxyz:tmp.xyzw;
+ }
+ if(src_index_1 < 0)
+ {
+ uchar4 tmp;
+ tmp.xyzw = (src_index_1 == -2) ? src_data_1.zwxy:src_data_1.yzwx;
+ src_data_1.xyzw = (src_index_1 == -1) ? src_data_1.wxyz:tmp.xyzw;
+ }
uchar4 dst_data_0 = *((__global uchar4 *)(dst + dst_index_0));
uchar4 dst_data_1 = *((__global uchar4 *)(dst + dst_index_1));
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////\r
+//\r
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
+//\r
+// By downloading, copying, installing or using the software you agree to this license.\r
+// If you do not agree to this license, do not download, install,\r
+// copy or use the software.\r
+//\r
+//\r
+// License Agreement\r
+// For Open Source Computer Vision Library\r
+//\r
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.\r
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.\r
+// Third party copyrights are property of their respective owners.\r
+//\r
+// @Authors\r
+// Peng Xiao, pengxiao@multicorewareinc.com\r
+//\r
+// Redistribution and use in source and binary forms, with or without modification,\r
+// are permitted provided that the following conditions are met:\r
+//\r
+// * Redistribution's of source code must retain the above copyright notice,\r
+// this list of conditions and the following disclaimer.\r
+//\r
+// * Redistribution's in binary form must reproduce the above copyright notice,\r
+// this list of conditions and the following disclaimer in the documentation\r
+// and/or other oclMaterials provided with the distribution.\r
+//\r
+// * The name of the copyright holders may not be used to endorse or promote products\r
+// derived from this software without specific prior written permission.\r
+//\r
+// This software is provided by the copyright holders and contributors as is and\r
+// any express or implied warranties, including, but not limited to, the implied\r
+// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
+// In no event shall the Intel Corporation or contributors be liable for any direct,\r
+// indirect, incidental, special, exemplary, or consequential damages\r
+// (including, but not limited to, procurement of substitute goods or services;\r
+// loss of use, data, or profits; or business interruption) however caused\r
+// and on any theory of liability, whether in contract, strict liability,\r
+// or tort (including negligence or otherwise) arising in any way out of\r
+// the use of this software, even if advised of the possibility of such damage.\r
+//\r
+//M*/\r
+\r
+__kernel\r
+ void buildWarpPlaneMaps\r
+ (\r
+ __global float * map_x,\r
+ __global float * map_y,\r
+ __constant float * KRT,\r
+ int tl_u,\r
+ int tl_v,\r
+ int cols,\r
+ int rows,\r
+ int step_x,\r
+ int step_y,\r
+ float scale\r
+ )\r
+{\r
+ int du = get_global_id(0);\r
+ int dv = get_global_id(1);\r
+ step_x /= sizeof(float);\r
+ step_y /= sizeof(float);\r
+\r
+ __constant float * ck_rinv = KRT;\r
+ __constant float * ct = KRT + 9;\r
+\r
+ if (du < cols && dv < rows)\r
+ {\r
+ float u = tl_u + du;\r
+ float v = tl_v + dv;\r
+ float x, y;\r
+\r
+ float x_ = u / scale - ct[0];\r
+ float y_ = v / scale - ct[1];\r
+\r
+ float z;\r
+ x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * (1 - ct[2]);\r
+ y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * (1 - ct[2]);\r
+ z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * (1 - ct[2]);\r
+\r
+ x /= z;\r
+ y /= z;\r
+\r
+ map_x[dv * step_x + du] = x;\r
+ map_y[dv * step_y + du] = y;\r
+ }\r
+}\r
+\r
+__kernel\r
+ void buildWarpCylindricalMaps\r
+ (\r
+ __global float * map_x,\r
+ __global float * map_y,\r
+ __constant float * ck_rinv,\r
+ int tl_u,\r
+ int tl_v,\r
+ int cols,\r
+ int rows,\r
+ int step_x,\r
+ int step_y,\r
+ float scale\r
+ )\r
+{\r
+ int du = get_global_id(0);\r
+ int dv = get_global_id(1);\r
+ step_x /= sizeof(float);\r
+ step_y /= sizeof(float);\r
+\r
+ if (du < cols && dv < rows)\r
+ {\r
+ float u = tl_u + du;\r
+ float v = tl_v + dv;\r
+ float x, y;\r
+\r
+ u /= scale;\r
+ float x_ = sin(u);\r
+ float y_ = v / scale;\r
+ float z_ = cos(u);\r
+\r
+ float z;\r
+ x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;\r
+ y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_;\r
+ z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_;\r
+\r
+ if (z > 0) { x /= z; y /= z; }\r
+ else x = y = -1;\r
+\r
+ map_x[dv * step_x + du] = x;\r
+ map_y[dv * step_y + du] = y;\r
+ }\r
+}\r
+\r
+__kernel\r
+ void buildWarpSphericalMaps\r
+ (\r
+ __global float * map_x,\r
+ __global float * map_y,\r
+ __constant float * ck_rinv,\r
+ int tl_u,\r
+ int tl_v,\r
+ int cols,\r
+ int rows,\r
+ int step_x,\r
+ int step_y,\r
+ float scale\r
+ )\r
+{\r
+ int du = get_global_id(0);\r
+ int dv = get_global_id(1);\r
+ step_x /= sizeof(float);\r
+ step_y /= sizeof(float);\r
+\r
+ if (du < cols && dv < rows)\r
+ {\r
+ float u = tl_u + du;\r
+ float v = tl_v + dv;\r
+ float x, y;\r
+\r
+ v /= scale;\r
+ u /= scale;\r
+\r
+ float sinv = sin(v);\r
+ float x_ = sinv * sin(u);\r
+ float y_ = - cos(v);\r
+ float z_ = sinv * cos(u);\r
+\r
+ float z;\r
+ x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;\r
+ y = ck_rinv[3] * x_ + ck_rinv[4] * y_ + ck_rinv[5] * z_;\r
+ z = ck_rinv[6] * x_ + ck_rinv[7] * y_ + ck_rinv[8] * z_;\r
+\r
+ if (z > 0) { x /= z; y /= z; }\r
+ else x = y = -1;\r
+\r
+ map_x[dv * step_x + du] = x;\r
+ map_y[dv * step_y + du] = y;\r
+ }\r
+}\r
+\r
+__kernel\r
+ void buildWarpAffineMaps\r
+ (\r
+ __global float * xmap,\r
+ __global float * ymap,\r
+ __constant float * c_warpMat,\r
+ int cols,\r
+ int rows,\r
+ int step_x,\r
+ int step_y\r
+ )\r
+{\r
+ int x = get_global_id(0);\r
+ int y = get_global_id(1);\r
+ step_x /= sizeof(float);\r
+ step_y /= sizeof(float);\r
+\r
+ if (x < cols && y < rows)\r
+ {\r
+ const float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2];\r
+ const float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5];\r
+\r
+ map_x[y * step_x + x] = xcoo;\r
+ map_y[y * step_y + x] = ycoo;\r
+ }\r
+}\r
+\r
+__kernel\r
+ void buildWarpPerspectiveMaps\r
+ (\r
+ __global float * xmap,\r
+ __global float * ymap,\r
+ __constant float * c_warpMat,\r
+ int cols,\r
+ int rows,\r
+ int step_x,\r
+ int step_y\r
+ )\r
+{\r
+ int x = get_global_id(0);\r
+ int y = get_global_id(1);\r
+ step_x /= sizeof(float);\r
+ step_y /= sizeof(float);\r
+\r
+ if (x < cols && y < rows)\r
+ {\r
+ const float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]);\r
+\r
+ const float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]);\r
+ const float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]);\r
+\r
+ map_x[y * step_x + x] = xcoo;\r
+ map_y[y * step_y + x] = ycoo;\r
+ }\r
+}\r
+\r
//ss = convert_uint4(src[cur_addr]);
int cur_col = clamp(startX + col, 0, src_whole_cols);
- ss = convert_uint4(src[(startY+i)*(src_step>>2) + cur_col]);
+ if(con)
+ ss = convert_uint4(src[(startY+i)*(src_step>>2) + cur_col]);
data[i] = con ? ss : 0;
}
selected_col = ADDR_L(startX+col, 0, src_whole_cols);
selected_col = ADDR_R(startX+col, src_whole_cols, selected_col);
+
data[i] = convert_uint4(src[selected_row * (src_step>>2) + selected_col]);
}
for(int i=0; i < ksY+1; i++)
{
con = startX+col >= 0 && startX+col < src_whole_cols && startY+i >= 0 && startY+i < src_whole_rows;
- // int cur_addr = clamp((startY+i)*(src_step>>2)+(startX+col),0,end_addr);
- // ss = src[cur_addr];
-
+ //int cur_addr = clamp((startY+i)*(src_step>>2)+(startX+col),0,end_addr);
+ //ss = src[cur_addr];
+
int cur_col = clamp(startX + col, 0, src_whole_cols);
- ss = src[(startY+i)*(src_step>>2) + cur_col];
+ //ss = src[(startY+i)*(src_step>>2) + cur_col];
+ ss = (startY+i)<src_whole_rows&&(startY+i)>=0&&cur_col>=0&&cur_col<src_whole_cols?src[(startY+i)*(src_step>>2) + cur_col]:0;
data[i] = con ? ss : 0.f;
}
//ss = src[cur_addr];
int cur_col = clamp(startX + col, 0, src_whole_cols);
- ss = src[(startY+i)*(src_step>>4) + cur_col];
+ //ss = src[(startY+i)*(src_step>>4) + cur_col];
+ ss = (startY+i)<src_whole_rows&&(startY+i)>=0&&cur_col>=0&&cur_col<src_whole_cols?src[(startY+i)*(src_step>>4) + cur_col]:0;
data[i] = con ? ss : (float4)(0.0,0.0,0.0,0.0);
}
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
-//
-//
-
-
-//#pragma OPENCL EXTENSION cl_amd_printf :enable
-__kernel
-void bilateral4(__global uchar4 *dst,
- __global uchar4 *src,
- int rows,
- int cols,
- int channels,
- int radius,
- int wholerows,
- int wholecols,
- int src_step,
- int dst_step,
- int src_offset,
- int dst_offset,
- __constant float *sigClr,
- __constant float *sigSpc)
-{
- uint lidx = get_local_id(0);
- uint lidy = get_local_id(1);
-
- uint gdx = get_global_id(0);
- uint gdy = get_global_id(1);
-
- uint gidx = gdx >=cols?cols-1:gdx;
- uint gidy = gdy >=rows?rows-1:gdy;
-
- uchar4 p,q,tmp;
-
- float4 pf = 0,pq = 0,pd = 0;
- float wt =0;
-
- int r = radius;
- int ij = 0;
- int ct = 0;
-
- uint index_src = src_offset/4 + gidy*src_step/4 + gidx;
- uint index_dst = dst_offset/4 + gidy*dst_step/4 + gidx;
- p = src[index_src];
-
- uint gx,gy;
- uint src_index,dst_index;
-
- for(int ii = -r;ii<r+1;ii++)
- {
- for(int jj =-r;jj<r+1;jj++)
- {
- ij = ii*ii+jj*jj;
- if(ij > mul24(radius,radius)) continue;
- gx = gidx + jj;
- gy = gidy + ii;
-
- src_index = src_offset/4 + gy * src_step/4 + gx;
- q = src[src_index];
-
-
- ct = abs(p.x-q.x)+abs(p.y-q.y)+abs(p.z-q.z);
- wt =sigClr[ct]*sigSpc[(ii+radius)*(2*radius+1)+jj+radius];
-
- pf.x += q.x*wt;
- pf.y += q.y*wt;
- pf.z += q.z*wt;
-// pf.w += q.w*wt;
-
- pq += wt;
-
- }
- }
-
- pd = pf/pq;
- dst[index_dst] = convert_uchar4_rte(pd);
-}
-
-__kernel void bilateral(__global uchar *dst,
+__kernel void bilateral_C1_D0(__global uchar *dst,
__global const uchar *src,
const int dst_rows,
const int dst_cols,
if((gidy<dst_rows) && (gidx<dst_cols))
{
int src_addr = mad24(gidy+radius,src_step,gidx+radius);
- int dst_addr = mad24(gidy,src_step,gidx+dst_offset);
- float sum = 0, wsum = 0;
+ int dst_addr = mad24(gidy,dst_step,gidx+dst_offset);
+ float sum = 0.f, wsum = 0.f;
int val0 = (int)src[src_addr];
for(int k = 0; k < maxk; k++ )
dst[dst_addr] = convert_uchar_rtz(sum/wsum+0.5f);
}
}
+__kernel void bilateral2_C1_D0(__global uchar *dst,
+ __global const uchar *src,
+ const int dst_rows,
+ const int dst_cols,
+ const int maxk,
+ const int radius,
+ const int dst_step,
+ const int dst_offset,
+ const int src_step,
+ const int src_rows,
+ const int src_cols,
+ __constant float *color_weight,
+ __constant float *space_weight,
+ __constant int *space_ofs)
+{
+ int gidx = get_global_id(0)<<2;
+ int gidy = get_global_id(1);
+ if((gidy<dst_rows) && (gidx<dst_cols))
+ {
+ int src_addr = mad24(gidy+radius,src_step,gidx+radius);
+ int dst_addr = mad24(gidy,dst_step,gidx+dst_offset);
+ float4 sum = (float4)(0.f), wsum = (float4)(0.f);
+
+ int4 val0 = convert_int4(vload4(0,src+src_addr));
+ for(int k = 0; k < maxk; k++ )
+ {
+ int4 val = convert_int4(vload4(0,src+src_addr + space_ofs[k]));
+ float4 w = (float4)(space_weight[k])*(float4)(color_weight[abs(val.x - val0.x)],color_weight[abs(val.y - val0.y)],color_weight[abs(val.z - val0.z)],color_weight[abs(val.w - val0.w)]);
+ sum += convert_float4(val)*w;
+ wsum += w;
+ }
+ *(__global uchar4*)(dst+dst_addr) = convert_uchar4_rtz(sum/wsum+0.5f);
+ }
+}
+__kernel void bilateral_C4_D0(__global uchar4 *dst,
+ __global const uchar4 *src,
+ const int dst_rows,
+ const int dst_cols,
+ const int maxk,
+ const int radius,
+ const int dst_step,
+ const int dst_offset,
+ const int src_step,
+ const int src_rows,
+ const int src_cols,
+ __constant float *color_weight,
+ __constant float *space_weight,
+ __constant int *space_ofs)
+{
+ int gidx = get_global_id(0);
+ int gidy = get_global_id(1);
+ if((gidy<dst_rows) && (gidx<dst_cols))
+ {
+ int src_addr = mad24(gidy+radius,src_step,gidx+radius);
+ int dst_addr = mad24(gidy,dst_step,gidx+dst_offset);
+ float4 sum = (float4)0.f;
+ float wsum = 0.f;
+ int4 val0 = convert_int4(src[src_addr]);
+ for(int k = 0; k < maxk; k++ )
+ {
+ int4 val = convert_int4(src[src_addr + space_ofs[k]]);
+ float w = space_weight[k]*color_weight[abs(val.x - val0.x)+abs(val.y - val0.y)+abs(val.z - val0.z)];
+ sum += convert_float4(val)*(float4)w;
+ wsum += w;
+ }
+ wsum=1.f/wsum;
+ dst[dst_addr] = convert_uchar4_rtz(sum*(float4)wsum+(float4)0.5f);
+ }
+}
int rowIndex = mad24(gy, gn, gx);
// rowIndex &= (PARTIAL_HISTOGRAM256_COUNT - 1);
- __local int subhist[HISTOGRAM256_LOCAL_MEM_SIZE + 1];
+ __local int subhist[HISTOGRAM256_LOCAL_MEM_SIZE];
subhist[lidy] = 0;
barrier(CLK_LOCAL_MEM_FENCE);
gidx = ((gidx>=left_col) ? (gidx+cols) : gidx);
- int src_index = src_offset + mad24(gidy, src_step, gidx);
- barrier(CLK_LOCAL_MEM_FENCE);
- int p = (int)src[src_index];
- p = gidy >= rows ? HISTOGRAM256_LOCAL_MEM_SIZE : p;
- atomic_inc(subhist + p);
+ if(gidy<rows)
+ {
+ int src_index = src_offset + mad24(gidy, src_step, gidx);
+ int p = (int)src[src_index];
+// p = gidy >= rows ? HISTOGRAM256_LOCAL_MEM_SIZE : p;
+ atomic_inc(subhist + p);
+ }
barrier(CLK_LOCAL_MEM_FENCE);
globalHist[mad24(rowIndex, hist_step, lidy)] += subhist[lidy];
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////\r
+//\r
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
+//\r
+// By downloading, copying, installing or using the software you agree to this license.\r
+// If you do not agree to this license, do not download, install,\r
+// copy or use the software.\r
+//\r
+//\r
+// License Agreement\r
+// For Open Source Computer Vision Library\r
+//\r
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.\r
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.\r
+// Third party copyrights are property of their respective owners.\r
+//\r
+// @Authors\r
+// Peng Xiao, pengxiao@multicorewareinc.com\r
+//\r
+// Redistribution and use in source and binary forms, with or without modification,\r
+// are permitted provided that the following conditions are met:\r
+//\r
+// * Redistribution's of source code must retain the above copyright notice,\r
+// this list of conditions and the following disclaimer.\r
+//\r
+// * Redistribution's in binary form must reproduce the above copyright notice,\r
+// this list of conditions and the following disclaimer in the documentation\r
+// and/or other oclMaterials provided with the distribution.\r
+//\r
+// * The name of the copyright holders may not be used to endorse or promote products\r
+// derived from this software without specific prior written permission.\r
+//\r
+// This software is provided by the copyright holders and contributors as is and\r
+// any express or implied warranties, including, but not limited to, the implied\r
+// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
+// In no event shall the Intel Corporation or contributors be liable for any direct,\r
+// indirect, incidental, special, exemplary, or consequential damages\r
+// (including, but not limited to, procurement of substitute goods or services;\r
+// loss of use, data, or profits; or business interruption) however caused\r
+// and on any theory of liability, whether in contract, strict liability,\r
+// or tort (including negligence or otherwise) arising in any way out of\r
+// the use of this software, even if advised of the possibility of such damage.\r
+//\r
+//M*/\r
+\r
+#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\r
+#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\r
+\r
+// Image read mode\r
+__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;\r
+\r
+// atomic add for 32bit floating point\r
+inline void atomic_addf(volatile __global float *source, const float operand) {\r
+ union {\r
+ unsigned int intVal;\r
+ float floatVal;\r
+ } newVal;\r
+ union {\r
+ unsigned int intVal;\r
+ float floatVal;\r
+ } prevVal;\r
+ do {\r
+ prevVal.floatVal = *source;\r
+ newVal.floatVal = prevVal.floatVal + operand;\r
+ } while (atomic_cmpxchg((volatile __global unsigned int *)source, prevVal.intVal, newVal.intVal) != prevVal.intVal);\r
+}\r
+\r
+__kernel void memsetKernel(\r
+ float val,\r
+ __global float * image,\r
+ int width,\r
+ int height,\r
+ int step, // in element\r
+ int offset\r
+ )\r
+{\r
+ if(get_global_id(0) >= width || get_global_id(1) >= height)\r
+ {\r
+ return;\r
+ }\r
+ image += offset;\r
+ image[get_global_id(0) + get_global_id(1) * step] = val;\r
+}\r
+\r
+__kernel void normalizeKernel(\r
+ __global float * buffer,\r
+ int width,\r
+ int height,\r
+ int step,\r
+ int f_offset,\r
+ int d_offset\r
+ )\r
+{\r
+ __global float * factors = buffer + f_offset;\r
+ __global float * dst = buffer + d_offset;\r
+\r
+ int j = get_global_id(0);\r
+ int i = get_global_id(1);\r
+\r
+ if(j >= width || i >= height)\r
+ {\r
+ return;\r
+ }\r
+ float scale = factors[step * i + j];\r
+ float invScale = (scale == 0.0f) ? 1.0f : (1.0f / scale);\r
+\r
+ dst[step * i + j] *= invScale;\r
+}\r
+\r
+__kernel void forwardWarpKernel(\r
+ __global const float * src,\r
+ __global float * buffer,\r
+ __global const float * u,\r
+ __global const float * v,\r
+ const int w,\r
+ const int h,\r
+ const int flow_stride,\r
+ const int image_stride,\r
+ const int factor_offset,\r
+ const int dst_offset,\r
+ const float time_scale\r
+ )\r
+{\r
+ int j = get_global_id(0);\r
+ int i = get_global_id(1);\r
+\r
+ if (i >= h || j >= w) return;\r
+\r
+ volatile __global float * normalization_factor = (volatile __global float *) buffer + factor_offset;\r
+ volatile __global float * dst = (volatile __global float *)buffer + dst_offset;\r
+\r
+ int flow_row_offset = i * flow_stride;\r
+ int image_row_offset = i * image_stride;\r
+\r
+ //bottom left corner of a target pixel\r
+ float cx = u[flow_row_offset + j] * time_scale + (float)j + 1.0f;\r
+ float cy = v[flow_row_offset + j] * time_scale + (float)i + 1.0f;\r
+ // pixel containing bottom left corner\r
+ float px;\r
+ float py;\r
+ float dx = modf(cx, &px);\r
+ float dy = modf(cy, &py);\r
+ // target pixel integer coords\r
+ int tx;\r
+ int ty;\r
+ tx = (int) px;\r
+ ty = (int) py;\r
+ float value = src[image_row_offset + j];\r
+ float weight;\r
+ // fill pixel containing bottom right corner\r
+ if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))\r
+ {\r
+ weight = dx * dy;\r
+ atomic_addf(dst + ty * image_stride + tx, value * weight);\r
+ atomic_addf(normalization_factor + ty * image_stride + tx, weight);\r
+ }\r
+\r
+ // fill pixel containing bottom left corner\r
+ tx -= 1;\r
+ if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))\r
+ {\r
+ weight = (1.0f - dx) * dy;\r
+ atomic_addf(dst + ty * image_stride + tx, value * weight);\r
+ atomic_addf(normalization_factor + ty * image_stride + tx, weight);\r
+ }\r
+\r
+ // fill pixel containing upper left corner\r
+ ty -= 1;\r
+ if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))\r
+ {\r
+ weight = (1.0f - dx) * (1.0f - dy);\r
+ atomic_addf(dst + ty * image_stride + tx, value * weight);\r
+ atomic_addf(normalization_factor + ty * image_stride + tx, weight);\r
+ }\r
+\r
+ // fill pixel containing upper right corner\r
+ tx += 1;\r
+ if (!((tx >= w) || (tx < 0) || (ty >= h) || (ty < 0)))\r
+ {\r
+ weight = dx * (1.0f - dy);\r
+ atomic_addf(dst + ty * image_stride + tx, value * weight);\r
+ atomic_addf(normalization_factor + ty * image_stride + tx, weight);\r
+ }\r
+}\r
+\r
+// define buffer offsets\r
+enum\r
+{\r
+ O0_OS = 0,\r
+ O1_OS,\r
+ U_OS,\r
+ V_OS,\r
+ UR_OS,\r
+ VR_OS\r
+};\r
+\r
+__kernel void blendFramesKernel(\r
+ image2d_t tex_src0,\r
+ image2d_t tex_src1,\r
+ __global float * buffer,\r
+ __global float * out,\r
+ int w,\r
+ int h,\r
+ int step,\r
+ float theta\r
+ )\r
+{\r
+ __global float * u = buffer + h * step * U_OS;\r
+ __global float * v = buffer + h * step * V_OS;\r
+ __global float * ur = buffer + h * step * UR_OS;\r
+ __global float * vr = buffer + h * step * VR_OS;\r
+ __global float * o0 = buffer + h * step * O0_OS;\r
+ __global float * o1 = buffer + h * step * O1_OS;\r
+\r
+ int ix = get_global_id(0);\r
+ int iy = get_global_id(1);\r
+\r
+ if(ix >= w || iy >= h) return;\r
+\r
+ int pos = ix + step * iy;\r
+\r
+ float _u = u[pos];\r
+ float _v = v[pos];\r
+\r
+ float _ur = ur[pos];\r
+ float _vr = vr[pos];\r
+\r
+ float x = (float)ix + 0.5f;\r
+ float y = (float)iy + 0.5f;\r
+ bool b0 = o0[pos] > 1e-4f;\r
+ bool b1 = o1[pos] > 1e-4f;\r
+\r
+ float2 coord0 = (float2)(x - _u * theta, y - _v * theta);\r
+ float2 coord1 = (float2)(x + _u * (1.0f - theta), y + _v * (1.0f - theta));\r
+\r
+ if (b0 && b1)\r
+ {\r
+ // pixel is visible on both frames\r
+ out[pos] = read_imagef(tex_src0, sampler, coord0).x * (1.0f - theta) + \r
+ read_imagef(tex_src1, sampler, coord1).x * theta;\r
+ }\r
+ else if (b0)\r
+ {\r
+ // visible on the first frame only\r
+ out[pos] = read_imagef(tex_src0, sampler, coord0).x;\r
+ }\r
+ else\r
+ {\r
+ // visible on the second frame only\r
+ out[pos] = read_imagef(tex_src1, sampler, coord1).x;\r
+ }\r
+}\r
using namespace std;
#if !defined (HAVE_OPENCL)
-void cv::ocl::matchTemplate(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); }
+void cv::ocl::matchTemplate(const oclMat &, const oclMat &, oclMat &)
+{
+ throw_nogpu();
+}
#else
//helper routines
namespace cv
}
}
-namespace cv { namespace ocl
+namespace cv
{
- void matchTemplate_SQDIFF(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
+ namespace ocl
+ {
+ void matchTemplate_SQDIFF(
+ const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
- void matchTemplate_SQDIFF_NORMED(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
+ void matchTemplate_SQDIFF_NORMED(
+ const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
- void matchTemplate_CCORR(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
+ void matchTemplate_CCORR(
+ const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
- void matchTemplate_CCORR_NORMED(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
+ void matchTemplate_CCORR_NORMED(
+ const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
- void matchTemplate_CCOFF(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
+ void matchTemplate_CCOFF(
+ const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
- void matchTemplate_CCOFF_NORMED(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
+ void matchTemplate_CCOFF_NORMED(
+ const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
- void matchTemplateNaive_SQDIFF(
- const oclMat& image, const oclMat& templ, oclMat& result, int cn);
+ void matchTemplateNaive_SQDIFF(
+ const oclMat &image, const oclMat &templ, oclMat &result, int cn);
- void matchTemplateNaive_CCORR(
- const oclMat& image, const oclMat& templ, oclMat& result, int cn);
+ void matchTemplateNaive_CCORR(
+ const oclMat &image, const oclMat &templ, oclMat &result, int cn);
- // Evaluates optimal template's area threshold. If
- // template's area is less than the threshold, we use naive match
- // template version, otherwise FFT-based (if available)
- int getTemplateThreshold(int method, int depth)
- {
- switch (method)
+ // Evaluates optimal template's area threshold. If
+ // template's area is less than the threshold, we use naive match
+ // template version, otherwise FFT-based (if available)
+ int getTemplateThreshold(int method, int depth)
{
- case CV_TM_CCORR:
- if (depth == CV_32F) return 250;
- if (depth == CV_8U) return 300;
- break;
- case CV_TM_SQDIFF:
- if (depth == CV_32F) return 0x7fffffff; // do naive SQDIFF for CV_32F
- if (depth == CV_8U) return 300;
- break;
+ switch (method)
+ {
+ case CV_TM_CCORR:
+ if (depth == CV_32F) return 250;
+ if (depth == CV_8U) return 300;
+ break;
+ case CV_TM_SQDIFF:
+ if (depth == CV_32F) return 0x7fffffff; // do naive SQDIFF for CV_32F
+ if (depth == CV_8U) return 300;
+ break;
+ }
+ CV_Error(CV_StsBadArg, "getTemplateThreshold: unsupported match template mode");
+ return 0;
}
- CV_Error(CV_StsBadArg, "getTemplateThreshold: unsupported match template mode");
- return 0;
- }
- //////////////////////////////////////////////////////////////////////
- // SQDIFF
- void matchTemplate_SQDIFF(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &)
- {
- result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
- if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
+ //////////////////////////////////////////////////////////////////////
+ // SQDIFF
+ void matchTemplate_SQDIFF(
+ const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &)
{
- matchTemplateNaive_SQDIFF(image, templ, result, image.channels());
- return;
+ result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
+ if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
+ {
+ matchTemplateNaive_SQDIFF(image, templ, result, image.channels());
+ return;
+ }
+ else
+ {
+ // TODO
+ CV_Error(CV_StsBadArg, "Not supported yet for this size template");
+ }
}
- else
+
+ void matchTemplate_SQDIFF_NORMED(
+ const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
{
- // TODO
- CV_Error(CV_StsBadArg, "Not supported yet for this size template");
- }
- }
+ matchTemplate_CCORR(image, templ, result, buf);
+ buf.image_sums.resize(1);
- void matchTemplate_SQDIFF_NORMED(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
- {
- matchTemplate_CCORR(image,templ,result,buf);
- buf.image_sums.resize(1);
+ integral(image.reshape(1), buf.image_sums[0]);
- integral(image.reshape(1), buf.image_sums[0]);
+ unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
-#if SQRSUM_FIXED
- unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
-#else
- Mat sqr_mat = templ.reshape(1);
- unsigned long long templ_sqsum = (unsigned long long)sum(sqr_mat.mul(sqr_mat))[0];
-#endif
-
- Context *clCxt = image.clCxt;
- string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
- vector< pair<size_t, const void *> > args;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
- args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
-
- size_t globalThreads[3] = {result.cols, result.rows, 1};
- size_t localThreads[3] = {32, 8, 1};
- openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
- }
+ Context *clCxt = image.clCxt;
+ string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
+ vector< pair<size_t, const void *> > args;
- void matchTemplateNaive_SQDIFF(
- const oclMat& image, const oclMat& templ, oclMat& result, int)
- {
- CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
- || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
- );
- CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
- CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
-
- Context *clCxt = image.clCxt;
- string kernelName = "matchTemplate_Naive_SQDIFF";
-
- vector< pair<size_t, const void *> > args;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
-
- size_t globalThreads[3] = {result.cols, result.rows, 1};
- size_t localThreads[3] = {32, 8, 1};
- openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
- }
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
+ args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
- //////////////////////////////////////////////////////////////////////
- // CCORR
- void matchTemplate_CCORR(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
- {
- result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
- if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
+ size_t globalThreads[3] = {result.cols, result.rows, 1};
+ size_t localThreads[3] = {32, 8, 1};
+ openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
+ }
+
+ void matchTemplateNaive_SQDIFF(
+ const oclMat &image, const oclMat &templ, oclMat &result, int)
{
- matchTemplateNaive_CCORR(image, templ, result, image.channels());
- return;
+ CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
+ || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
+ );
+ CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.oclchannels() == 4) && result.channels() == 1);
+ CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
+
+ Context *clCxt = image.clCxt;
+ string kernelName = "matchTemplate_Naive_SQDIFF";
+
+ vector< pair<size_t, const void *> > args;
+
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+
+ size_t globalThreads[3] = {result.cols, result.rows, 1};
+ size_t localThreads[3] = {32, 8, 1};
+ openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
}
- else
+
+ //////////////////////////////////////////////////////////////////////
+ // CCORR
+ void matchTemplate_CCORR(
+ const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
{
- CV_Error(CV_StsBadArg, "Not supported yet for this size template");
- if(image.depth() == CV_8U && templ.depth() == CV_8U)
+ result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
+ if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
{
- image.convertTo(buf.imagef, CV_32F);
- templ.convertTo(buf.templf, CV_32F);
+ matchTemplateNaive_CCORR(image, templ, result, image.channels());
+ return;
+ }
+ else
+ {
+ CV_Error(CV_StsBadArg, "Not supported yet for this size template");
+ if(image.depth() == CV_8U && templ.depth() == CV_8U)
+ {
+ image.convertTo(buf.imagef, CV_32F);
+ templ.convertTo(buf.templf, CV_32F);
+ }
+ CV_Assert(image.channels() == 1);
+ oclMat o_result(image.size(), CV_MAKETYPE(CV_32F, image.channels()));
+ filter2D(buf.imagef, o_result, CV_32F, buf.templf, Point(0, 0));
+ result = o_result(Rect(0, 0, image.rows - templ.rows + 1, image.cols - templ.cols + 1));
}
- CV_Assert(image.channels() == 1);
- oclMat o_result(image.size(), CV_MAKETYPE(CV_32F, image.channels()));
- filter2D(buf.imagef,o_result,CV_32F,buf.templf, Point(0,0));
- result = o_result(Rect(0,0,image.rows - templ.rows + 1, image.cols - templ.cols + 1));
}
- }
- void matchTemplate_CCORR_NORMED(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
- {
- matchTemplate_CCORR(image,templ,result,buf);
- buf.image_sums.resize(1);
- buf.image_sqsums.resize(1);
+ void matchTemplate_CCORR_NORMED(
+ const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
+ {
+ matchTemplate_CCORR(image, templ, result, buf);
+ buf.image_sums.resize(1);
+ buf.image_sqsums.resize(1);
- integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
-#if SQRSUM_FIXED
- unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
-#else
- oclMat templ_c1 = templ.reshape(1);
- multiply(templ_c1, templ_c1, templ_c1);
- unsigned long long templ_sqsum = (unsigned long long)sum(templ_c1)[0];
-#endif
- Context *clCxt = image.clCxt;
- string kernelName = "normalizeKernel";
- vector< pair<size_t, const void *> > args;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
- args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
-
- size_t globalThreads[3] = {result.cols, result.rows, 1};
- size_t localThreads[3] = {32, 8, 1};
- openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
- }
+ integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
+
+ unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
+
+ Context *clCxt = image.clCxt;
+ string kernelName = "normalizeKernel";
+ vector< pair<size_t, const void *> > args;
+
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
+ args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+
+ size_t globalThreads[3] = {result.cols, result.rows, 1};
+ size_t localThreads[3] = {32, 8, 1};
+ openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
+ }
- void matchTemplateNaive_CCORR(
- const oclMat& image, const oclMat& templ, oclMat& result, int)
- {
- CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
- || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
- );
- CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
- CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
-
- Context *clCxt = image.clCxt;
- string kernelName = "matchTemplate_Naive_CCORR";
-
- vector< pair<size_t, const void *> > args;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
-
- size_t globalThreads[3] = {result.cols, result.rows, 1};
- size_t localThreads[3] = {32, 8, 1};
- openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
- }
- //////////////////////////////////////////////////////////////////////
- // CCOFF
- void matchTemplate_CCOFF(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
- {
- CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U);
-
- matchTemplate_CCORR(image,templ,result,buf);
-
- Context *clCxt = image.clCxt;
- string kernelName;
-
- kernelName = "matchTemplate_Prepared_CCOFF";
- size_t globalThreads[3] = {result.cols, result.rows, 1};
- size_t localThreads[3] = {32, 8, 1};
-
- vector< pair<size_t, const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
- // to be continued in the following section
- if(image.channels() == 1)
+ void matchTemplateNaive_CCORR(
+ const oclMat &image, const oclMat &templ, oclMat &result, int)
{
- buf.image_sums.resize(1);
- integral(image, buf.image_sums[0]);
-
- float templ_sum = 0;
- templ_sum = (float)sum(templ)[0] / templ.size().area();
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) );
+ CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
+ || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
+ );
+ CV_Assert(image.channels() == templ.channels() && (image.oclchannels() == 1 || image.oclchannels() == 4) && result.channels() == 1);
+ CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
+
+ Context *clCxt = image.clCxt;
+ string kernelName = "matchTemplate_Naive_CCORR";
+
+ vector< pair<size_t, const void *> > args;
+
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+
+ size_t globalThreads[3] = {result.cols, result.rows, 1};
+ size_t localThreads[3] = {32, 8, 1};
+ openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
}
- else
+ //////////////////////////////////////////////////////////////////////
+ // CCOFF
+ void matchTemplate_CCOFF(
+ const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
{
- Vec4f templ_sum = Vec4f::all(0);
- split(image,buf.images);
- templ_sum = sum(templ) / templ.size().area();
- buf.image_sums.resize(buf.images.size());
-
-
- for(int i = 0; i < image.channels(); i ++)
+ CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U);
+
+ matchTemplate_CCORR(image, templ, result, buf);
+
+ Context *clCxt = image.clCxt;
+ string kernelName;
+
+ kernelName = "matchTemplate_Prepared_CCOFF";
+ size_t globalThreads[3] = {result.cols, result.rows, 1};
+ size_t localThreads[3] = {32, 8, 1};
+
+ vector< pair<size_t, const void *> > args;
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+ // to be continued in the following section
+ if(image.channels() == 1)
{
- integral(buf.images[i], buf.image_sums[i]);
- }
- switch(image.channels())
- {
- case 4:
+ buf.image_sums.resize(1);
+ integral(image, buf.image_sums[0]);
+
+ float templ_sum = 0;
+ templ_sum = (float)sum(templ)[0] / templ.size().area();
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) );
- break;
- default:
- CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
- break;
+ args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum) );
}
+ else
+ {
+ Vec4f templ_sum = Vec4f::all(0);
+ split(image, buf.images);
+ templ_sum = sum(templ) / templ.size().area();
+ buf.image_sums.resize(buf.images.size());
+
+
+ for(int i = 0; i < image.channels(); i ++)
+ {
+ integral(buf.images[i], buf.image_sums[i]);
+ }
+ switch(image.oclchannels())
+ {
+ case 4:
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
+ args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
+ args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
+ args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
+ args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
+ break;
+ default:
+ CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
+ break;
+ }
+ }
+ openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
}
- openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
- }
- void matchTemplate_CCOFF_NORMED(
- const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
- {
- image.convertTo(buf.imagef, CV_32F);
- templ.convertTo(buf.templf, CV_32F);
-
- matchTemplate_CCORR(buf.imagef, buf.templf, result, buf);
- float scale = 1.f/templ.size().area();
-
- Context *clCxt = image.clCxt;
- string kernelName;
-
- kernelName = "matchTemplate_Prepared_CCOFF_NORMED";
- size_t globalThreads[3] = {result.cols, result.rows, 1};
- size_t localThreads[3] = {32, 8, 1};
-
- vector< pair<size_t, const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
- args.push_back( make_pair( sizeof(cl_float),(void *)&scale) );
- // to be continued in the following section
- if(image.channels() == 1)
+ void matchTemplate_CCOFF_NORMED(
+ const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
{
- buf.image_sums.resize(1);
- buf.image_sqsums.resize(1);
- integral(image, buf.image_sums[0], buf.image_sqsums[0]);
- float templ_sum = 0;
- float templ_sqsum = 0;
- templ_sum = (float)sum(templ)[0];
-#if SQRSUM_FIXED
- templ_sqsum = sqrSum(templ)[0];
-#else
- oclMat templ_sqr = templ;
- multiply(templ,templ, templ_sqr);
- templ_sqsum = saturate_cast<float>(sum(templ_sqr)[0]);
-#endif //SQRSUM_FIXED
- templ_sqsum -= scale * templ_sum * templ_sum;
- templ_sum *= scale;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum) );
- }
- else
- {
- Vec4f templ_sum = Vec4f::all(0);
- Vec4f templ_sqsum = Vec4f::all(0);
-
- split(image,buf.images);
- templ_sum = sum(templ);
-#if SQRSUM_FIXED
- templ_sqsum = sqrSum(templ);
-#else
- oclMat templ_sqr = templ;
- multiply(templ,templ, templ_sqr);
- templ_sqsum = sum(templ_sqr);
-#endif //SQRSUM_FIXED
- templ_sqsum -= scale * templ_sum * templ_sum;
-
- float templ_sqsum_sum = 0;
- for(int i = 0; i < image.channels(); i ++)
+ image.convertTo(buf.imagef, CV_32F);
+ templ.convertTo(buf.templf, CV_32F);
+
+ matchTemplate_CCORR(buf.imagef, buf.templf, result, buf);
+ float scale = 1.f / templ.size().area();
+
+ Context *clCxt = image.clCxt;
+ string kernelName;
+
+ kernelName = "matchTemplate_Prepared_CCOFF_NORMED";
+ size_t globalThreads[3] = {result.cols, result.rows, 1};
+ size_t localThreads[3] = {32, 8, 1};
+
+ vector< pair<size_t, const void *> > args;
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+ args.push_back( make_pair( sizeof(cl_float), (void *)&scale) );
+ // to be continued in the following section
+ if(image.channels() == 1)
{
- templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
- }
- templ_sum *= scale;
- buf.image_sums.resize(buf.images.size());
- buf.image_sqsums.resize(buf.images.size());
+ buf.image_sums.resize(1);
+ buf.image_sqsums.resize(1);
+ integral(image, buf.image_sums[0], buf.image_sqsums[0]);
+ float templ_sum = 0;
+ float templ_sqsum = 0;
+ templ_sum = (float)sum(templ)[0];
- for(int i = 0; i < image.channels(); i ++)
- {
- integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]);
- }
+ templ_sqsum = sqrSum(templ)[0];
+
+ templ_sqsum -= scale * templ_sum * templ_sum;
+ templ_sum *= scale;
- switch(image.channels())
- {
- case 4:
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) );
- args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum_sum) );
- break;
- default:
- CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
- break;
+ args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum) );
+ args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sqsum) );
}
+ else
+ {
+ Vec4f templ_sum = Vec4f::all(0);
+ Vec4f templ_sqsum = Vec4f::all(0);
+
+ split(image, buf.images);
+ templ_sum = sum(templ);
+
+ templ_sqsum = sqrSum(templ);
+
+ templ_sqsum -= scale * templ_sum * templ_sum;
+
+ float templ_sqsum_sum = 0;
+ for(int i = 0; i < image.oclchannels(); i ++)
+ {
+ templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
+ }
+ templ_sum *= scale;
+ buf.image_sums.resize(buf.images.size());
+ buf.image_sqsums.resize(buf.images.size());
+
+ for(int i = 0; i < image.oclchannels(); i ++)
+ {
+ integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]);
+ }
+
+ switch(image.oclchannels())
+ {
+ case 4:
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) );
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) );
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
+ args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
+ args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
+ args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
+ args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
+ args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
+ args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sqsum_sum) );
+ break;
+ default:
+ CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
+ break;
+ }
+ }
+ openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
}
- openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
- }
-}/*ocl*/} /*cv*/
+ }/*ocl*/
+} /*cv*/
-void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method)
+void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method)
{
MatchTemplateBuf buf;
- matchTemplate(image,templ, result, method,buf);
+ matchTemplate(image, templ, result, method, buf);
}
-void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf)
+void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf)
{
CV_Assert(image.type() == templ.type());
CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows);
- typedef void (*Caller)(const oclMat&, const oclMat&, oclMat&, MatchTemplateBuf&);
+ typedef void (*Caller)(const oclMat &, const oclMat &, oclMat &, MatchTemplateBuf &);
- const Caller callers[] = {
- ::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED,
- ::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED,
+ const Caller callers[] =
+ {
+ ::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED,
+ ::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED,
::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED
};
#include "precomp.hpp"
-#define ALIGN 32
+#define ALIGN 32
#define GPU_MATRIX_MALLOC_STEP(step) (((step) + ALIGN - 1) / ALIGN) * ALIGN
using namespace cv;
{
namespace ocl
{
- void oclMat::upload(const Mat& /*m*/)
+ void oclMat::upload(const Mat & /*m*/)
{
throw_nogpu();
}
- void oclMat::download(cv::Mat& /*m*/) const
+ void oclMat::download(cv::Mat & /*m*/) const
{
throw_nogpu();
}
- void oclMat::copyTo( oclMat& /*m*/ ) const
+ void oclMat::copyTo( oclMat & /*m*/ ) const
{
throw_nogpu();
}
- void oclMat::copyTo( oclMat& /*m*/, const oclMat&/* mask */) const
+ void oclMat::copyTo( oclMat & /*m*/, const oclMat &/* mask */) const
{
throw_nogpu();
}
- void oclMat::convertTo( oclMat& /*m*/, int /*rtype*/, double /*alpha*/, double /*beta*/ ) const
+ void oclMat::convertTo( oclMat & /*m*/, int /*rtype*/, double /*alpha*/, double /*beta*/ ) const
{
throw_nogpu();
}
- oclMat &oclMat::operator = (const Scalar& /*s*/)
+ oclMat &oclMat::operator = (const Scalar & /*s*/)
{
throw_nogpu();
return *this;
}
- oclMat &oclMat::setTo(const Scalar& /*s*/, const oclMat& /*mask*/)
+ oclMat &oclMat::setTo(const Scalar & /*s*/, const oclMat & /*mask*/)
{
throw_nogpu();
return *this;
extern const char *operator_convertTo;
extern const char *operator_setTo;
extern const char *operator_setToM;
- extern const char *convertC3C4;
+ extern const char *convertC3C4;
}
}
// convert_C3C4
void convert_C3C4(const cl_mem &src, oclMat &dst, int srcStep)
{
- int dstStep_in_pixel = dst.step1() / dst.channels();
- int pixel_end = dst.wholecols * dst.wholerows -1;
+ int dstStep_in_pixel = dst.step1() / dst.oclchannels();
+ int pixel_end = dst.wholecols * dst.wholerows - 1;
Context *clCxt = dst.clCxt;
string kernelName = "convertC3C4";
- char compile_option[32];
+ char compile_option[32];
switch(dst.depth())
{
case 0:
case 6:
sprintf(compile_option, "-D GENTYPE4=double4");
break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unknown depth");
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unknown depth");
}
vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&src));
args.push_back( make_pair( sizeof(cl_int), (void *)&dstStep_in_pixel));
args.push_back( make_pair( sizeof(cl_int), (void *)&pixel_end));
- size_t globalThreads[3] = {((dst.wholecols *dst.wholerows+3)/4 + 255) / 256 * 256, 1, 1};
+ size_t globalThreads[3] = {((dst.wholecols * dst.wholerows + 3) / 4 + 255) / 256 * 256, 1, 1};
size_t localThreads[3] = {256, 1, 1};
- openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads, args, -1, -1,compile_option);
+ openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
}
////////////////////////////////////////////////////////////////////////
// convert_C4C3
void convert_C4C3(const oclMat &src, cl_mem &dst, int dstStep)
{
- int srcStep_in_pixel = src.step1() / src.channels();
- int pixel_end = src.wholecols*src.wholerows -1;
+ int srcStep_in_pixel = src.step1() / src.oclchannels();
+ int pixel_end = src.wholecols * src.wholerows - 1;
Context *clCxt = src.clCxt;
string kernelName = "convertC4C3";
- char compile_option[32];
+ char compile_option[32];
switch(src.depth())
{
case 0:
case 6:
sprintf(compile_option, "-D GENTYPE4=double4");
break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unknown depth");
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unknown depth");
}
vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_int), (void *)&srcStep_in_pixel));
args.push_back( make_pair( sizeof(cl_int), (void *)&pixel_end));
- size_t globalThreads[3] = {((src.wholecols *src.wholerows+3)/4 + 255) / 256 * 256, 1, 1};
+ size_t globalThreads[3] = {((src.wholecols * src.wholerows + 3) / 4 + 255) / 256 * 256, 1, 1};
size_t localThreads[3] = {256, 1, 1};
- openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads, args, -1, -1,compile_option);
+ openCLExecuteKernel(clCxt, &convertC3C4, kernelName, globalThreads, localThreads, args, -1, -1, compile_option);
}
void cv::ocl::oclMat::upload(const Mat &m)
Size wholeSize;
Point ofs;
m.locateROI(wholeSize, ofs);
- int type = m.type();
- if(m.channels() == 3)
- {
- type = CV_MAKETYPE(m.depth(), 4);
- }
- create(wholeSize, type);
+ // int type = m.type();
+ // if(m.oclchannels() == 3)
+ //{
+ // type = CV_MAKETYPE(m.depth(), 4);
+ //}
+ create(wholeSize, m.type());
if(m.channels() == 3)
{
- int pitch = wholeSize.width * 3 * m.elemSize1();
- int tail_padding = m.elemSize1()*3072;
- int err;
- cl_mem temp = clCreateBuffer(clCxt->impl->clContext,CL_MEM_READ_WRITE,
- (pitch*wholeSize.height+tail_padding-1)/tail_padding*tail_padding,0,&err);
- openCLVerifyCall(err);
-
- openCLMemcpy2D(clCxt,temp,pitch,m.datastart,m.step,wholeSize.width*m.elemSize(),wholeSize.height,clMemcpyHostToDevice,3);
- convert_C3C4(temp, *this, pitch);
- //int* cputemp=new int[wholeSize.height*wholeSize.width * 3];
- //int* cpudata=new int[this->step*this->wholerows/sizeof(int)];
- //openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, temp, CL_TRUE,
- // 0, wholeSize.height*wholeSize.width * 3* sizeof(int), cputemp, 0, NULL, NULL));
- //openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)data, CL_TRUE,
- // 0, this->step*this->wholerows, cpudata, 0, NULL, NULL));
- //for(int i=0;i<wholeSize.height;i++)
- //{
- // int *a = cputemp+i*wholeSize.width * 3,*b = cpudata + i*this->step/sizeof(int);
- // for(int j=0;j<wholeSize.width;j++)
- // {
- // if((a[3*j] != b[4*j])||(a[3*j+1] != b[4*j+1])||(a[3*j+2] != b[4*j+2]))
- // printf("rows=%d,cols=%d,cputtemp=%d,%d,%d;cpudata=%d,%d,%d\n",
- // i,j,a[3*j],a[3*j+1],a[3*j+2],b[4*j],b[4*j+1],b[4*j+2]);
- // }
- //}
- //delete []cputemp;
- //delete []cpudata;
- openCLSafeCall(clReleaseMemObject(temp));
+ int pitch = wholeSize.width * 3 * m.elemSize1();
+ int tail_padding = m.elemSize1() * 3072;
+ int err;
+ cl_mem temp = clCreateBuffer(clCxt->impl->clContext, CL_MEM_READ_WRITE,
+ (pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
+ openCLVerifyCall(err);
+
+ openCLMemcpy2D(clCxt, temp, pitch, m.datastart, m.step, wholeSize.width * m.elemSize(), wholeSize.height, clMemcpyHostToDevice, 3);
+ convert_C3C4(temp, *this, pitch);
+ //int* cputemp=new int[wholeSize.height*wholeSize.width * 3];
+ //int* cpudata=new int[this->step*this->wholerows/sizeof(int)];
+ //openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, temp, CL_TRUE,
+ // 0, wholeSize.height*wholeSize.width * 3* sizeof(int), cputemp, 0, NULL, NULL));
+ //openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)data, CL_TRUE,
+ // 0, this->step*this->wholerows, cpudata, 0, NULL, NULL));
+ //for(int i=0;i<wholeSize.height;i++)
+ //{
+ // int *a = cputemp+i*wholeSize.width * 3,*b = cpudata + i*this->step/sizeof(int);
+ // for(int j=0;j<wholeSize.width;j++)
+ // {
+ // if((a[3*j] != b[4*j])||(a[3*j+1] != b[4*j+1])||(a[3*j+2] != b[4*j+2]))
+ // printf("rows=%d,cols=%d,cputtemp=%d,%d,%d;cpudata=%d,%d,%d\n",
+ // i,j,a[3*j],a[3*j+1],a[3*j+2],b[4*j],b[4*j+1],b[4*j+2]);
+ // }
+ //}
+ //delete []cputemp;
+ //delete []cpudata;
+ openCLSafeCall(clReleaseMemObject(temp));
}
else
- {
- openCLMemcpy2D(clCxt, data, step, m.datastart, m.step, wholeSize.width * elemSize(), wholeSize.height, clMemcpyHostToDevice);
- }
+ {
+ openCLMemcpy2D(clCxt, data, step, m.datastart, m.step, wholeSize.width * elemSize(), wholeSize.height, clMemcpyHostToDevice);
+ }
rows = m.rows;
cols = m.cols;
offset = ofs.y * step + ofs.x * elemSize();
- download_channels = m.channels();
+ //download_channels = m.channels();
}
void cv::ocl::oclMat::download(cv::Mat &m) const
{
CV_DbgAssert(!this->empty());
- int t = type();
- if(download_channels == 3)
- {
- t = CV_MAKETYPE(depth(), 3);
- }
- m.create(wholerows, wholecols, t);
-
- if(download_channels == 3)
+ // int t = type();
+ // if(download_channels == 3)
+ //{
+ // t = CV_MAKETYPE(depth(), 3);
+ //}
+ m.create(wholerows, wholecols, type());
+
+ if(m.channels() == 3)
{
- int pitch = wholecols * 3 * m.elemSize1();
- int tail_padding = m.elemSize1()*3072;
- int err;
- cl_mem temp = clCreateBuffer(clCxt->impl->clContext,CL_MEM_READ_WRITE,
- (pitch*wholerows+tail_padding-1)/tail_padding*tail_padding,0,&err);
- openCLVerifyCall(err);
-
- convert_C4C3(*this, temp, pitch/m.elemSize1());
- openCLMemcpy2D(clCxt,m.data,m.step,temp,pitch,wholecols*m.elemSize(),wholerows,clMemcpyDeviceToHost,3);
- //int* cputemp=new int[wholecols*wholerows * 3];
- //int* cpudata=new int[this->step*this->wholerows/sizeof(int)];
- //openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, temp, CL_TRUE,
- // 0, wholecols*wholerows * 3* sizeof(int), cputemp, 0, NULL, NULL));
- //openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)data, CL_TRUE,
- // 0, this->step*this->wholerows, cpudata, 0, NULL, NULL));
- //for(int i=0;i<wholerows;i++)
- //{
- // int *a = cputemp+i*wholecols * 3,*b = cpudata + i*this->step/sizeof(int);
- // for(int j=0;j<wholecols;j++)
- // {
- // if((a[3*j] != b[4*j])||(a[3*j+1] != b[4*j+1])||(a[3*j+2] != b[4*j+2]))
- // printf("rows=%d,cols=%d,cputtemp=%d,%d,%d;cpudata=%d,%d,%d\n",
- // i,j,a[3*j],a[3*j+1],a[3*j+2],b[4*j],b[4*j+1],b[4*j+2]);
- // }
- //}
- //delete []cputemp;
- //delete []cpudata;
- openCLSafeCall(clReleaseMemObject(temp));
+ int pitch = wholecols * 3 * m.elemSize1();
+ int tail_padding = m.elemSize1() * 3072;
+ int err;
+ cl_mem temp = clCreateBuffer(clCxt->impl->clContext, CL_MEM_READ_WRITE,
+ (pitch * wholerows + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
+ openCLVerifyCall(err);
+
+ convert_C4C3(*this, temp, pitch / m.elemSize1());
+ openCLMemcpy2D(clCxt, m.data, m.step, temp, pitch, wholecols * m.elemSize(), wholerows, clMemcpyDeviceToHost, 3);
+ //int* cputemp=new int[wholecols*wholerows * 3];
+ //int* cpudata=new int[this->step*this->wholerows/sizeof(int)];
+ //openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, temp, CL_TRUE,
+ // 0, wholecols*wholerows * 3* sizeof(int), cputemp, 0, NULL, NULL));
+ //openCLSafeCall(clEnqueueReadBuffer(clCxt->impl->clCmdQueue, (cl_mem)data, CL_TRUE,
+ // 0, this->step*this->wholerows, cpudata, 0, NULL, NULL));
+ //for(int i=0;i<wholerows;i++)
+ //{
+ // int *a = cputemp+i*wholecols * 3,*b = cpudata + i*this->step/sizeof(int);
+ // for(int j=0;j<wholecols;j++)
+ // {
+ // if((a[3*j] != b[4*j])||(a[3*j+1] != b[4*j+1])||(a[3*j+2] != b[4*j+2]))
+ // printf("rows=%d,cols=%d,cputtemp=%d,%d,%d;cpudata=%d,%d,%d\n",
+ // i,j,a[3*j],a[3*j+1],a[3*j+2],b[4*j],b[4*j+1],b[4*j+2]);
+ // }
+ //}
+ //delete []cputemp;
+ //delete []cpudata;
+ openCLSafeCall(clReleaseMemObject(temp));
}
else
- {
- openCLMemcpy2D(clCxt, m.data, m.step, data, step, wholecols * elemSize(), wholerows, clMemcpyDeviceToHost);
- }
+ {
+ openCLMemcpy2D(clCxt, m.data, m.step, data, step, wholecols * elemSize(), wholerows, clMemcpyDeviceToHost);
+ }
Size wholesize;
Point ofs;
locateROI(wholesize, ofs);
{
CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols &&
src.rows == dst.rows && src.cols == dst.cols
- && mask.type() == CV_8UC1);
+ && mask.type() == CV_8UC1);
vector<pair<size_t , const void *> > args;
{"uchar3", "char3", "ushort3", "short3", "int3", "float3", "double3"},
{"uchar4", "char4", "ushort4", "short4", "int4", "float4", "double4"}
};
- char compile_option[32];
- sprintf(compile_option, "-D GENTYPE=%s", string_types[dst.channels()-1][dst.depth()].c_str());
+ char compile_option[32];
+ sprintf(compile_option, "-D GENTYPE=%s", string_types[dst.oclchannels() - 1][dst.depth()].c_str());
size_t localThreads[3] = {16, 16, 1};
size_t globalThreads[3];
args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset ));
openCLExecuteKernel(dst.clCxt , &operator_copyToM, kernelName, globalThreads,
- localThreads, args, -1, -1,compile_option);
+ localThreads, args, -1, -1, compile_option);
}
void cv::ocl::oclMat::copyTo( oclMat &m ) const
args.push_back( make_pair( sizeof(cl_float) , (void *)&alpha_f ));
args.push_back( make_pair( sizeof(cl_float) , (void *)&beta_f ));
openCLExecuteKernel(dst.clCxt , &operator_convertTo, kernelName, globalThreads,
- localThreads, args, dst.channels(), dst.depth());
+ localThreads, args, dst.oclchannels(), dst.depth());
}
void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double beta ) const
{
{
globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
}
- char compile_option[32];
- union sc
- {
- cl_uchar4 uval;
- cl_char4 cval;
- cl_ushort4 usval;
- cl_short4 shval;
- cl_int4 ival;
- cl_float4 fval;
- cl_double4 dval;
- }val;
+ char compile_option[32];
+ union sc
+ {
+ cl_uchar4 uval;
+ cl_char4 cval;
+ cl_ushort4 usval;
+ cl_short4 shval;
+ cl_int4 ival;
+ cl_float4 fval;
+ cl_double4 dval;
+ } val;
switch(dst.depth())
{
case CV_8U:
- val.uval.s[0] = saturate_cast<uchar>(scalar.val[0]);
- val.uval.s[1] = saturate_cast<uchar>(scalar.val[1]);
- val.uval.s[2] = saturate_cast<uchar>(scalar.val[2]);
- val.uval.s[3] = saturate_cast<uchar>(scalar.val[3]);
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=uchar");
- args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=uchar4");
- args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
+ val.uval.s[0] = saturate_cast<uchar>(scalar.val[0]);
+ val.uval.s[1] = saturate_cast<uchar>(scalar.val[1]);
+ val.uval.s[2] = saturate_cast<uchar>(scalar.val[2]);
+ val.uval.s[3] = saturate_cast<uchar>(scalar.val[3]);
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=uchar");
+ args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=uchar4");
+ args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
break;
case CV_8S:
- val.cval.s[0] = saturate_cast<char>(scalar.val[0]);
- val.cval.s[1] = saturate_cast<char>(scalar.val[1]);
- val.cval.s[2] = saturate_cast<char>(scalar.val[2]);
- val.cval.s[3] = saturate_cast<char>(scalar.val[3]);
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=char");
- args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=char4");
- args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
+ val.cval.s[0] = saturate_cast<char>(scalar.val[0]);
+ val.cval.s[1] = saturate_cast<char>(scalar.val[1]);
+ val.cval.s[2] = saturate_cast<char>(scalar.val[2]);
+ val.cval.s[3] = saturate_cast<char>(scalar.val[3]);
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=char");
+ args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=char4");
+ args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
break;
case CV_16U:
- val.usval.s[0] = saturate_cast<ushort>(scalar.val[0]);
- val.usval.s[1] = saturate_cast<ushort>(scalar.val[1]);
- val.usval.s[2] = saturate_cast<ushort>(scalar.val[2]);
- val.usval.s[3] = saturate_cast<ushort>(scalar.val[3]);
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=ushort");
- args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=ushort4");
- args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
+ val.usval.s[0] = saturate_cast<ushort>(scalar.val[0]);
+ val.usval.s[1] = saturate_cast<ushort>(scalar.val[1]);
+ val.usval.s[2] = saturate_cast<ushort>(scalar.val[2]);
+ val.usval.s[3] = saturate_cast<ushort>(scalar.val[3]);
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=ushort");
+ args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=ushort4");
+ args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
break;
case CV_16S:
- val.shval.s[0] = saturate_cast<short>(scalar.val[0]);
- val.shval.s[1] = saturate_cast<short>(scalar.val[1]);
- val.shval.s[2] = saturate_cast<short>(scalar.val[2]);
- val.shval.s[3] = saturate_cast<short>(scalar.val[3]);
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=short");
- args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=short4");
- args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
+ val.shval.s[0] = saturate_cast<short>(scalar.val[0]);
+ val.shval.s[1] = saturate_cast<short>(scalar.val[1]);
+ val.shval.s[2] = saturate_cast<short>(scalar.val[2]);
+ val.shval.s[3] = saturate_cast<short>(scalar.val[3]);
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=short");
+ args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=short4");
+ args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
break;
case CV_32S:
- val.ival.s[0] = saturate_cast<int>(scalar.val[0]);
- val.ival.s[1] = saturate_cast<int>(scalar.val[1]);
- val.ival.s[2] = saturate_cast<int>(scalar.val[2]);
- val.ival.s[3] = saturate_cast<int>(scalar.val[3]);
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=int");
- args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] ));
- break;
- case 2:
- sprintf(compile_option, "-D GENTYPE=int2");
- cl_int2 i2val;
- i2val.s[0] = val.ival.s[0];
- i2val.s[1] = val.ival.s[1];
- args.push_back( make_pair( sizeof(cl_int2) , (void *)&i2val ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=int4");
- args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
+ val.ival.s[0] = saturate_cast<int>(scalar.val[0]);
+ val.ival.s[1] = saturate_cast<int>(scalar.val[1]);
+ val.ival.s[2] = saturate_cast<int>(scalar.val[2]);
+ val.ival.s[3] = saturate_cast<int>(scalar.val[3]);
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=int");
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] ));
+ break;
+ case 2:
+ sprintf(compile_option, "-D GENTYPE=int2");
+ cl_int2 i2val;
+ i2val.s[0] = val.ival.s[0];
+ i2val.s[1] = val.ival.s[1];
+ args.push_back( make_pair( sizeof(cl_int2) , (void *)&i2val ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=int4");
+ args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
break;
case CV_32F:
- val.fval.s[0] = scalar.val[0];
- val.fval.s[1] = scalar.val[1];
- val.fval.s[2] = scalar.val[2];
- val.fval.s[3] = scalar.val[3];
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=float");
- args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=float4");
- args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
+ val.fval.s[0] = scalar.val[0];
+ val.fval.s[1] = scalar.val[1];
+ val.fval.s[2] = scalar.val[2];
+ val.fval.s[3] = scalar.val[3];
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=float");
+ args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=float4");
+ args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
break;
case CV_64F:
- val.dval.s[0] = scalar.val[0];
- val.dval.s[1] = scalar.val[1];
- val.dval.s[2] = scalar.val[2];
- val.dval.s[3] = scalar.val[3];
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=double");
- args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=double4");
- args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
+ val.dval.s[0] = scalar.val[0];
+ val.dval.s[1] = scalar.val[1];
+ val.dval.s[2] = scalar.val[2];
+ val.dval.s[3] = scalar.val[3];
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=double");
+ args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=double4");
+ args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unknown depth");
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unknown depth");
}
#if CL_VERSION_1_2
- if(dst.offset==0 && dst.cols==dst.wholecols)
- {
- clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue,(cl_mem)dst.data,args[0].second,args[0].first,0,dst.step*dst.rows,0,NULL,NULL);
- }
- else
- {
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
- openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads,
- localThreads, args, -1, -1,compile_option);
- }
+ if(dst.offset == 0 && dst.cols == dst.wholecols)
+ {
+ clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue, (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL);
+ }
+ else
+ {
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
+ openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads,
+ localThreads, args, -1, -1, compile_option);
+ }
#else
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads,
- localThreads, args, -1, -1,compile_option);
+ localThreads, args, -1, -1, compile_option);
#endif
}
globalThreads[1] = (dst.rows + localThreads[1] - 1) / localThreads[1] * localThreads[1];
globalThreads[2] = 1;
int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize();
- char compile_option[32];
- union sc
- {
- cl_uchar4 uval;
- cl_char4 cval;
- cl_ushort4 usval;
- cl_short4 shval;
- cl_int4 ival;
- cl_float4 fval;
- cl_double4 dval;
- }val;
+ char compile_option[32];
+ union sc
+ {
+ cl_uchar4 uval;
+ cl_char4 cval;
+ cl_ushort4 usval;
+ cl_short4 shval;
+ cl_int4 ival;
+ cl_float4 fval;
+ cl_double4 dval;
+ } val;
switch(dst.depth())
{
case CV_8U:
- val.uval.s[0] = saturate_cast<uchar>(scalar.val[0]);
- val.uval.s[1] = saturate_cast<uchar>(scalar.val[1]);
- val.uval.s[2] = saturate_cast<uchar>(scalar.val[2]);
- val.uval.s[3] = saturate_cast<uchar>(scalar.val[3]);
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=uchar");
- args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=uchar4");
- args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
+ val.uval.s[0] = saturate_cast<uchar>(scalar.val[0]);
+ val.uval.s[1] = saturate_cast<uchar>(scalar.val[1]);
+ val.uval.s[2] = saturate_cast<uchar>(scalar.val[2]);
+ val.uval.s[3] = saturate_cast<uchar>(scalar.val[3]);
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=uchar");
+ args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=uchar4");
+ args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
break;
case CV_8S:
- val.cval.s[0] = saturate_cast<char>(scalar.val[0]);
- val.cval.s[1] = saturate_cast<char>(scalar.val[1]);
- val.cval.s[2] = saturate_cast<char>(scalar.val[2]);
- val.cval.s[3] = saturate_cast<char>(scalar.val[3]);
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=char");
- args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=char4");
- args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
+ val.cval.s[0] = saturate_cast<char>(scalar.val[0]);
+ val.cval.s[1] = saturate_cast<char>(scalar.val[1]);
+ val.cval.s[2] = saturate_cast<char>(scalar.val[2]);
+ val.cval.s[3] = saturate_cast<char>(scalar.val[3]);
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=char");
+ args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=char4");
+ args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
break;
case CV_16U:
- val.usval.s[0] = saturate_cast<ushort>(scalar.val[0]);
- val.usval.s[1] = saturate_cast<ushort>(scalar.val[1]);
- val.usval.s[2] = saturate_cast<ushort>(scalar.val[2]);
- val.usval.s[3] = saturate_cast<ushort>(scalar.val[3]);
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=ushort");
- args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=ushort4");
- args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
+ val.usval.s[0] = saturate_cast<ushort>(scalar.val[0]);
+ val.usval.s[1] = saturate_cast<ushort>(scalar.val[1]);
+ val.usval.s[2] = saturate_cast<ushort>(scalar.val[2]);
+ val.usval.s[3] = saturate_cast<ushort>(scalar.val[3]);
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=ushort");
+ args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=ushort4");
+ args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
break;
case CV_16S:
- val.shval.s[0] = saturate_cast<short>(scalar.val[0]);
- val.shval.s[1] = saturate_cast<short>(scalar.val[1]);
- val.shval.s[2] = saturate_cast<short>(scalar.val[2]);
- val.shval.s[3] = saturate_cast<short>(scalar.val[3]);
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=short");
- args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=short4");
- args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
+ val.shval.s[0] = saturate_cast<short>(scalar.val[0]);
+ val.shval.s[1] = saturate_cast<short>(scalar.val[1]);
+ val.shval.s[2] = saturate_cast<short>(scalar.val[2]);
+ val.shval.s[3] = saturate_cast<short>(scalar.val[3]);
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=short");
+ args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=short4");
+ args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
break;
case CV_32S:
- val.ival.s[0] = saturate_cast<int>(scalar.val[0]);
- val.ival.s[1] = saturate_cast<int>(scalar.val[1]);
- val.ival.s[2] = saturate_cast<int>(scalar.val[2]);
- val.ival.s[3] = saturate_cast<int>(scalar.val[3]);
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=int");
- args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=int4");
- args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
+ val.ival.s[0] = saturate_cast<int>(scalar.val[0]);
+ val.ival.s[1] = saturate_cast<int>(scalar.val[1]);
+ val.ival.s[2] = saturate_cast<int>(scalar.val[2]);
+ val.ival.s[3] = saturate_cast<int>(scalar.val[3]);
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=int");
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=int4");
+ args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
break;
case CV_32F:
- val.fval.s[0] = scalar.val[0];
- val.fval.s[1] = scalar.val[1];
- val.fval.s[2] = scalar.val[2];
- val.fval.s[3] = scalar.val[3];
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=float");
- args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=float4");
- args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
+ val.fval.s[0] = scalar.val[0];
+ val.fval.s[1] = scalar.val[1];
+ val.fval.s[2] = scalar.val[2];
+ val.fval.s[3] = scalar.val[3];
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=float");
+ args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=float4");
+ args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
break;
case CV_64F:
- val.dval.s[0] = scalar.val[0];
- val.dval.s[1] = scalar.val[1];
- val.dval.s[2] = scalar.val[2];
- val.dval.s[3] = scalar.val[3];
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=double");
- args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=double4");
- args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
+ val.dval.s[0] = scalar.val[0];
+ val.dval.s[1] = scalar.val[1];
+ val.dval.s[2] = scalar.val[2];
+ val.dval.s[3] = scalar.val[3];
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=double");
+ args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=double4");
+ args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unknown depth");
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unknown depth");
}
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.step ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&mask.offset ));
openCLExecuteKernel(dst.clCxt , &operator_setToM, kernelName, globalThreads,
- localThreads, args, -1, -1,compile_option);
+ localThreads, args, -1, -1, compile_option);
}
oclMat &cv::ocl::oclMat::setTo(const Scalar &scalar, const oclMat &mask)
// (cl_mem)mem,1,0,sizeof(double)*4,s,0,0,0));
if (mask.empty())
{
- if(type()==CV_8UC1)
- {
- set_to_withoutmask_run(*this, scalar, "set_to_without_mask_C1_D0");
- }
- else
- {
- set_to_withoutmask_run(*this, scalar, "set_to_without_mask");
- }
+ if(type() == CV_8UC1)
+ {
+ set_to_withoutmask_run(*this, scalar, "set_to_without_mask_C1_D0");
+ }
+ else
+ {
+ set_to_withoutmask_run(*this, scalar, "set_to_without_mask");
+ }
}
else
{
- set_to_withmask_run(*this, scalar, mask, "set_to_with_mask");
+ set_to_withmask_run(*this, scalar, mask, "set_to_with_mask");
}
return *this;
oclMat cv::ocl::oclMat::reshape(int new_cn, int new_rows) const
{
- if( new_rows != 0 && new_rows != rows)\r
- {\r
- CV_Error( CV_StsBadFunc,\r
- "oclMat's number of rows can not be changed for current version" );\r
- }
-
- oclMat hdr = *this;
-
- int cn = channels();\r
- if (new_cn == 0)\r
- new_cn = cn;\r
-\r
- int total_width = cols * cn;\r
-\r
- if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0)\r
- new_rows = rows * total_width / new_cn;\r
-\r
- if (new_rows != 0 && new_rows != rows)\r
- {\r
- int total_size = total_width * rows;\r
-\r
- if (!isContinuous())\r
- CV_Error(CV_BadStep, "The matrix is not continuous, thus its number of rows can not be changed");\r
-\r
- if ((unsigned)new_rows > (unsigned)total_size)\r
- CV_Error(CV_StsOutOfRange, "Bad new number of rows");\r
-\r
- total_width = total_size / new_rows;\r
-\r
- if (total_width * new_rows != total_size)\r
- CV_Error(CV_StsBadArg, "The total number of matrix elements is not divisible by the new number of rows");\r
-\r
- hdr.rows = new_rows;\r
- hdr.step = total_width * elemSize1();\r
- }\r
-\r
- int new_width = total_width / new_cn;\r
-\r
- if (new_width * new_cn != total_width)\r
- CV_Error(CV_BadNumChannels, "The total width is not divisible by the new number of channels");\r
-\r
- hdr.cols = new_width;\r
- hdr.wholecols = new_width;\r
- hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT);\r
-\r
+ if( new_rows != 0 && new_rows != rows)
+
+ {
+
+ CV_Error( CV_StsBadFunc,
+
+ "oclMat's number of rows can not be changed for current version" );
+
+ }
+
+ oclMat hdr = *this;
+
+ int cn = oclchannels();
+
+ if (new_cn == 0)
+
+ new_cn = cn;
+
+
+
+ int total_width = cols * cn;
+
+
+
+ if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0)
+
+ new_rows = rows * total_width / new_cn;
+
+
+
+ if (new_rows != 0 && new_rows != rows)
+
+ {
+
+ int total_size = total_width * rows;
+
+
+
+ if (!isContinuous())
+
+ CV_Error(CV_BadStep, "The matrix is not continuous, thus its number of rows can not be changed");
+
+
+
+ if ((unsigned)new_rows > (unsigned)total_size)
+
+ CV_Error(CV_StsOutOfRange, "Bad new number of rows");
+
+
+
+ total_width = total_size / new_rows;
+
+
+
+ if (total_width * new_rows != total_size)
+
+ CV_Error(CV_StsBadArg, "The total number of matrix elements is not divisible by the new number of rows");
+
+
+
+ hdr.rows = new_rows;
+
+ hdr.step = total_width * elemSize1();
+
+ }
+
+
+
+ int new_width = total_width / new_cn;
+
+
+
+ if (new_width * new_cn != total_width)
+
+ CV_Error(CV_BadNumChannels, "The total width is not divisible by the new number of channels");
+
+
+
+ hdr.cols = new_width;
+
+ hdr.wholecols = new_width;
+
+ hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT);
+
+
+
return hdr;
}
void cv::ocl::oclMat::create(int _rows, int _cols, int _type)
{
clCxt = Context::getContext();
- //cout << "cv::ocl::oclMat::create()." << endl;
-
/* core logic */
_type &= TYPE_MASK;
- download_channels = CV_MAT_CN(_type);
- if(download_channels==3)
- {
- _type = CV_MAKE_TYPE((CV_MAT_DEPTH(_type)),4);
- }
+ //download_channels = CV_MAT_CN(_type);
+ //if(download_channels==3)
+ //{
+ // _type = CV_MAKE_TYPE((CV_MAT_DEPTH(_type)),4);
+ //}
if( rows == _rows && cols == _cols && type() == _type && data )
return;
if( data )
openCLMallocPitch(clCxt, &dev_ptr, &step, GPU_MATRIX_MALLOC_STEP(esz * cols), rows);
//openCLMallocPitch(clCxt,&dev_ptr, &step, esz * cols, rows);
- if (esz *cols == step)
+ if (esz * cols == step)
flags |= Mat::CONTINUOUS_FLAG;
int64 _nettosize = (int64)step * rows;
step = rows = cols = 0;
offset = wholerows = wholecols = 0;
refcount = 0;
- download_channels=0;
}
#endif /* !defined (HAVE_OPENCL) */
// provide additional methods for the user to interact with the command queue after a task is fired
void openCLExecuteKernel_2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
- size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
- int depth, char *build_options, FLUSH_MODE finish_mode)
+ size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
+ int depth, char *build_options, FLUSH_MODE finish_mode)
{
//construct kernel name
//The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options);
if ( localThreads != NULL)
- {
+ {
globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0];
globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
openCLSafeCall(clEnqueueNDRangeKernel(clCxt->impl->clCmdQueue, kernel, 3, NULL, globalThreads,
- localThreads, 0, NULL, NULL));
+ localThreads, 0, NULL, NULL));
switch(finish_mode)
{
}
void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName,
- size_t globalThreads[3], size_t localThreads[3],
- vector< pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode)
+ size_t globalThreads[3], size_t localThreads[3],
+ vector< pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode)
{
openCLExecuteKernel2(clCxt, source, kernelName, globalThreads, localThreads, args,
- channels, depth, NULL, finish_mode);
+ channels, depth, NULL, finish_mode);
}
void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName,
- size_t globalThreads[3], size_t localThreads[3],
- vector< pair<size_t, const void *> > &args, int channels, int depth, char *build_options, FLUSH_MODE finish_mode)
+ size_t globalThreads[3], size_t localThreads[3],
+ vector< pair<size_t, const void *> > &args, int channels, int depth, char *build_options, FLUSH_MODE finish_mode)
{
openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
- build_options, finish_mode);
+ build_options, finish_mode);
}
}//namespace ocl
DISABLE
};
void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
- size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
+ size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
- size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
- int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
+ size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
+ int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
}//namespace ocl
}//namespace cv
size_t widthInBytes, size_t height);
void openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
const void *src, size_t spitch,
- size_t width, size_t height, enum openCLMemcpyKind kind, int channels=-1);
+ size_t width, size_t height, enum openCLMemcpyKind kind, int channels = -1);
void openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
const void *src, size_t spitch,
size_t width, size_t height, int src_offset, enum openCLMemcpyKind kind);
void openCLFree(void *devPtr);
- cl_mem openCLCreateBuffer(Context *clCxt,size_t flag, size_t size);
- void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void* host_buffer, size_t size);
+ cl_mem openCLCreateBuffer(Context *clCxt, size_t flag, size_t size);
+ void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size);
cl_kernel openCLGetKernelFromSource(const Context *clCxt,
const char **source, string kernelName);
cl_kernel openCLGetKernelFromSource(const Context *clCxt,
void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, vector< std::pair<size_t, const void *> > &args,
int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1);
void openCLExecuteKernel_(Context *clCxt , const char **source, string kernelName,
- size_t globalThreads[3], size_t localThreads[3],
- vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options);
+ size_t globalThreads[3], size_t localThreads[3],
+ vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options);
void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels, int depth);
void openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
//void openCLMemcpy2DWithNoPadding(cl_command_queue command_queue, cl_mem buffer, size_t size, size_t offset, void *ptr,
// enum openCLMemcpyKind kind, cl_bool blocking_write);
- int savetofile(const Context *clcxt, cl_program &program, const char *fileName);
- struct Context::Impl
- {
+ int savetofile(const Context *clcxt, cl_program &program, const char *fileName);
+ struct Context::Impl
+ {
//Information of the OpenCL context
cl_context clContext;
cl_command_queue clCmdQueue;
cl_device_id *devices;
- string devName;
+ string devName;
cl_uint maxDimensions;
size_t maxWorkGroupSize;
size_t *maxWorkItemSizes;
int double_support;
//extra options to recognize vendor specific fp64 extensions
char *extra_options;
- string Binpath;
- };
+ string Binpath;
+ };
}
}
// @Authors
// Dachuan Zhao, dachuan@multicorewareinc.com
// Yao Wang, yao@multicorewareinc.com
-//
+//
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols));
- openCLExecuteKernel(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
+ openCLExecuteKernel(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
}
//////////////////////////////////////////////////////////////////////////////
// pyrDown
-void cv::ocl::pyrDown(const oclMat& src, oclMat& dst)
+void cv::ocl::pyrDown(const oclMat &src, oclMat &dst)
{
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
- dst.download_channels=src.download_channels;
-
pyrdown_run(src, dst);
}
#if !defined (HAVE_OPENCL)
-void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat&, const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat*) { }
-void cv::ocl::PyrLKOpticalFlow::dense(const oclMat&, const oclMat&, oclMat&, oclMat&, oclMat*) { }
+void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &, const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat *) { }
+void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat *) { }
#else /* !defined (HAVE_OPENCL) */
namespace
{
- void calcPatchSize(cv::Size winSize, int cn, dim3& block, dim3& patch, bool isDeviceArch11)
+ void calcPatchSize(cv::Size winSize, int cn, dim3 &block, dim3 &patch, bool isDeviceArch11)
{
winSize.width *= cn;
args.push_back( make_pair( sizeof(cl_float) , (void *)&alpha_f ));
args.push_back( make_pair( sizeof(cl_float) , (void *)&beta_f ));
openCLExecuteKernel2(dst.clCxt , &operator_convertTo, kernelName, globalThreads,
- localThreads, args, dst.channels(), dst.depth(), CLFLUSH);
+ localThreads, args, dst.oclchannels(), dst.depth(), CLFLUSH);
}
void convertTo( const oclMat &src, oclMat &m, int rtype, double alpha = 1, double beta = 0 );
void convertTo( const oclMat &src, oclMat &dst, int rtype, double alpha, double beta )
if( rtype < 0 )
rtype = src.type();
else
- rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.channels());
+ rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.oclchannels());
int sdepth = src.depth(), ddepth = CV_MAT_DEPTH(rtype);
if( sdepth == ddepth && noScale )
{
globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
}
- char compile_option[32];
- union sc
- {
- cl_uchar4 uval;
- cl_char4 cval;
- cl_ushort4 usval;
- cl_short4 shval;
- cl_int4 ival;
- cl_float4 fval;
- cl_double4 dval;
- }val;
+ char compile_option[32];
+ union sc
+ {
+ cl_uchar4 uval;
+ cl_char4 cval;
+ cl_ushort4 usval;
+ cl_short4 shval;
+ cl_int4 ival;
+ cl_float4 fval;
+ cl_double4 dval;
+ } val;
switch(dst.depth())
{
case 0:
- val.uval.s[0] = saturate_cast<uchar>(scalar.val[0]);
- val.uval.s[1] = saturate_cast<uchar>(scalar.val[1]);
- val.uval.s[2] = saturate_cast<uchar>(scalar.val[2]);
- val.uval.s[3] = saturate_cast<uchar>(scalar.val[3]);
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=uchar");
- args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=uchar4");
- args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
+ val.uval.s[0] = saturate_cast<uchar>(scalar.val[0]);
+ val.uval.s[1] = saturate_cast<uchar>(scalar.val[1]);
+ val.uval.s[2] = saturate_cast<uchar>(scalar.val[2]);
+ val.uval.s[3] = saturate_cast<uchar>(scalar.val[3]);
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=uchar");
+ args.push_back( make_pair( sizeof(cl_uchar) , (void *)&val.uval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=uchar4");
+ args.push_back( make_pair( sizeof(cl_uchar4) , (void *)&val.uval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
break;
case 1:
- val.cval.s[0] = saturate_cast<char>(scalar.val[0]);
- val.cval.s[1] = saturate_cast<char>(scalar.val[1]);
- val.cval.s[2] = saturate_cast<char>(scalar.val[2]);
- val.cval.s[3] = saturate_cast<char>(scalar.val[3]);
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=char");
- args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=char4");
- args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
+ val.cval.s[0] = saturate_cast<char>(scalar.val[0]);
+ val.cval.s[1] = saturate_cast<char>(scalar.val[1]);
+ val.cval.s[2] = saturate_cast<char>(scalar.val[2]);
+ val.cval.s[3] = saturate_cast<char>(scalar.val[3]);
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=char");
+ args.push_back( make_pair( sizeof(cl_char) , (void *)&val.cval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=char4");
+ args.push_back( make_pair( sizeof(cl_char4) , (void *)&val.cval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
break;
case 2:
- val.usval.s[0] = saturate_cast<ushort>(scalar.val[0]);
- val.usval.s[1] = saturate_cast<ushort>(scalar.val[1]);
- val.usval.s[2] = saturate_cast<ushort>(scalar.val[2]);
- val.usval.s[3] = saturate_cast<ushort>(scalar.val[3]);
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=ushort");
- args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=ushort4");
- args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
+ val.usval.s[0] = saturate_cast<ushort>(scalar.val[0]);
+ val.usval.s[1] = saturate_cast<ushort>(scalar.val[1]);
+ val.usval.s[2] = saturate_cast<ushort>(scalar.val[2]);
+ val.usval.s[3] = saturate_cast<ushort>(scalar.val[3]);
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=ushort");
+ args.push_back( make_pair( sizeof(cl_ushort) , (void *)&val.usval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=ushort4");
+ args.push_back( make_pair( sizeof(cl_ushort4) , (void *)&val.usval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
break;
case 3:
- val.shval.s[0] = saturate_cast<short>(scalar.val[0]);
- val.shval.s[1] = saturate_cast<short>(scalar.val[1]);
- val.shval.s[2] = saturate_cast<short>(scalar.val[2]);
- val.shval.s[3] = saturate_cast<short>(scalar.val[3]);
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=short");
- args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=short4");
- args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
+ val.shval.s[0] = saturate_cast<short>(scalar.val[0]);
+ val.shval.s[1] = saturate_cast<short>(scalar.val[1]);
+ val.shval.s[2] = saturate_cast<short>(scalar.val[2]);
+ val.shval.s[3] = saturate_cast<short>(scalar.val[3]);
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=short");
+ args.push_back( make_pair( sizeof(cl_short) , (void *)&val.shval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=short4");
+ args.push_back( make_pair( sizeof(cl_short4) , (void *)&val.shval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
break;
case 4:
- val.ival.s[0] = saturate_cast<int>(scalar.val[0]);
- val.ival.s[1] = saturate_cast<int>(scalar.val[1]);
- val.ival.s[2] = saturate_cast<int>(scalar.val[2]);
- val.ival.s[3] = saturate_cast<int>(scalar.val[3]);
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=int");
- args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] ));
- break;
- case 2:
- sprintf(compile_option, "-D GENTYPE=int2");
- cl_int2 i2val;
- i2val.s[0] = val.ival.s[0];
- i2val.s[1] = val.ival.s[1];
- args.push_back( make_pair( sizeof(cl_int2) , (void *)&i2val ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=int4");
- args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
+ val.ival.s[0] = saturate_cast<int>(scalar.val[0]);
+ val.ival.s[1] = saturate_cast<int>(scalar.val[1]);
+ val.ival.s[2] = saturate_cast<int>(scalar.val[2]);
+ val.ival.s[3] = saturate_cast<int>(scalar.val[3]);
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=int");
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&val.ival.s[0] ));
+ break;
+ case 2:
+ sprintf(compile_option, "-D GENTYPE=int2");
+ cl_int2 i2val;
+ i2val.s[0] = val.ival.s[0];
+ i2val.s[1] = val.ival.s[1];
+ args.push_back( make_pair( sizeof(cl_int2) , (void *)&i2val ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=int4");
+ args.push_back( make_pair( sizeof(cl_int4) , (void *)&val.ival ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
break;
case 5:
- val.fval.s[0] = (float)scalar.val[0];
- val.fval.s[1] = (float)scalar.val[1];
- val.fval.s[2] = (float)scalar.val[2];
- val.fval.s[3] = (float)scalar.val[3];
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=float");
- args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=float4");
- args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
+ val.fval.s[0] = (float)scalar.val[0];
+ val.fval.s[1] = (float)scalar.val[1];
+ val.fval.s[2] = (float)scalar.val[2];
+ val.fval.s[3] = (float)scalar.val[3];
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=float");
+ args.push_back( make_pair( sizeof(cl_float) , (void *)&val.fval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=float4");
+ args.push_back( make_pair( sizeof(cl_float4) , (void *)&val.fval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
break;
case 6:
- val.dval.s[0] = scalar.val[0];
- val.dval.s[1] = scalar.val[1];
- val.dval.s[2] = scalar.val[2];
- val.dval.s[3] = scalar.val[3];
- switch(dst.channels())
- {
- case 1:
- sprintf(compile_option, "-D GENTYPE=double");
- args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] ));
- break;
- case 4:
- sprintf(compile_option, "-D GENTYPE=double4");
- args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval ));
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unsupported channels");
- }
+ val.dval.s[0] = scalar.val[0];
+ val.dval.s[1] = scalar.val[1];
+ val.dval.s[2] = scalar.val[2];
+ val.dval.s[3] = scalar.val[3];
+ switch(dst.oclchannels())
+ {
+ case 1:
+ sprintf(compile_option, "-D GENTYPE=double");
+ args.push_back( make_pair( sizeof(cl_double) , (void *)&val.dval.s[0] ));
+ break;
+ case 4:
+ sprintf(compile_option, "-D GENTYPE=double4");
+ args.push_back( make_pair( sizeof(cl_double4) , (void *)&val.dval ));
+ break;
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unsupported channels");
+ }
break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"unknown depth");
+ default:
+ CV_Error(CV_StsUnsupportedFormat, "unknown depth");
}
#if CL_VERSION_1_2
- if(dst.offset==0 && dst.cols==dst.wholecols)
- {
- clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue,(cl_mem)dst.data,args[0].second,args[0].first,0,dst.step*dst.rows,0,NULL,NULL);
- }
- else
- {
- args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
- args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
+ if(dst.offset == 0 && dst.cols == dst.wholecols)
+ {
+ clEnqueueFillBuffer(dst.clCxt->impl->clCmdQueue, (cl_mem)dst.data, args[0].second, args[0].first, 0, dst.step * dst.rows, 0, NULL, NULL);
+ }
+ else
+ {
+ args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
+ args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
openCLExecuteKernel2(dst.clCxt , &operator_setTo, kernelName, globalThreads,
- localThreads, args, -1, -1,compile_option, CLFLUSH);
- }
+ localThreads, args, -1, -1, compile_option, CLFLUSH);
+ }
#else
args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
args.push_back( make_pair( sizeof(cl_int) , (void *)&offset_in_pixel));
openCLExecuteKernel2(dst.clCxt , &operator_setTo, kernelName, globalThreads,
- localThreads, args, -1, -1,compile_option, CLFLUSH);
+ localThreads, args, -1, -1, compile_option, CLFLUSH);
#endif
}
CV_Assert( src.depth() >= 0 && src.depth() <= 6 );
CV_DbgAssert( !src.empty());
- if(src.type()==CV_8UC1)
- {
- set_to_withoutmask_run_cus(src, scalar, "set_to_without_mask_C1_D0");
- }
- else
- {
- set_to_withoutmask_run_cus(src, scalar, "set_to_without_mask");
- }
+ if(src.type() == CV_8UC1)
+ {
+ set_to_withoutmask_run_cus(src, scalar, "set_to_without_mask_C1_D0");
+ }
+ else
+ {
+ set_to_withoutmask_run_cus(src, scalar, "set_to_without_mask");
+ }
return src;
}
void arithmetic_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString, void *_scalar)
{
- if(src1.clCxt -> impl -> double_support ==0 && src1.type() == CV_64F)
+ if(src1.clCxt -> impl -> double_support == 0 && src1.type() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
}
//dst.create(src1.size(), src1.type());
//CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols &&
// src1.rows == src2.rows && src2.rows == dst.rows);
- CV_Assert(src1.cols == dst.cols &&
+ CV_Assert(src1.cols == dst.cols &&
src1.rows == dst.rows);
CV_Assert(src1.type() == dst.type());
//int cols = divUp(dst.cols * channels + offset_cols, vector_length);
size_t localThreads[3] = { 16, 16, 1 };
- //size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- // divUp(dst.rows, localThreads[1]) * localThreads[1],
- // 1
- // };
- size_t globalThreads[3] = { src1.cols,
+ //size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
+ // divUp(dst.rows, localThreads[1]) * localThreads[1],
+ // 1
+ // };
+ size_t globalThreads[3] = { src1.cols,
src1.rows,
1
};
//if(_scalar != NULL)
//{
- float scalar1 = *((float *)_scalar);
- args.push_back( make_pair( sizeof(float), (float *)&scalar1 ));
+ float scalar1 = *((float *)_scalar);
+ args.push_back( make_pair( sizeof(float), (float *)&scalar1 ));
//}
openCLExecuteKernel2(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, src1.depth(), CLFLUSH);
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols));
- openCLExecuteKernel2(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.channels(), src.depth(), CLFLUSH);
+ openCLExecuteKernel2(clCxt, &pyr_down, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth(), CLFLUSH);
}
-void pyrDown_cus(const oclMat& src, oclMat& dst)
+void pyrDown_cus(const oclMat &src, oclMat &dst)
{
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
//
//void callT(const oclMat& src, oclMat& dst, MultiplyScalar op, int mask)
//{
-// if (!isAligned(src.data, 4 * sizeof(double)) || !isAligned(src.step, 4 * sizeof(double)) ||
+// if (!isAligned(src.data, 4 * sizeof(double)) || !isAligned(src.step, 4 * sizeof(double)) ||
// !isAligned(dst.data, 4 * sizeof(double)) || !isAligned(dst.step, 4 * sizeof(double)))
// {
// callF(src, dst, op, mask);
// //}
//}
-cl_mem bindTexture(const oclMat& mat, int depth, int channels)
+cl_mem bindTexture(const oclMat &mat, int depth, int channels)
{
- cl_mem texture;
+ cl_mem texture;
cl_image_format format;
int err;
- if(depth == 0)
- {
- format.image_channel_data_type = CL_UNSIGNED_INT8;
- }
- else if(depth == 5)
- {
- format.image_channel_data_type = CL_FLOAT;
- }
- if(channels == 1)
- {
- format.image_channel_order = CL_R;
- }
- else if(channels == 3)
- {
- format.image_channel_order = CL_RGB;
- }
- else if(channels == 4)
- {
- format.image_channel_order = CL_RGBA;
- }
+ if(depth == 0)
+ {
+ format.image_channel_data_type = CL_UNSIGNED_INT8;
+ }
+ else if(depth == 5)
+ {
+ format.image_channel_data_type = CL_FLOAT;
+ }
+ if(channels == 1)
+ {
+ format.image_channel_order = CL_R;
+ }
+ else if(channels == 3)
+ {
+ format.image_channel_order = CL_RGB;
+ }
+ else if(channels == 4)
+ {
+ format.image_channel_order = CL_RGBA;
+ }
#if CL_VERSION_1_2
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
- desc.image_width = mat.step / mat.elemSize();
+ desc.image_width = mat.step / mat.elemSize();
desc.image_height = mat.rows;
desc.image_depth = NULL;
desc.image_array_size = 1;
desc.image_row_pitch = 0;
- desc.image_slice_pitch= 0;
+ desc.image_slice_pitch = 0;
desc.buffer = NULL;
desc.num_mip_levels = 0;
desc.num_samples = 0;
- texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
+ texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
#else
texture = clCreateImage2D(
- mat.clCxt->impl->clContext,
- CL_MEM_READ_WRITE,
- &format,
- mat.step / mat.elemSize(),
- mat.rows,
- 0,
- NULL,
- &err);
+ mat.clCxt->impl->clContext,
+ CL_MEM_READ_WRITE,
+ &format,
+ mat.step / mat.elemSize(),
+ mat.rows,
+ 0,
+ NULL,
+ &err);
#endif
- size_t origin[] = { 0, 0, 0 };
- size_t region[] = { mat.step / mat.elemSize(), mat.rows, 1 };
- clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, texture, 0, origin, region, 0, NULL, 0);
+ size_t origin[] = { 0, 0, 0 };
+ size_t region[] = { mat.step / mat.elemSize(), mat.rows, 1 };
+ clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, texture, 0, origin, region, 0, NULL, 0);
openCLSafeCall(err);
- return texture;
+ return texture;
}
void releaseTexture(cl_mem texture)
{
- openCLFree(texture);
+ openCLFree(texture);
}
-void lkSparse_run(oclMat& I, oclMat& J,
- const oclMat& prevPts, oclMat& nextPts, oclMat& status, oclMat* err, bool GET_MIN_EIGENVALS, int ptcount,
- int level, /*dim3 block, */dim3 patch, Size winSize, int iters)
+void lkSparse_run(oclMat &I, oclMat &J,
+ const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat *err, bool GET_MIN_EIGENVALS, int ptcount,
+ int level, /*dim3 block, */dim3 patch, Size winSize, int iters)
{
Context *clCxt = I.clCxt;
string kernelName = "lkSparse";
- size_t localThreads[3] = { 8, 32, 1 };
+ size_t localThreads[3] = { 8, 32, 1 };
size_t globalThreads[3] = { 8 * ptcount, 32, 1};
- int cn = I.channels();
+ int cn = I.oclchannels();
- bool calcErr;
+ bool calcErr;
if (err)
{
- calcErr = true;
+ calcErr = true;
}
else
{
- calcErr = false;
+ calcErr = false;
}
- calcErr = true;
+ calcErr = true;
- cl_mem ITex = bindTexture(I, I.depth(), cn);
- cl_mem JTex = bindTexture(J, J.depth(), cn);
+ cl_mem ITex = bindTexture(I, I.depth(), cn);
+ cl_mem JTex = bindTexture(J, J.depth(), cn);
vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
args.push_back( make_pair( sizeof(cl_char), (void *)&GET_MIN_EIGENVALS ));
- openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.channels(), I.depth(), CLFLUSH);
-
- releaseTexture(ITex);
- releaseTexture(JTex);
+ openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
+
+ releaseTexture(ITex);
+ releaseTexture(JTex);
}
-void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat& prevImg, const oclMat& nextImg, const oclMat& prevPts, oclMat& nextPts, oclMat& status, oclMat* err)
+void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts, oclMat &status, oclMat *err)
{
if (prevPts.empty())
{
iters = std::min(std::max(iters, 0), 100);
- const int cn = prevImg.channels();
+ const int cn = prevImg.oclchannels();
dim3 block, patch;
- calcPatchSize(winSize, cn, block, patch, isDeviceArch11_);
+ calcPatchSize(winSize, cn, block, patch, isDeviceArch11_);
CV_Assert(derivLambda >= 0);
CV_Assert(maxLevel >= 0 && winSize.width > 2 && winSize.height > 2);
oclMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
oclMat temp2 = nextPts.reshape(1);
- //oclMat scalar(temp1.rows, temp1.cols, temp1.type(), Scalar(1.0f / (1 << maxLevel) / 2.0f));
- multiply_cus(temp1, temp2, 1.0f / (1 << maxLevel) / 2.0f);
- //::multiply(temp1, 1.0f / (1 << maxLevel) / 2.0f, temp2);
+ //oclMat scalar(temp1.rows, temp1.cols, temp1.type(), Scalar(1.0f / (1 << maxLevel) / 2.0f));
+ multiply_cus(temp1, temp2, 1.0f / (1 << maxLevel) / 2.0f);
+ //::multiply(temp1, 1.0f / (1 << maxLevel) / 2.0f, temp2);
ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
//status.setTo(Scalar::all(1));
}
else
{
- //oclMat buf_;
- // cvtColor(prevImg, buf_, COLOR_BGR2BGRA);
- // buf_.convertTo(prevPyr_[0], CV_32F);
+ //oclMat buf_;
+ // cvtColor(prevImg, buf_, COLOR_BGR2BGRA);
+ // buf_.convertTo(prevPyr_[0], CV_32F);
- // cvtColor(nextImg, buf_, COLOR_BGR2BGRA);
- // buf_.convertTo(nextPyr_[0], CV_32F);
+ // cvtColor(nextImg, buf_, COLOR_BGR2BGRA);
+ // buf_.convertTo(nextPyr_[0], CV_32F);
}
for (int level = 1; level <= maxLevel; ++level)
for (int level = maxLevel; level >= 0; level--)
{
- lkSparse_run(prevPyr_[level], nextPyr_[level],
- prevPts, nextPts, status, level == 0 && err ? err : 0, getMinEigenVals, prevPts.cols,
- level, /*block, */patch, winSize, iters);
+ lkSparse_run(prevPyr_[level], nextPyr_[level],
+ prevPts, nextPts, status, level == 0 && err ? err : 0, getMinEigenVals, prevPts.cols,
+ level, /*block, */patch, winSize, iters);
}
- clFinish(prevImg.clCxt->impl->clCmdQueue);
+ clFinish(prevImg.clCxt->impl->clCmdQueue);
}
-void lkDense_run(oclMat& I, oclMat& J, oclMat& u, oclMat& v,
- oclMat& prevU, oclMat& prevV, oclMat* err, Size winSize, int iters)
+void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
+ oclMat &prevU, oclMat &prevV, oclMat *err, Size winSize, int iters)
{
Context *clCxt = I.clCxt;
size_t localThreads[3] = { 16, 16, 1 };
size_t globalThreads[3] = { I.cols, I.rows, 1};
- int cn = I.channels();
+ int cn = I.oclchannels();
- bool calcErr;
+ bool calcErr;
if (err)
{
- calcErr = true;
+ calcErr = true;
}
else
{
- calcErr = false;
+ calcErr = false;
}
- cl_mem ITex = bindTexture(I, I.depth(), cn);
- cl_mem JTex = bindTexture(J, J.depth(), cn);
+ cl_mem ITex = bindTexture(I, I.depth(), cn);
+ cl_mem JTex = bindTexture(J, J.depth(), cn);
- //int2 halfWin = {(winSize.width - 1) / 2, (winSize.height - 1) / 2};
+ //int2 halfWin = {(winSize.width - 1) / 2, (winSize.height - 1) / 2};
//const int patchWidth = 16 + 2 * halfWin.x;
//const int patchHeight = 16 + 2 * halfWin.y;
//size_t smem_size = 3 * patchWidth * patchHeight * sizeof(int);
args.push_back( make_pair( sizeof(cl_int), (void *)&I.cols ));
//args.push_back( make_pair( sizeof(cl_mem), (void *)&(*err).data ));
//args.push_back( make_pair( sizeof(cl_int), (void *)&(*err).step ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.width ));
- args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.height ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.width ));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&winSize.height ));
args.push_back( make_pair( sizeof(cl_int), (void *)&iters ));
args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
- openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.channels(), I.depth(), CLFLUSH);
-
- releaseTexture(ITex);
- releaseTexture(JTex);
+ openCLExecuteKernel2(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), CLFLUSH);
+
+ releaseTexture(ITex);
+ releaseTexture(JTex);
}
-void cv::ocl::PyrLKOpticalFlow::dense(const oclMat& prevImg, const oclMat& nextImg, oclMat& u, oclMat& v, oclMat* err)
+void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err)
{
CV_Assert(prevImg.type() == CV_8UC1);
CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
uPyr_[1].setTo(Scalar::all(0));
vPyr_[1].setTo(Scalar::all(0));
- Size winSize2i(winSize.width, winSize.height);
+ Size winSize2i(winSize.width, winSize.height);
int idx = 0;
int idx2 = (idx + 1) & 1;
lkDense_run(prevPyr_[level], nextPyr_[level], uPyr_[idx], vPyr_[idx], uPyr_[idx2], vPyr_[idx2],
- level == 0 ? err : 0, winSize2i, iters);
+ level == 0 ? err : 0, winSize2i, iters);
if (level > 0)
idx = idx2;
// @Authors
// Zhang Chunpeng chunpeng@multicorewareinc.com
// Yao Wang, yao@multicorewareinc.com
-//
+//
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
using namespace std;
#ifndef HAVE_OPENCL
-void cv::ocl::pyrUp(const oclMat&, GpuMat&, oclMat&) { throw_nogpu(); }
+void cv::ocl::pyrUp(const oclMat &, GpuMat &, oclMat &)
+{
+ throw_nogpu();
+}
#else
-namespace cv { namespace ocl
-{
- extern const char *pyr_up;
- void pyrUp(const cv::ocl::oclMat& src,cv::ocl::oclMat& dst)
- {
- dst.create(src.rows * 2, src.cols * 2, src.type());
- dst.download_channels=src.download_channels;
- Context *clCxt = src.clCxt;
-
- const std::string kernelName = "pyrUp";
-
- std::vector< pair<size_t, const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
-
- size_t globalThreads[3] = {dst.cols, dst.rows, 1};
- size_t localThreads[3] = {16, 16, 1};
-
- openCLExecuteKernel(clCxt, &pyr_up, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());
- }
-}};
+namespace cv
+{
+ namespace ocl
+ {
+ extern const char *pyr_up;
+ void pyrUp(const cv::ocl::oclMat &src, cv::ocl::oclMat &dst)
+ {
+ dst.create(src.rows * 2, src.cols * 2, src.type());
+
+ Context *clCxt = src.clCxt;
+
+ const std::string kernelName = "pyrUp";
+
+ std::vector< pair<size_t, const void *> > args;
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data));
+ args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&src.step));
+ args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step));
+
+ size_t globalThreads[3] = {dst.cols, dst.rows, 1};
+ size_t localThreads[3] = {16, 16, 1};
+
+
+ openCLExecuteKernel(clCxt, &pyr_up, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
+ }
+ }
+};
#endif // HAVE_OPENCL
\ No newline at end of file
void merge_vector_run_no_roi(const oclMat *mat_src, size_t n, oclMat &mat_dst)
{
Context *clCxt = mat_dst.clCxt;
- int channels = mat_dst.channels();
+ int channels = mat_dst.oclchannels();
int depth = mat_dst.depth();
string kernelName = "merge_vector";
{4, 4, 2, 2, 1, 1, 1}
};
- size_t index = indexes[channels-1][mat_dst.depth()];
+ size_t index = indexes[channels - 1][mat_dst.depth()];
int cols = divUp(mat_dst.cols, index);
size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(mat_dst.rows, localThreads[1]) * localThreads[1],
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(mat_dst.rows, localThreads[1]) *localThreads[1],
1
};
void merge_vector_run(const oclMat *mat_src, size_t n, oclMat &mat_dst)
{
- if(mat_dst.clCxt -> impl -> double_support ==0 && mat_dst.type() == CV_64F)
+ if(mat_dst.clCxt -> impl -> double_support == 0 && mat_dst.type() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
}
Context *clCxt = mat_dst.clCxt;
- int channels = mat_dst.channels();
+ int channels = mat_dst.oclchannels();
int depth = mat_dst.depth();
string kernelName = "merge_vector";
{1, 1, 1, 1, 1, 1, 1}
};
- size_t vector_length = vector_lengths[channels-1][depth];
+ size_t vector_length = vector_lengths[channels - 1][depth];
int offset_cols = (mat_dst.offset / mat_dst.elemSize()) & (vector_length - 1);
int cols = divUp(mat_dst.cols + offset_cols, vector_length);
size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(mat_dst.rows, localThreads[1]) * localThreads[1],
- 1
- };
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(mat_dst.rows, localThreads[1]) *localThreads[1],
+ 1
+ };
int dst_step1 = mat_dst.cols * mat_dst.elemSize();
vector<pair<size_t , const void *> > args;
// if channel == 3, then the matrix will convert to channel =4
//if(n == 3)
- // args.push_back( make_pair( sizeof(cl_int), (void *)&offset_cols));
+ // args.push_back( make_pair( sizeof(cl_int), (void *)&offset_cols));
if(n == 3)
{
args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[2].step));
args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[2].offset));
}
- else if( n== 4)
+ else if( n == 4)
{
args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src[3].data));
args.push_back( make_pair( sizeof(cl_int), (void *)&mat_src[3].step));
CV_Assert(depth == mat_src[i].depth());
CV_Assert(size == mat_src[i].size());
- total_channels += mat_src[i].channels();
+ total_channels += mat_src[i].oclchannels();
}
CV_Assert(total_channels <= 4);
void split_vector_run_no_roi(const oclMat &mat_src, oclMat *mat_dst)
{
Context *clCxt = mat_src.clCxt;
- int channels = mat_src.channels();
+ int channels = mat_src.oclchannels();
int depth = mat_src.depth();
string kernelName = "split_vector";
{4, 4, 2, 2, 1, 1, 1}
};
- size_t index = indexes[channels-1][mat_dst[0].depth()];
+ size_t index = indexes[channels - 1][mat_dst[0].depth()];
int cols = divUp(mat_src.cols, index);
size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(mat_src.rows, localThreads[1]) * localThreads[1],
- 1
- };
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(mat_src.rows, localThreads[1]) *localThreads[1],
+ 1
+ };
vector<pair<size_t , const void *> > args;
args.push_back( make_pair( sizeof(cl_mem), (void *)&mat_src.data));
void split_vector_run(const oclMat &mat_src, oclMat *mat_dst)
{
- if(mat_src.clCxt -> impl -> double_support ==0 && mat_src.type() == CV_64F)
+ if(mat_src.clCxt -> impl -> double_support == 0 && mat_src.type() == CV_64F)
{
- CV_Error(CV_GpuNotSupported,"Selected device don't support double\r\n");
+ CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
return;
}
Context *clCxt = mat_src.clCxt;
- int channels = mat_src.channels();
+ int channels = mat_src.oclchannels();
int depth = mat_src.depth();
string kernelName = "split_vector";
{4, 4, 2, 2, 1, 1, 1}
};
- size_t vector_length = vector_lengths[channels-1][mat_dst[0].depth()];
+ size_t vector_length = vector_lengths[channels - 1][mat_dst[0].depth()];
int max_offset_cols = 0;
for(int i = 0; i < channels; i++)
: divUp(mat_src.cols + max_offset_cols, vector_length);
size_t localThreads[3] = { 64, 4, 1 };
- size_t globalThreads[3] = { divUp(cols, localThreads[0]) * localThreads[0],
- divUp(mat_src.rows, localThreads[1]) * localThreads[1], 1
+ size_t globalThreads[3] = { divUp(cols, localThreads[0]) *localThreads[0],
+ divUp(mat_src.rows, localThreads[1]) *localThreads[1], 1
};
int dst_step1 = mat_dst[0].cols * mat_dst[0].elemSize();
CV_Assert(mat_dst);
int depth = mat_src.depth();
- int num_channels = mat_src.channels();
+ int num_channels = mat_src.oclchannels();
Size size = mat_src.size();
if(num_channels == 1)
}
void cv::ocl::split(const oclMat &src, vector<oclMat> &dst)
{
- dst.resize(src.channels());
- if(src.channels() > 0)
+ dst.resize(src.oclchannels());
+ if(src.oclchannels() > 0)
split_merge::split(src, &dst[0]);
}
#endif /* !defined (HAVE_OPENCL) */
//M*/
#include <iomanip>
#include "precomp.hpp"
-#include "opencv2/highgui/highgui.hpp"
+//#include "opencv2/highgui/highgui.hpp"
using namespace cv;
using namespace cv::ocl;
#if !defined (HAVE_OPENCL)
-cv::ocl::SURF_OCL::SURF_OCL() { throw_nogpu(); }
-cv::ocl::SURF_OCL::SURF_OCL(double, int, int, bool, float, bool) { throw_nogpu(); }
-int cv::ocl::SURF_OCL::descriptorSize() const { throw_nogpu(); return 0;}
-void cv::ocl::SURF_OCL::uploadKeypoints(const vector<KeyPoint>&, oclMat&) { throw_nogpu(); }
-void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat&, vector<KeyPoint>&) { throw_nogpu(); }
-void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat&, vector<float>&) { throw_nogpu(); }
-void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); }
-void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, oclMat&, oclMat&, bool) { throw_nogpu(); }
-void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, vector<KeyPoint>&) { throw_nogpu(); }
-void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, vector<KeyPoint>&, oclMat&, bool) { throw_nogpu(); }
-void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, vector<KeyPoint>&, vector<float>&, bool) { throw_nogpu(); }
-void cv::ocl::SURF_OCL::releaseMemory() { throw_nogpu(); }
+cv::ocl::SURF_OCL::SURF_OCL()
+{
+ throw_nogpu();
+}
+cv::ocl::SURF_OCL::SURF_OCL(double, int, int, bool, float, bool)
+{
+ throw_nogpu();
+}
+int cv::ocl::SURF_OCL::descriptorSize() const
+{
+ throw_nogpu();
+ return 0;
+}
+void cv::ocl::SURF_OCL::uploadKeypoints(const vector<KeyPoint> &, oclMat &)
+{
+ throw_nogpu();
+}
+void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat &, vector<KeyPoint> &)
+{
+ throw_nogpu();
+}
+void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat &, vector<float> &)
+{
+ throw_nogpu();
+}
+void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, oclMat &)
+{
+ throw_nogpu();
+}
+void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, oclMat &, oclMat &, bool)
+{
+ throw_nogpu();
+}
+void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, vector<KeyPoint> &)
+{
+ throw_nogpu();
+}
+void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, vector<KeyPoint> &, oclMat &, bool)
+{
+ throw_nogpu();
+}
+void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, vector<KeyPoint> &, vector<float> &, bool)
+{
+ throw_nogpu();
+}
+void cv::ocl::SURF_OCL::releaseMemory()
+{
+ throw_nogpu();
+}
#else /* !defined (HAVE_OPENCL) */
-namespace cv { namespace ocl
+namespace cv
{
- ///////////////////////////OpenCL kernel strings///////////////////////////
- extern const char * nonfree_surf;
-}}
+ namespace ocl
+ {
+ ///////////////////////////OpenCL kernel strings///////////////////////////
+ extern const char *nonfree_surf;
+ }
+}
static inline int divUp(int total, int grain)
{
public:
// facilities
- void bindImgTex(const oclMat& img, cl_mem & texture);
+ void bindImgTex(const oclMat &img, cl_mem &texture);
//void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold);
//void loadOctaveConstants(int octave, int layer_rows, int layer_cols);
// kernel callers declearations
- void icvCalcLayerDetAndTrace_gpu(oclMat& det, oclMat& trace, int octave, int nOctaveLayers, int layer_rows);
+ void icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, int octave, int nOctaveLayers, int layer_rows);
- void icvFindMaximaInLayer_gpu(const oclMat& det, const oclMat& trace, oclMat& maxPosBuffer, oclMat& maxCounter, int counterOffset,
- int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols);
+ void icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat &trace, oclMat &maxPosBuffer, oclMat &maxCounter, int counterOffset,
+ int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols);
- void icvInterpolateKeypoint_gpu(const oclMat& det, const oclMat& maxPosBuffer, unsigned int maxCounter,
- oclMat& keypoints, oclMat& counters, int octave, int layer_rows, int maxFeatures);
+ void icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, unsigned int maxCounter,
+ oclMat &keypoints, oclMat &counters, int octave, int layer_rows, int maxFeatures);
- void icvCalcOrientation_gpu(const oclMat& keypoints, int nFeatures);
+ void icvCalcOrientation_gpu(const oclMat &keypoints, int nFeatures);
- void compute_descriptors_gpu(const oclMat& descriptors, const oclMat& keypoints, int nFeatures);
+ void compute_descriptors_gpu(const oclMat &descriptors, const oclMat &keypoints, int nFeatures);
// end of kernel callers declearations
- SURF_OCL_Invoker(SURF_OCL& surf, const oclMat& img, const oclMat& mask) :
- surf_(surf),
+ SURF_OCL_Invoker(SURF_OCL &surf, const oclMat &img, const oclMat &mask) :
+ surf_(surf),
img_cols(img.cols), img_rows(img.rows),
use_mask(!mask.empty()),
imgTex(NULL), sumTex(NULL), maskSumTex(NULL)
// temp fix for missing min overload
oclMat temp(mask.size(), mask.type());
temp.setTo(Scalar::all(1.0));
- //cv::ocl::min(mask, temp, surf_.mask1); ///////// disable this
+ //cv::ocl::min(mask, temp, surf_.mask1); ///////// disable this
integral(surf_.mask1, surf_.maskSum);
bindImgTex(surf_.maskSum, maskSumTex);
}
}
- void detectKeypoints(oclMat& keypoints)
+ void detectKeypoints(oclMat &keypoints)
{
// create image pyramid buffers
// different layers have same sized buffers, but they are sampled from gaussin kernel.
icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, octave, surf_.nOctaveLayers, layer_rows);
icvFindMaximaInLayer_gpu(surf_.det, surf_.trace, surf_.maxPosBuffer, counters, 1 + octave,
- octave, use_mask, surf_.nOctaveLayers, layer_rows, layer_cols);
+ octave, use_mask, surf_.nOctaveLayers, layer_rows, layer_cols);
unsigned int maxCounter = Mat(counters).at<unsigned int>(1 + octave);
maxCounter = std::min(maxCounter, static_cast<unsigned int>(maxCandidates));
if (maxCounter > 0)
{
icvInterpolateKeypoint_gpu(surf_.det, surf_.maxPosBuffer, maxCounter,
- keypoints, counters, octave, layer_rows, maxFeatures);
+ keypoints, counters, octave, layer_rows, maxFeatures);
}
}
unsigned int featureCounter = Mat(counters).at<unsigned int>(0);
findOrientation(keypoints);
}
- void findOrientation(oclMat& keypoints)
+ void findOrientation(oclMat &keypoints)
{
const int nFeatures = keypoints.cols;
if (nFeatures > 0)
}
}
- void computeDescriptors(const oclMat& keypoints, oclMat& descriptors, int descriptorSize)
+ void computeDescriptors(const oclMat &keypoints, oclMat &descriptors, int descriptorSize)
{
const int nFeatures = keypoints.cols;
if (nFeatures > 0)
}
private:
- SURF_OCL& surf_;
+ SURF_OCL &surf_;
int img_cols, img_rows;
oclMat additioalParamBuffer;
- SURF_OCL_Invoker& operator= (const SURF_OCL_Invoker& right)
- {
+ SURF_OCL_Invoker &operator= (const SURF_OCL_Invoker &right)
+ {
(*this) = right;
return *this;
} // remove warning C4512
return extended ? 128 : 64;
}
-void cv::ocl::SURF_OCL::uploadKeypoints(const vector<KeyPoint>& keypoints, oclMat& keypointsGPU)
+void cv::ocl::SURF_OCL::uploadKeypoints(const vector<KeyPoint> &keypoints, oclMat &keypointsGPU)
{
if (keypoints.empty())
keypointsGPU.release();
{
Mat keypointsCPU(SURF_OCL::ROWS_COUNT, static_cast<int>(keypoints.size()), CV_32FC1);
- float* kp_x = keypointsCPU.ptr<float>(SURF_OCL::X_ROW);
- float* kp_y = keypointsCPU.ptr<float>(SURF_OCL::Y_ROW);
- int* kp_laplacian = keypointsCPU.ptr<int>(SURF_OCL::LAPLACIAN_ROW);
- int* kp_octave = keypointsCPU.ptr<int>(SURF_OCL::OCTAVE_ROW);
- float* kp_size = keypointsCPU.ptr<float>(SURF_OCL::SIZE_ROW);
- float* kp_dir = keypointsCPU.ptr<float>(SURF_OCL::ANGLE_ROW);
- float* kp_hessian = keypointsCPU.ptr<float>(SURF_OCL::HESSIAN_ROW);
+ float *kp_x = keypointsCPU.ptr<float>(SURF_OCL::X_ROW);
+ float *kp_y = keypointsCPU.ptr<float>(SURF_OCL::Y_ROW);
+ int *kp_laplacian = keypointsCPU.ptr<int>(SURF_OCL::LAPLACIAN_ROW);
+ int *kp_octave = keypointsCPU.ptr<int>(SURF_OCL::OCTAVE_ROW);
+ float *kp_size = keypointsCPU.ptr<float>(SURF_OCL::SIZE_ROW);
+ float *kp_dir = keypointsCPU.ptr<float>(SURF_OCL::ANGLE_ROW);
+ float *kp_hessian = keypointsCPU.ptr<float>(SURF_OCL::HESSIAN_ROW);
for (size_t i = 0, size = keypoints.size(); i < size; ++i)
{
- const KeyPoint& kp = keypoints[i];
+ const KeyPoint &kp = keypoints[i];
kp_x[i] = kp.pt.x;
kp_y[i] = kp.pt.y;
kp_octave[i] = kp.octave;
}
}
-void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat& keypointsGPU, vector<KeyPoint>& keypoints)
+void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat &keypointsGPU, vector<KeyPoint> &keypoints)
{
const int nFeatures = keypointsGPU.cols;
keypoints.resize(nFeatures);
- float* kp_x = keypointsCPU.ptr<float>(SURF_OCL::X_ROW);
- float* kp_y = keypointsCPU.ptr<float>(SURF_OCL::Y_ROW);
- int* kp_laplacian = keypointsCPU.ptr<int>(SURF_OCL::LAPLACIAN_ROW);
- int* kp_octave = keypointsCPU.ptr<int>(SURF_OCL::OCTAVE_ROW);
- float* kp_size = keypointsCPU.ptr<float>(SURF_OCL::SIZE_ROW);
- float* kp_dir = keypointsCPU.ptr<float>(SURF_OCL::ANGLE_ROW);
- float* kp_hessian = keypointsCPU.ptr<float>(SURF_OCL::HESSIAN_ROW);
+ float *kp_x = keypointsCPU.ptr<float>(SURF_OCL::X_ROW);
+ float *kp_y = keypointsCPU.ptr<float>(SURF_OCL::Y_ROW);
+ int *kp_laplacian = keypointsCPU.ptr<int>(SURF_OCL::LAPLACIAN_ROW);
+ int *kp_octave = keypointsCPU.ptr<int>(SURF_OCL::OCTAVE_ROW);
+ float *kp_size = keypointsCPU.ptr<float>(SURF_OCL::SIZE_ROW);
+ float *kp_dir = keypointsCPU.ptr<float>(SURF_OCL::ANGLE_ROW);
+ float *kp_hessian = keypointsCPU.ptr<float>(SURF_OCL::HESSIAN_ROW);
for (int i = 0; i < nFeatures; ++i)
{
- KeyPoint& kp = keypoints[i];
+ KeyPoint &kp = keypoints[i];
kp.pt.x = kp_x[i];
kp.pt.y = kp_y[i];
kp.class_id = kp_laplacian[i];
}
}
-void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat& descriptorsGPU, vector<float>& descriptors)
+void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat &descriptorsGPU, vector<float> &descriptors)
{
if (descriptorsGPU.empty())
descriptors.clear();
}
}
-void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints)
+void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, oclMat &keypoints)
{
if (!img.empty())
{
}
}
-void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints, oclMat& descriptors,
- bool useProvidedKeypoints)
+void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, oclMat &keypoints, oclMat &descriptors,
+ bool useProvidedKeypoints)
{
if (!img.empty())
{
}
}
-void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector<KeyPoint>& keypoints)
+void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, vector<KeyPoint> &keypoints)
{
oclMat keypointsGPU;
downloadKeypoints(keypointsGPU, keypoints);
}
-void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector<KeyPoint>& keypoints,
- oclMat& descriptors, bool useProvidedKeypoints)
+void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, vector<KeyPoint> &keypoints,
+ oclMat &descriptors, bool useProvidedKeypoints)
{
oclMat keypointsGPU;
downloadKeypoints(keypointsGPU, keypoints);
}
-void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector<KeyPoint>& keypoints,
- vector<float>& descriptors, bool useProvidedKeypoints)
+void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, vector<KeyPoint> &keypoints,
+ vector<float> &descriptors, bool useProvidedKeypoints)
{
oclMat descriptorsGPU;
// bind source buffer to image oject.
-void SURF_OCL_Invoker::bindImgTex(const oclMat& img, cl_mem& texture)
+void SURF_OCL_Invoker::bindImgTex(const oclMat &img, cl_mem &texture)
{
cl_image_format format;
int err;
desc.image_depth = 0;
desc.image_array_size = 1;
desc.image_row_pitch = 0;
- desc.image_slice_pitch= 0;
+ desc.image_slice_pitch = 0;
desc.buffer = NULL;
desc.num_mip_levels = 0;
desc.num_samples = 0;
- texture = clCreateImage(Context::getContext()->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
+ texture = clCreateImage(Context::getContext()->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
#else
texture = clCreateImage2D(
- Context::getContext()->impl->clContext,
- CL_MEM_READ_WRITE,
- &format,
- img.step / img.elemSize(),
- img.rows,
- 0,
- NULL,
- &err);
+ Context::getContext()->impl->clContext,
+ CL_MEM_READ_WRITE,
+ &format,
+ img.step / img.elemSize(),
+ img.rows,
+ 0,
+ NULL,
+ &err);
#endif
- size_t origin[] = { 0, 0, 0 };
- size_t region[] = { img.step/img.elemSize(), img.rows, 1 };
+ size_t origin[] = { 0, 0, 0 };
+ size_t region[] = { img.step / img.elemSize(), img.rows, 1 };
clEnqueueCopyBufferToImage(img.clCxt->impl->clCmdQueue, (cl_mem)img.data, texture, 0, origin, region, 0, NULL, 0);
openCLSafeCall(err);
}
////////////////////////////
// kernel caller definitions
-void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat& det, oclMat& trace, int octave, int nOctaveLayers, int c_layer_rows)
+void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, int octave, int nOctaveLayers, int c_layer_rows)
{
const int min_size = calcSize(octave, 0);
const int max_samples_i = 1 + ((img_rows - min_size) >> octave);
args.push_back( make_pair( sizeof(cl_int), (void *)&c_layer_rows));
size_t localThreads[3] = {16, 16, 1};
- size_t globalThreads[3] = {
- divUp(max_samples_j, localThreads[0]) * localThreads[0],
- divUp(max_samples_i, localThreads[1]) * localThreads[1] * (nOctaveLayers + 2),
- 1};
- openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
+ size_t globalThreads[3] =
+ {
+ divUp(max_samples_j, localThreads[0]) *localThreads[0],
+ divUp(max_samples_i, localThreads[1]) *localThreads[1] *(nOctaveLayers + 2),
+ 1
+ };
+ openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
-void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat& det, const oclMat& trace, oclMat& maxPosBuffer, oclMat& maxCounter, int counterOffset,
- int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols)
+void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat &trace, oclMat &maxPosBuffer, oclMat &maxCounter, int counterOffset,
+ int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols)
{
const int min_margin = ((calcSize(octave, 2) >> 1) >> octave) + 1;
}
size_t localThreads[3] = {16, 16, 1};
- size_t globalThreads[3] = {divUp(layer_cols - 2 * min_margin, localThreads[0] - 2) * localThreads[0],
- divUp(layer_rows - 2 * min_margin, localThreads[1] - 2) * nLayers * localThreads[1],
- 1};
+ size_t globalThreads[3] = {divUp(layer_cols - 2 * min_margin, localThreads[0] - 2) *localThreads[0],
+ divUp(layer_rows - 2 * min_margin, localThreads[1] - 2) *nLayers *localThreads[1],
+ 1
+ };
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
-void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat& det, const oclMat& maxPosBuffer, unsigned int maxCounter,
- oclMat& keypoints, oclMat& counters, int octave, int layer_rows, int maxFeatures)
+void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMat &maxPosBuffer, unsigned int maxCounter,
+ oclMat &keypoints, oclMat &counters, int octave, int layer_rows, int maxFeatures)
{
Context *clCxt = det.clCxt;
string kernelName = "icvInterpolateKeypoint";
args.push_back( make_pair( sizeof(cl_int), (void *)&maxFeatures));
size_t localThreads[3] = {3, 3, 3};
- size_t globalThreads[3] = {maxCounter * localThreads[0], localThreads[1], 1};
+ size_t globalThreads[3] = {maxCounter *localThreads[0], localThreads[1], 1};
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
-void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat& keypoints, int nFeatures)
+void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeatures)
{
- Context * clCxt = counters.clCxt;
+ Context *clCxt = counters.clCxt;
string kernelName = "icvCalcOrientation";
vector< pair<size_t, const void *> > args;
args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols));
size_t localThreads[3] = {32, 4, 1};
- size_t globalThreads[3] = {nFeatures * localThreads[0], localThreads[1], 1};
+ size_t globalThreads[3] = {nFeatures *localThreads[0], localThreads[1], 1};
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
-void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat& descriptors, const oclMat& keypoints, int nFeatures)
+void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const oclMat &keypoints, int nFeatures)
{
// compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D
Context *clCxt = descriptors.clCxt;
print_info();
- std::vector<cv::ocl::Info> oclinfo;
- int devnums = getDevice(oclinfo);
- if(devnums<1)
- {
- std::cout << "no device found\n";
- return -1;
- }
- //setDevice(oclinfo[2]);
+ std::vector<cv::ocl::Info> oclinfo;
+ int devnums = getDevice(oclinfo);
+ if(devnums < 1)
+ {
+ std::cout << "no device found\n";
+ return -1;
+ }
+ //setDevice(oclinfo[1]);
return RUN_ALL_TESTS();
}
src1y = rng.uniform(0, mat1.rows - roirows);
dstx = rng.uniform(0, dst.cols - roicols);
dsty = rng.uniform(0, dst.rows - roirows);
+ maskx = rng.uniform(0, mask.cols - roicols);
+ masky = rng.uniform(0, mask.rows - roirows);
+ src2x = rng.uniform(0, mat2.cols - roicols);
+ src2y = rng.uniform(0, mat2.rows - roirows);
#else
roicols = mat1.cols;
roirows = mat1.rows;
src1y = 0;
dstx = 0;
dsty = 0;
+ maskx = 0;
+ masky = 0;
+ src2x = 0;
+ src2y = 0;
#endif
- maskx = rng.uniform(0, mask.cols - roicols);
- masky = rng.uniform(0, mask.rows - roirows);
- src2x = rng.uniform(0, mat2.cols - roicols);
- src2y = rng.uniform(0, mat2.rows - roirows);
mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows));
mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows));
mask_roi = mask(Rect(maskx, masky, roicols, roirows));
float val1 = mat1.at<float>(i, j);
float val2 = mat2.at<float>(i, j);
- ((float *)(dst.data))[i *dst.step/4 +j] = val1 * val1 + val2 * val2;
+ ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2;
// float val1 =((float *)( mat1.data))[(i*mat1.step/8 +j)*2];
//
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine(
- Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(false)));
INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine(
- Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Div, Combine(
- Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine(
- Values(CV_8UC1,CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine(
- Values(CV_32FC1, CV_32FC3,CV_32FC4),
+ Values(CV_32FC1, CV_32FC3, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine(
- Values(CV_32FC1, CV_32FC3,CV_32FC4),
+ Values(CV_32FC1, CV_32FC3, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine(
- Values(CV_32FC1, CV_32FC3,CV_32FC4),
+ Values(CV_32FC1, CV_32FC3, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine(
- Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32FC1),
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32FC1),
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine(
- Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, MinMax, Combine(
Values(false)));
-INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32FC1, CV_32FC3,CV_32FC4), Values(false)));
+INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
// Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine(
- Values(CV_8UC1, CV_32SC1, CV_32SC4, CV_32FC1,CV_32FC3, CV_32FC4), Values(false)));
+ Values(CV_8UC1, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
//Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine(
- Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC3,CV_32FC4), Values(false)));
+ Values(CV_8UC1, CV_8UC3, CV_32SC1, CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
//Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine(
- Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC3,CV_32FC4), Values(false)));
+ Values(CV_8UC1, CV_8UC3, CV_32SC1, CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
//Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine(
- Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC3,CV_32FC4), Values(false)));
+ Values(CV_8UC1, CV_8UC3, CV_32SC1, CV_32FC1, CV_32FC3, CV_32FC4), Values(false)));
//Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(Values(CV_8UC1, CV_32SC1, CV_32FC1), Values(false)));
using namespace cvtest;
using namespace testing;
using namespace std;
-
+#ifdef HAVE_OPENCL
template <typename T>
-void blendLinearGold(const cv::Mat& img1, const cv::Mat& img2, const cv::Mat& weights1, const cv::Mat& weights2, cv::Mat& result_gold)
+void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &weights1, const cv::Mat &weights2, cv::Mat &result_gold)
{
result_gold.create(img1.size(), img1.type());
for (int y = 0; y < img1.rows; ++y)
{
- const float* weights1_row = weights1.ptr<float>(y);
- const float* weights2_row = weights2.ptr<float>(y);
- const T* img1_row = img1.ptr<T>(y);
- const T* img2_row = img2.ptr<T>(y);
- T* result_gold_row = result_gold.ptr<T>(y);
+ const float *weights1_row = weights1.ptr<float>(y);
+ const float *weights2_row = weights2.ptr<float>(y);
+ const T *img1_row = img1.ptr<T>(y);
+ const T *img2_row = img2.ptr<T>(y);
+ T *result_gold_row = result_gold.ptr<T>(y);
for (int x = 0; x < img1.cols * cn; ++x)
{
cv::Mat weights1 = randomMat(size, CV_32F, 0, 1);
cv::Mat weights2 = randomMat(size, CV_32F, 0, 1);
- cv::ocl::oclMat gimg1(size, type), gimg2(size, type), gweights1(size, CV_32F), gweights2(size, CV_32F);
- cv::ocl::oclMat dst(size, type);
- gimg1.upload(img1);
- gimg2.upload(img2);
- gweights1.upload(weights1);
- gweights2.upload(weights2);
- cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, dst);
- cv::Mat result;
+ cv::ocl::oclMat gimg1(size, type), gimg2(size, type), gweights1(size, CV_32F), gweights2(size, CV_32F);
+ cv::ocl::oclMat dst(size, type);
+ gimg1.upload(img1);
+ gimg2.upload(img2);
+ gweights1.upload(weights1);
+ gweights2.upload(weights2);
+ cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, dst);
+ cv::Mat result;
cv::Mat result_gold;
- dst.download(result);
+ dst.download(result);
if (depth == CV_8U)
blendLinearGold<uchar>(img1, img2, weights1, weights2, result_gold);
else
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine(
- DIFFERENT_SIZES,
- testing::Values(MatType(CV_8UC1), MatType(CV_8UC3),MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4))
-));
\ No newline at end of file
+ DIFFERENT_SIZES,
+ testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4))
+ ));
+#endif
\ No newline at end of file
//M*/
#include "precomp.hpp"
+#ifdef HAVE_OPENCL
+namespace
+{
-namespace {
-
-/////////////////////////////////////////////////////////////////////////////////////////////////
-// BruteForceMatcher
+ /////////////////////////////////////////////////////////////////////////////////////////////////
+ // BruteForceMatcher
-CV_ENUM(DistType, cv::ocl::BruteForceMatcher_OCL_base::L1Dist, cv::ocl::BruteForceMatcher_OCL_base::L2Dist, cv::ocl::BruteForceMatcher_OCL_base::HammingDist)
-IMPLEMENT_PARAM_CLASS(DescriptorSize, int)
+ CV_ENUM(DistType, cv::ocl::BruteForceMatcher_OCL_base::L1Dist, cv::ocl::BruteForceMatcher_OCL_base::L2Dist, cv::ocl::BruteForceMatcher_OCL_base::HammingDist)
+ IMPLEMENT_PARAM_CLASS(DescriptorSize, int)
-PARAM_TEST_CASE(BruteForceMatcher/*, NormCode*/, DistType, DescriptorSize)
-{
- //std::vector<cv::ocl::Info> oclinfo;
- cv::ocl::BruteForceMatcher_OCL_base::DistType distType;
- int normCode;
- int dim;
+ PARAM_TEST_CASE(BruteForceMatcher/*, NormCode*/, DistType, DescriptorSize)
+ {
+ //std::vector<cv::ocl::Info> oclinfo;
+ cv::ocl::BruteForceMatcher_OCL_base::DistType distType;
+ int normCode;
+ int dim;
- int queryDescCount;
- int countFactor;
+ int queryDescCount;
+ int countFactor;
- cv::Mat query, train;
+ cv::Mat query, train;
- virtual void SetUp()
- {
- //normCode = GET_PARAM(0);
- distType = (cv::ocl::BruteForceMatcher_OCL_base::DistType)(int)GET_PARAM(0);
- dim = GET_PARAM(1);
-
- //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- //CV_Assert(devnums > 0);
-
- queryDescCount = 300; // must be even number because we split train data in some cases in two
- countFactor = 4; // do not change it
-
- cv::RNG& rng = cvtest::TS::ptr()->get_rng();
-
- cv::Mat queryBuf, trainBuf;
-
- // Generate query descriptors randomly.
- // Descriptor vector elements are integer values.
- queryBuf.create(queryDescCount, dim, CV_32SC1);
- rng.fill(queryBuf, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3));
- queryBuf.convertTo(queryBuf, CV_32FC1);
-
- // Generate train decriptors as follows:
- // copy each query descriptor to train set countFactor times
- // and perturb some one element of the copied descriptors in
- // in ascending order. General boundaries of the perturbation
- // are (0.f, 1.f).
- trainBuf.create(queryDescCount * countFactor, dim, CV_32FC1);
- float step = 1.f / countFactor;
- for (int qIdx = 0; qIdx < queryDescCount; qIdx++)
+ virtual void SetUp()
{
- cv::Mat queryDescriptor = queryBuf.row(qIdx);
- for (int c = 0; c < countFactor; c++)
+ //normCode = GET_PARAM(0);
+ distType = (cv::ocl::BruteForceMatcher_OCL_base::DistType)(int)GET_PARAM(0);
+ dim = GET_PARAM(1);
+
+ //int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
+
+ queryDescCount = 300; // must be even number because we split train data in some cases in two
+ countFactor = 4; // do not change it
+
+ cv::RNG &rng = cvtest::TS::ptr()->get_rng();
+
+ cv::Mat queryBuf, trainBuf;
+
+ // Generate query descriptors randomly.
+ // Descriptor vector elements are integer values.
+ queryBuf.create(queryDescCount, dim, CV_32SC1);
+ rng.fill(queryBuf, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3));
+ queryBuf.convertTo(queryBuf, CV_32FC1);
+
+ // Generate train decriptors as follows:
+ // copy each query descriptor to train set countFactor times
+ // and perturb some one element of the copied descriptors in
+ // in ascending order. General boundaries of the perturbation
+ // are (0.f, 1.f).
+ trainBuf.create(queryDescCount * countFactor, dim, CV_32FC1);
+ float step = 1.f / countFactor;
+ for (int qIdx = 0; qIdx < queryDescCount; qIdx++)
{
- int tIdx = qIdx * countFactor + c;
- cv::Mat trainDescriptor = trainBuf.row(tIdx);
- queryDescriptor.copyTo(trainDescriptor);
- int elem = rng(dim);
- float diff = rng.uniform(step * c, step * (c + 1));
- trainDescriptor.at<float>(0, elem) += diff;
+ cv::Mat queryDescriptor = queryBuf.row(qIdx);
+ for (int c = 0; c < countFactor; c++)
+ {
+ int tIdx = qIdx * countFactor + c;
+ cv::Mat trainDescriptor = trainBuf.row(tIdx);
+ queryDescriptor.copyTo(trainDescriptor);
+ int elem = rng(dim);
+ float diff = rng.uniform(step * c, step * (c + 1));
+ trainDescriptor.at<float>(0, elem) += diff;
+ }
}
+
+ queryBuf.convertTo(query, CV_32F);
+ trainBuf.convertTo(train, CV_32F);
}
+ };
- queryBuf.convertTo(query, CV_32F);
- trainBuf.convertTo(train, CV_32F);
- }
-};
+ TEST_P(BruteForceMatcher, Match_Single)
+ {
+ cv::ocl::BruteForceMatcher_OCL_base matcher(distType);
-TEST_P(BruteForceMatcher, Match_Single)
-{
- cv::ocl::BruteForceMatcher_OCL_base matcher(distType);
+ std::vector<cv::DMatch> matches;
+ matcher.match(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches);
- std::vector<cv::DMatch> matches;
- matcher.match(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches);
+ ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
- ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
+ int badCount = 0;
+ for (size_t i = 0; i < matches.size(); i++)
+ {
+ cv::DMatch match = matches[i];
+ if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0))
+ badCount++;
+ }
- int badCount = 0;
- for (size_t i = 0; i < matches.size(); i++)
- {
- cv::DMatch match = matches[i];
- if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0))
- badCount++;
+ ASSERT_EQ(0, badCount);
}
- ASSERT_EQ(0, badCount);
-}
-
-TEST_P(BruteForceMatcher, KnnMatch_2_Single)
-{
- const int knn = 2;
+ TEST_P(BruteForceMatcher, KnnMatch_2_Single)
+ {
+ const int knn = 2;
- cv::ocl::BruteForceMatcher_OCL_base matcher(distType);
+ cv::ocl::BruteForceMatcher_OCL_base matcher(distType);
- std::vector< std::vector<cv::DMatch> > matches;
- matcher.knnMatch(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches, knn);
+ std::vector< std::vector<cv::DMatch> > matches;
+ matcher.knnMatch(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches, knn);
- ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
+ ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
- int badCount = 0;
- for (size_t i = 0; i < matches.size(); i++)
- {
- if ((int)matches[i].size() != knn)
- badCount++;
- else
+ int badCount = 0;
+ for (size_t i = 0; i < matches.size(); i++)
{
- int localBadCount = 0;
- for (int k = 0; k < knn; k++)
+ if ((int)matches[i].size() != knn)
+ badCount++;
+ else
{
- cv::DMatch match = matches[i][k];
- if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k) || (match.imgIdx != 0))
- localBadCount++;
+ int localBadCount = 0;
+ for (int k = 0; k < knn; k++)
+ {
+ cv::DMatch match = matches[i][k];
+ if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k) || (match.imgIdx != 0))
+ localBadCount++;
+ }
+ badCount += localBadCount > 0 ? 1 : 0;
}
- badCount += localBadCount > 0 ? 1 : 0;
}
- }
- ASSERT_EQ(0, badCount);
-}
+ ASSERT_EQ(0, badCount);
+ }
-TEST_P(BruteForceMatcher, RadiusMatch_Single)
-{
- float radius;
- if(distType == cv::ocl::BruteForceMatcher_OCL_base::L2Dist)
- radius = 1.f / countFactor /countFactor;
- else
- radius = 1.f / countFactor;
-
- cv::ocl::BruteForceMatcher_OCL_base matcher(distType);
-
- // assume support atomic.
- //if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
- //{
- // try
- // {
- // std::vector< std::vector<cv::DMatch> > matches;
- // matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);
- // }
- // catch (const cv::Exception& e)
- // {
- // ASSERT_EQ(CV_StsNotImplemented, e.code);
- // }
- //}
- //else
+ TEST_P(BruteForceMatcher, RadiusMatch_Single)
{
- std::vector< std::vector<cv::DMatch> > matches;
- matcher.radiusMatch(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches, radius);
+ float radius;
+ if(distType == cv::ocl::BruteForceMatcher_OCL_base::L2Dist)
+ radius = 1.f / countFactor / countFactor;
+ else
+ radius = 1.f / countFactor;
+
+ cv::ocl::BruteForceMatcher_OCL_base matcher(distType);
+
+ // assume support atomic.
+ //if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
+ //{
+ // try
+ // {
+ // std::vector< std::vector<cv::DMatch> > matches;
+ // matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);
+ // }
+ // catch (const cv::Exception& e)
+ // {
+ // ASSERT_EQ(CV_StsNotImplemented, e.code);
+ // }
+ //}
+ //else
+ {
+ std::vector< std::vector<cv::DMatch> > matches;
+ matcher.radiusMatch(cv::ocl::oclMat(query), cv::ocl::oclMat(train), matches, radius);
- ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
+ ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
- int badCount = 0;
- for (size_t i = 0; i < matches.size(); i++)
- {
- if ((int)matches[i].size() != 1)
- {
- badCount++;
- }
- else
+ int badCount = 0;
+ for (size_t i = 0; i < matches.size(); i++)
{
- cv::DMatch match = matches[i][0];
- if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i*countFactor) || (match.imgIdx != 0))
+ if ((int)matches[i].size() != 1)
+ {
badCount++;
+ }
+ else
+ {
+ cv::DMatch match = matches[i][0];
+ if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor) || (match.imgIdx != 0))
+ badCount++;
+ }
}
- }
- ASSERT_EQ(0, badCount);
+ ASSERT_EQ(0, badCount);
+ }
}
-}
-INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, testing::Combine(
- //ALL_DEVICES,
- testing::Values(DistType(cv::ocl::BruteForceMatcher_OCL_base::L1Dist), DistType(cv::ocl::BruteForceMatcher_OCL_base::L2Dist)),
- testing::Values(DescriptorSize(57), DescriptorSize(64), DescriptorSize(83), DescriptorSize(128), DescriptorSize(179), DescriptorSize(256), DescriptorSize(304))));
+ INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, testing::Combine(
+ //ALL_DEVICES,
+ testing::Values(DistType(cv::ocl::BruteForceMatcher_OCL_base::L1Dist), DistType(cv::ocl::BruteForceMatcher_OCL_base::L2Dist)),
+ testing::Values(DescriptorSize(57), DescriptorSize(64), DescriptorSize(83), DescriptorSize(128), DescriptorSize(179), DescriptorSize(256), DescriptorSize(304))));
} // namespace
-
+#endif
//M*/
#include "precomp.hpp"
-
-#define FILTER_IMAGE "../../../samples/gpu/road.png"
+#ifdef HAVE_OPENCL
+#ifdef WIN32
+#define FILTER_IMAGE "C:/Users/Public/Pictures/Sample Pictures/Penguins.jpg"
+#else
+#define FILTER_IMAGE "/Users/Test/Valve_original.PNG" // user need to specify a valid image path
+#endif
#define SHOW_RESULT 0
////////////////////////////////////////////////////////
bool useL2gradient;
cv::Mat edges_gold;
- //std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
apperture_size = GET_PARAM(0);
useL2gradient = GET_PARAM(1);
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums > 0);
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums > 0);
}
};
double low_thresh = 50.0;
double high_thresh = 100.0;
- cv::resize(img, img, cv::Size(512, 384));
- cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);
+ cv::resize(img, img, cv::Size(512, 384));
+ cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);
- cv::ocl::oclMat edges;
- cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
+ cv::ocl::oclMat edges;
+ cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient);
- char filename [100];
- sprintf(filename, "G:/Valve_edges_a%d_L2Grad%d.jpg", apperture_size, (int)useL2gradient);
+ char filename [100];
+ sprintf(filename, "G:/Valve_edges_a%d_L2Grad%d.jpg", apperture_size, (int)useL2gradient);
- cv::Mat edges_gold;
- cv::Canny(img, edges_gold, low_thresh, high_thresh, apperture_size, useL2gradient);
+ cv::Mat edges_gold;
+ cv::Canny(img, edges_gold, low_thresh, high_thresh, apperture_size, useL2gradient);
#if SHOW_RESULT
- cv::Mat edges_x2, ocl_edges(edges);
- edges_x2.create(edges.rows, edges.cols * 2, edges.type());
- edges_x2.setTo(0);
- cv::add(edges_gold,cv::Mat(edges_x2,cv::Rect(0,0,edges_gold.cols,edges_gold.rows)), cv::Mat(edges_x2,cv::Rect(0,0,edges_gold.cols,edges_gold.rows)));
- cv::add(ocl_edges,cv::Mat(edges_x2,cv::Rect(edges_gold.cols,0,edges_gold.cols,edges_gold.rows)), cv::Mat(edges_x2,cv::Rect(edges_gold.cols,0,edges_gold.cols,edges_gold.rows)));
- cv::namedWindow("Canny result (left: cpu, right: ocl)");
+ cv::Mat edges_x2, ocl_edges(edges);
+ edges_x2.create(edges.rows, edges.cols * 2, edges.type());
+ edges_x2.setTo(0);
+ cv::add(edges_gold, cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(0, 0, edges_gold.cols, edges_gold.rows)));
+ cv::add(ocl_edges, cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows)), cv::Mat(edges_x2, cv::Rect(edges_gold.cols, 0, edges_gold.cols, edges_gold.rows)));
+ cv::namedWindow("Canny result (left: cpu, right: ocl)");
cv::imshow("Canny result (left: cpu, right: ocl)", edges_x2);
- cv::waitKey();
+ cv::waitKey();
#endif //OUTPUT_RESULT
- EXPECT_MAT_SIMILAR(edges_gold, edges, 1e-2);
+ EXPECT_MAT_SIMILAR(edges_gold, edges, 1e-2);
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine(
- testing::Values(AppertureSize(3), AppertureSize(5)),
- testing::Values(L2gradient(false), L2gradient(true))));
+ testing::Values(AppertureSize(3), AppertureSize(5)),
+ testing::Values(L2gradient(false), L2gradient(true))));
+#endif
//
// @Authors
// Chunpeng Zhang chunpeng@multicorewareinc.com
-//
+//
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
{
cv::Size size;
cv::Mat src;
- bool useRoi;
- //std::vector<cv::ocl::Info> oclinfo;
+ bool useRoi;
+ //std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
size = GET_PARAM(0);
- useRoi = GET_PARAM(1);
+ useRoi = GET_PARAM(1);
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
//CV_Assert(devnums > 0);
}
TEST_P(ColumnSum, Accuracy)
{
cv::Mat src = randomMat(size, CV_32FC1);
- cv::ocl::oclMat d_dst;
- cv::ocl::oclMat d_src(src);
+ cv::ocl::oclMat d_dst;
+ cv::ocl::oclMat d_src(src);
- cv::ocl::columnSum(d_src,d_dst);
+ cv::ocl::columnSum(d_src, d_dst);
cv::Mat dst(d_dst);
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ColumnSum, testing::Combine(
- DIFFERENT_SIZES,testing::Values(Inverse(false),Inverse(true))));
+ DIFFERENT_SIZES, testing::Values(Inverse(false), Inverse(true))));
-#endif
+#endif
#ifdef HAVE_CLAMDFFT
////////////////////////////////////////////////////////////////////////////
// Dft
-PARAM_TEST_CASE(Dft, cv::Size, bool)
+PARAM_TEST_CASE(Dft, cv::Size, bool)
{
- cv::Size dft_size;
- bool dft_rows;
- //std::vector<cv::ocl::Info> oclinfo;
+ cv::Size dft_size;
+ bool dft_rows;
+ //std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
- //int devnums = getDevice(oclinfo);
- // CV_Assert(devnums > 0);
- dft_size = GET_PARAM(0);
- dft_rows = GET_PARAM(1);
+ //int devnums = getDevice(oclinfo);
+ // CV_Assert(devnums > 0);
+ dft_size = GET_PARAM(0);
+ dft_rows = GET_PARAM(1);
}
};
TEST_P(Dft, C2C)
{
- cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
- cv::Mat b_gold;
- int flags = 0;
- flags |= dft_rows ? cv::DFT_ROWS : 0;
+ cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0);
+ cv::Mat b_gold;
+ int flags = 0;
+ flags |= dft_rows ? cv::DFT_ROWS : 0;
- cv::ocl::oclMat d_b;
-
- cv::dft(a, b_gold, flags);
- cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
- EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4, "");
+ cv::ocl::oclMat d_b;
+
+ cv::dft(a, b_gold, flags);
+ cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
+ EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), a.size().area() * 1e-4, "");
}
TEST_P(Dft, R2CthenC2R)
{
- cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
-
- int flags = 0;
- //flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet
+ cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0);
+
+ int flags = 0;
+ //flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet
- cv::ocl::oclMat d_b, d_c;
- cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
- cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT);
- EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
+ cv::ocl::oclMat d_b, d_c;
+ cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags);
+ cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT);
+ EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, "");
}
INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine(
- testing::Values(cv::Size(5, 4), cv::Size(20, 20)),
- testing::Values(false, true)));
+ testing::Values(cv::Size(5, 4), cv::Size(20, 20)),
+ testing::Values(false, true)));
#endif // HAVE_CLAMDFFT
{
#ifdef RANDOMROI
//randomize ROI
- cv::RNG &rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
roicols = rng.uniform(1, mat1.cols);
roirows = rng.uniform(1, mat1.rows);
src1x = rng.uniform(0, mat1.cols - roicols);
}
void random_roi()
- {
+ {
#ifdef RANDOMROI
//randomize ROI
- cv::RNG &rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
roicols = rng.uniform(2, mat1.cols);
roirows = rng.uniform(2, mat1.rows);
src1x = rng.uniform(0, mat1.cols - roicols);
}
void random_roi()
- {
+ {
#ifdef RANDOMROI
//randomize ROI
- cv::RNG &rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
roicols = rng.uniform(2, mat.cols);
roirows = rng.uniform(2, mat.rows);
srcx = rng.uniform(0, mat.cols - roicols);
}
void random_roi()
- {
+ {
#ifdef RANDOMROI
//randomize ROI
- cv::RNG &rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
roicols = rng.uniform(2, mat1.cols);
roirows = rng.uniform(2, mat1.rows);
src1x = rng.uniform(0, mat1.cols - roicols);
}
void random_roi()
- {
+ {
#ifdef RANDOMROI
//randomize ROI
- cv::RNG &rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
roicols = rng.uniform(2, mat1.cols);
roirows = rng.uniform(2, mat1.rows);
src1x = rng.uniform(0, mat1.cols - roicols);
}
void random_roi()
- {
+ {
#ifdef RANDOMROI
//randomize ROI
- cv::RNG &rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
roicols = rng.uniform(2, mat1.cols);
roirows = rng.uniform(2, mat1.rows);
src1x = rng.uniform(0, mat1.cols - roicols);
}
void random_roi()
- {
+ {
#ifdef RANDOMROI
//randomize ROI
- cv::RNG &rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
roicols = rng.uniform(2, mat1.cols);
roirows = rng.uniform(2, mat1.rows);
src1x = rng.uniform(0, mat1.cols - roicols);
-INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4),
+INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
Values(cv::Size(3, 3), cv::Size(5, 5), cv::Size(7, 7)),
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101)));
INSTANTIATE_TEST_CASE_P(Filters, Laplacian, Combine(
- Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4),
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(1, 3)));
//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3)));
INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(Values(CV_8UC1, CV_8UC1), Values(false)));
-INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4),
+INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(1, 2), Values(0, 1), Values(3, 5), Values((MatType)cv::BORDER_CONSTANT,
(MatType)cv::BORDER_REPLICATE)));
INSTANTIATE_TEST_CASE_P(Filter, Scharr, Combine(
- Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4), Values(0, 1), Values(0, 1),
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4), Values(0, 1), Values(0, 1),
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine(
- Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4),
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
Values(cv::Size(3, 3), cv::Size(5, 5)),
Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));
-
+
#endif // HAVE_OPENCL
#ifdef HAVE_CLAMDBLAS
////////////////////////////////////////////////////////////////////////////
// GEMM
-PARAM_TEST_CASE(Gemm, int, cv::Size, int)
+PARAM_TEST_CASE(Gemm, int, cv::Size, int)
{
- int type;
- cv::Size mat_size;
- int flags;
- //vector<cv::ocl::Info> info;
+ int type;
+ cv::Size mat_size;
+ int flags;
+ //vector<cv::ocl::Info> info;
virtual void SetUp()
{
- type = GET_PARAM(0);
- mat_size = GET_PARAM(1);
- flags = GET_PARAM(2);
- //cv::ocl::getDevice(info);
+ type = GET_PARAM(0);
+ mat_size = GET_PARAM(1);
+ flags = GET_PARAM(2);
+ //cv::ocl::getDevice(info);
}
};
TEST_P(Gemm, Accuracy)
{
- cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
- cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
- cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
+ cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
+ cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
+ cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
- cv::Mat dst;
- cv::ocl::oclMat ocl_dst;
+ cv::Mat dst;
+ cv::ocl::oclMat ocl_dst;
- cv::gemm(a, b, 1.0, c, 1.0, dst, flags);
- cv::ocl::gemm(cv::ocl::oclMat(a), cv::ocl::oclMat(b), 1.0, cv::ocl::oclMat(c), 1.0, ocl_dst, flags);
+ cv::gemm(a, b, 1.0, c, 1.0, dst, flags);
+ cv::ocl::gemm(cv::ocl::oclMat(a), cv::ocl::oclMat(b), 1.0, cv::ocl::oclMat(c), 1.0, ocl_dst, flags);
- EXPECT_MAT_NEAR(dst, ocl_dst, mat_size.area() * 1e-4, "");
+ EXPECT_MAT_NEAR(dst, ocl_dst, mat_size.area() * 1e-4, "");
}
INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
- testing::Values(CV_32FC1, CV_32FC2/*, CV_64FC1, CV_64FC2*/),
- testing::Values(cv::Size(20, 20), cv::Size(300, 300)),
- testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
+ testing::Values(CV_32FC1, CV_32FC2/*, CV_64FC1, CV_64FC2*/),
+ testing::Values(cv::Size(20, 20), cv::Size(300, 300)),
+ testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
#endif
using namespace std;
using namespace cv;
-struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } };
+struct getRect
+{
+ Rect operator ()(const CvAvgComp &e) const
+ {
+ return e.rect;
+ }
+};
PARAM_TEST_CASE(HaarTestBase, int, int)
{
- //std::vector<cv::ocl::Info> oclinfo;
- cv::ocl::OclCascadeClassifier cascade, nestedCascade;
- cv::CascadeClassifier cpucascade, cpunestedCascade;
- // Mat img;
-
- double scale;
- int index;
-
- virtual void SetUp()
- {
- scale = 1.0;
- index=0;
- string cascadeName="../../../data/haarcascades/haarcascade_frontalface_alt.xml";
-
- if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
- {
- cout << "ERROR: Could not load classifier cascade" << endl;
- cout << "Usage: facedetect [--cascade=<cascade_path>]\n"
- " [--scale[=<image scale>\n"
- " [filename|camera_index]\n" << endl ;
- return;
- }
- //int devnums = getDevice(oclinfo);
- //CV_Assert(devnums>0);
- ////if you want to use undefault device, set it here
- ////setDevice(oclinfo[0]);
- //cv::ocl::setBinpath("E:\\");
- }
+ //std::vector<cv::ocl::Info> oclinfo;
+ cv::ocl::OclCascadeClassifier cascade, nestedCascade;
+ cv::CascadeClassifier cpucascade, cpunestedCascade;
+ // Mat img;
+
+ double scale;
+ int index;
+
+ virtual void SetUp()
+ {
+ scale = 1.0;
+ index = 0;
+ string cascadeName = "../../../data/haarcascades/haarcascade_frontalface_alt.xml";
+
+ if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)))
+ {
+ cout << "ERROR: Could not load classifier cascade" << endl;
+ cout << "Usage: facedetect [--cascade=<cascade_path>]\n"
+ " [--scale[=<image scale>\n"
+ " [filename|camera_index]\n" << endl ;
+ return;
+ }
+ //int devnums = getDevice(oclinfo);
+ //CV_Assert(devnums>0);
+ ////if you want to use undefault device, set it here
+ ////setDevice(oclinfo[0]);
+ //cv::ocl::setBinpath("E:\\");
+ }
};
////////////////////////////////faceDetect/////////////////////////////////////////////////
struct Haar : HaarTestBase {};
-TEST_F(Haar, FaceDetect)
-{
- string imgName = "../../../samples/c/lena.jpg";
- Mat img = imread( imgName, 1 );
-
- if(img.empty())
- {
- std::cout << "Couldn't read test" << index <<".jpg" << std::endl;
- return ;
- }
-
- int i = 0;
- double t = 0;
- vector<Rect> faces, oclfaces;
-
- const static Scalar colors[] = { CV_RGB(0,0,255),
- CV_RGB(0,128,255),
- CV_RGB(0,255,255),
- CV_RGB(0,255,0),
- CV_RGB(255,128,0),
- CV_RGB(255,255,0),
- CV_RGB(255,0,0),
- CV_RGB(255,0,255)} ;
-
- Mat gray, smallImg(cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 );
- MemStorage storage(cvCreateMemStorage(0));
- cvtColor( img, gray, CV_BGR2GRAY );
- resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
- equalizeHist( smallImg, smallImg );
-
-
- cv::ocl::oclMat image;
- CvSeq* _objects;
- image.upload(smallImg);
- _objects = cascade.oclHaarDetectObjects( image, storage, 1.1,
- 3, 0
- |CV_HAAR_SCALE_IMAGE
- , Size(30,30), Size(0, 0) );
- vector<CvAvgComp> vecAvgComp;
- Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
- oclfaces.resize(vecAvgComp.size());
- std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
-
- cpucascade.detectMultiScale( smallImg, faces, 1.1,
- 3, 0
- |CV_HAAR_SCALE_IMAGE
- , Size(30,30), Size(0, 0) );
- EXPECT_EQ(faces.size(),oclfaces.size());
- /* for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
- {
- Mat smallImgROI;
- Point center;
- Scalar color = colors[i%8];
- int radius;
- center.x = cvRound((r->x + r->width*0.5)*scale);
- center.y = cvRound((r->y + r->height*0.5)*scale);
- radius = cvRound((r->width + r->height)*0.25*scale);
- circle( img, center, radius, color, 3, 8, 0 );
- } */
- //namedWindow("result");
- //imshow("result",img);
- //waitKey(0);
- //destroyAllWindows();
+TEST_F(Haar, FaceDetect)
+{
+ string imgName = "../../../samples/c/lena.jpg";
+ Mat img = imread( imgName, 1 );
+
+ if(img.empty())
+ {
+ std::cout << "Couldn't read test" << index << ".jpg" << std::endl;
+ return ;
+ }
+
+ int i = 0;
+ double t = 0;
+ vector<Rect> faces, oclfaces;
+
+ const static Scalar colors[] = { CV_RGB(0, 0, 255),
+ CV_RGB(0, 128, 255),
+ CV_RGB(0, 255, 255),
+ CV_RGB(0, 255, 0),
+ CV_RGB(255, 128, 0),
+ CV_RGB(255, 255, 0),
+ CV_RGB(255, 0, 0),
+ CV_RGB(255, 0, 255)
+ } ;
+
+ Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 );
+ MemStorage storage(cvCreateMemStorage(0));
+ cvtColor( img, gray, CV_BGR2GRAY );
+ resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR );
+ equalizeHist( smallImg, smallImg );
+
+
+ cv::ocl::oclMat image;
+ CvSeq *_objects;
+ image.upload(smallImg);
+ _objects = cascade.oclHaarDetectObjects( image, storage, 1.1,
+ 3, 0
+ | CV_HAAR_SCALE_IMAGE
+ , Size(30, 30), Size(0, 0) );
+ vector<CvAvgComp> vecAvgComp;
+ Seq<CvAvgComp>(_objects).copyTo(vecAvgComp);
+ oclfaces.resize(vecAvgComp.size());
+ std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect());
+
+ cpucascade.detectMultiScale( smallImg, faces, 1.1,
+ 3, 0
+ | CV_HAAR_SCALE_IMAGE
+ , Size(30, 30), Size(0, 0) );
+ EXPECT_EQ(faces.size(), oclfaces.size());
+ /* for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
+ {
+ Mat smallImgROI;
+ Point center;
+ Scalar color = colors[i%8];
+ int radius;
+ center.x = cvRound((r->x + r->width*0.5)*scale);
+ center.y = cvRound((r->y + r->height*0.5)*scale);
+ radius = cvRound((r->width + r->height)*0.25*scale);
+ circle( img, center, radius, color, 3, 8, 0 );
+ } */
+ //namedWindow("result");
+ //imshow("result",img);
+ //waitKey(0);
+ //destroyAllWindows();
}
#endif // HAVE_OPENCL
#ifdef HAVE_OPENCL
-PARAM_TEST_CASE(HOG,cv::Size,int)
+PARAM_TEST_CASE(HOG, cv::Size, int)
{
- cv::Size winSize;
- int type;
- virtual void SetUp()
- {
- winSize = GET_PARAM(0);
- type = GET_PARAM(1);
- }
+ cv::Size winSize;
+ int type;
+ virtual void SetUp()
+ {
+ winSize = GET_PARAM(0);
+ type = GET_PARAM(1);
+ }
};
TEST_P(HOG, GetDescriptors)
bool match_rect(cv::Rect r1, cv::Rect r2, int threshold)
{
return ((abs(r1.x - r2.x) < threshold) && (abs(r1.y - r2.y) < threshold) &&
- (abs(r1.width - r2.width) < threshold) && (abs(r1.height - r2.height) < threshold));
+ (abs(r1.width - r2.width) < threshold) && (abs(r1.height - r2.height) < threshold));
}
TEST_P(HOG, Detect)
// OpenCL detection
std::vector<cv::Rect> d_found;
- ocl_hog.detectMultiScale(d_img, d_found, 0, cv::Size(8,8), cv::Size(0,0), 1.05, 2);
-
+ ocl_hog.detectMultiScale(d_img, d_found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2);
+
// CPU detection
std::vector<cv::Rect> found;
switch (type)
{
case CV_8UC1:
- hog.detectMultiScale(img, found, 0, cv::Size(8,8), cv::Size(0,0), 1.05, 2);
+ hog.detectMultiScale(img, found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2);
break;
case CV_8UC4:
default:
- hog.detectMultiScale(img_rgb, found, 0, cv::Size(8,8), cv::Size(0,0), 1.05, 2);
+ hog.detectMultiScale(img_rgb, found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2);
break;
}
-
+
// Ground-truth rectangular people window
cv::Rect win1_64x128(231, 190, 72, 144);
cv::Rect win2_64x128(621, 156, 97, 194);
}
}
- char s[100]={0};
+ char s[100] = {0};
EXPECT_MAT_NEAR(cv::Mat(d_comp), cv::Mat(comp), 3, s);
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, HOG, testing::Combine(
- testing::Values(cv::Size(64, 128), cv::Size(48, 96)),
- testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
+ testing::Values(cv::Size(64, 128), cv::Size(48, 96)),
+ testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
#endif //HAVE_OPENCL
{
int t0, t1, t2;
t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
- if(tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2)
+ if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
rowCount++;
}
t0 = ptr[4], t1 = ptr[5], t2 = ptr[6];
- if(tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2)
+ if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
rowCount++;
}
t0 = ptr[8], t1 = ptr[9], t2 = ptr[10];
- if(tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2)
+ if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
rowCount++;
}
t0 = ptr[12], t1 = ptr[13], t2 = ptr[14];
- if(tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2)
+ if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
for(; x <= maxx; x++, ptr += 4)
{
int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
- if(tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2)
+ if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
{
s0 += t0;
s1 += t1;
s2 = cvFloor(s2 * icount);
bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) +
- tab[s0-c0+255] + tab[s1-c1+255] + tab[s2-c2+255] <= eps);
+ tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps);
//revise the pointer corresponding to the new (y0,x0)
revx = x1 - x0;
}
void random_roi()
- {
+ {
#ifdef RANDOMROI
//randomize ROI
- cv::RNG &rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
roicols = rng.uniform(1, mat1.cols);
roirows = rng.uniform(1, mat1.rows);
src1x = rng.uniform(0, mat1.cols - roicols);
int radius = 9;
int d = 2 * radius + 1;
double sigmaspace = 20.0;
- int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE,cv::BORDER_REFLECT,cv::BORDER_WRAP,cv::BORDER_REFLECT_101};
- const char* borderstr[]={"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"};
+ int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, cv::BORDER_REFLECT, cv::BORDER_WRAP, cv::BORDER_REFLECT_101};
+ const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};
- if (mat1.type() != CV_8UC1 || mat1.type() != dst.type())
+ if (mat1.depth() != CV_8U || mat1.type() != dst.type())
{
cout << "Unsupported type" << endl;
EXPECT_DOUBLE_EQ(0.0, 0.0);
for(int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
- #ifdef RANDOMROI
- if(((bordertype[i]!=cv::BORDER_CONSTANT) && (bordertype[i]!=cv::BORDER_REPLICATE))&&(mat1_roi.cols<=radius) || (mat1_roi.cols<=radius) || (mat1_roi.rows <= radius) || (mat1_roi.rows <= radius))
- {
- continue;
- }
- if((dstx>=radius) && (dsty >= radius) && (dstx+cldst_roi.cols+radius <=cldst_roi.wholecols) && (dsty+cldst_roi.rows+radius <= cldst_roi.wholerows))
- {
- dst_roi.adjustROI(radius, radius, radius, radius);
- cldst_roi.adjustROI(radius, radius, radius, radius);
- }
- else
- {
- continue;
- }
- #endif
- cv::bilateralFilter(mat1_roi, dst_roi, d, sigmacolor, sigmaspace, bordertype[i]|cv::BORDER_ISOLATED);
- cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d, sigmacolor, sigmaspace, bordertype[i]|cv::BORDER_ISOLATED);
+ if(((bordertype[i] != cv::BORDER_CONSTANT) && (bordertype[i] != cv::BORDER_REPLICATE)) && (mat1_roi.cols <= radius) || (mat1_roi.cols <= radius) || (mat1_roi.rows <= radius) || (mat1_roi.rows <= radius))
+ {
+ continue;
+ }
+ //if((dstx>=radius) && (dsty >= radius) && (dstx+cldst_roi.cols+radius <=cldst_roi.wholecols) && (dsty+cldst_roi.rows+radius <= cldst_roi.wholerows))
+ //{
+ // dst_roi.adjustROI(radius, radius, radius, radius);
+ // cldst_roi.adjustROI(radius, radius, radius, radius);
+ //}
+ //else
+ //{
+ // continue;
+ //}
+
+ cv::bilateralFilter(mat1_roi, dst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED);
+ cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d, sigmacolor, sigmaspace, bordertype[i] | cv::BORDER_ISOLATED);
cv::Mat cpu_cldst;
- #ifndef RANDOMROI
- cldst_roi.download(cpu_cldst);
- #else
- cldst.download(cpu_cldst);
- #endif
+ cldst.download(cpu_cldst);
+
char sss[1024];
sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,radius=%d,boredertype=%s", roicols, roirows, src1x, src1y, dstx, dsty, radius, borderstr[i]);
+ //for(int i=0;i<dst.rows;i++)
+ //{
+ // for(int j=0;j<dst.cols*dst.channels();j++)
+ // {
+ // if(dst.at<uchar>(i,j)!=cpu_cldst.at<uchar>(i,j))
+ // cout<< i <<" "<< j <<" "<< (int)dst.at<uchar>(i,j)<<" "<< (int)cpu_cldst.at<uchar>(i,j)<<" ";
+ // }
+ // cout<<endl;
+ //}
+
+ EXPECT_MAT_NEAR(dst, cpu_cldst, 1.0, sss);
- #ifndef RANDOMROI
- EXPECT_MAT_NEAR(dst_roi, cpu_cldst, 0.0, sss);
- #else
- //for(int i=0;i<dst_roi.rows;i++)
- //{
- // for(int j=0;j<dst_roi.cols;j++)
- // {
- // cout<< (int)dst_roi.at<uchar>(i,j)<<" "<< (int)cpu_cldst.at<uchar>(i,j)<<" ";
- // }
- // cout<<endl;
- //}
- EXPECT_MAT_NEAR(dst, cpu_cldst, 0.0, sss);
- #endif
}
}
}
TEST_P(CopyMakeBorder, Mat)
{
- int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE,cv::BORDER_REFLECT,cv::BORDER_WRAP,cv::BORDER_REFLECT_101};
- const char* borderstr[]={"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"};
- cv::RNG &rng = TS::ptr()->get_rng();
- int top = rng.uniform(0, 10);
- int bottom = rng.uniform(0, 10);
- int left = rng.uniform(0, 10);
- int right = rng.uniform(0, 10);
+ int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, cv::BORDER_REFLECT, cv::BORDER_WRAP, cv::BORDER_REFLECT_101};
+ const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};
+ cv::RNG &rng = TS::ptr()->get_rng();
+ int top = rng.uniform(0, 10);
+ int bottom = rng.uniform(0, 10);
+ int left = rng.uniform(0, 10);
+ int right = rng.uniform(0, 10);
if (mat1.type() != dst.type())
{
cout << "Unsupported type" << endl;
for(int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
- #ifdef RANDOMROI
- if(((bordertype[i]!=cv::BORDER_CONSTANT) && (bordertype[i]!=cv::BORDER_REPLICATE))&&(mat1_roi.cols<=left) || (mat1_roi.cols<=right) || (mat1_roi.rows <= top) || (mat1_roi.rows <= bottom))
- {
- continue;
- }
- if((dstx>=left) && (dsty >= top) && (dstx+cldst_roi.cols+right <=cldst_roi.wholecols) && (dsty+cldst_roi.rows+bottom <= cldst_roi.wholerows))
- {
- dst_roi.adjustROI(top, bottom, left, right);
- cldst_roi.adjustROI(top, bottom, left, right);
- }
- else
- {
- continue;
- }
- #endif
- cv::copyMakeBorder(mat1_roi, dst_roi, top, bottom, left, right, bordertype[i]| cv::BORDER_ISOLATED, cv::Scalar(1.0));
- cv::ocl::copyMakeBorder(clmat1_roi, cldst_roi, top, bottom, left, right, bordertype[i]| cv::BORDER_ISOLATED, cv::Scalar(1.0));
+#ifdef RANDOMROI
+ if(((bordertype[i] != cv::BORDER_CONSTANT) && (bordertype[i] != cv::BORDER_REPLICATE)) && (mat1_roi.cols <= left) || (mat1_roi.cols <= right) || (mat1_roi.rows <= top) || (mat1_roi.rows <= bottom))
+ {
+ continue;
+ }
+ if((dstx >= left) && (dsty >= top) && (dstx + cldst_roi.cols + right <= cldst_roi.wholecols) && (dsty + cldst_roi.rows + bottom <= cldst_roi.wholerows))
+ {
+ dst_roi.adjustROI(top, bottom, left, right);
+ cldst_roi.adjustROI(top, bottom, left, right);
+ }
+ else
+ {
+ continue;
+ }
+#endif
+ cv::copyMakeBorder(mat1_roi, dst_roi, top, bottom, left, right, bordertype[i] | cv::BORDER_ISOLATED, cv::Scalar(1.0));
+ cv::ocl::copyMakeBorder(clmat1_roi, cldst_roi, top, bottom, left, right, bordertype[i] | cv::BORDER_ISOLATED, cv::Scalar(1.0));
cv::Mat cpu_cldst;
- #ifndef RANDOMROI
+#ifndef RANDOMROI
cldst_roi.download(cpu_cldst);
- #else
- cldst.download(cpu_cldst);
- #endif
+#else
+ cldst.download(cpu_cldst);
+#endif
char sss[1024];
- sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,dst1x=%d,dst1y=%d,top=%d,bottom=%d,left=%d,right=%d, bordertype=%s", roicols, roirows, src1x, src1y, dstx, dsty, dst1x, dst1y, top, bottom, left, right,borderstr[i]);
- #ifndef RANDOMROI
+ sprintf(sss, "roicols=%d,roirows=%d,src1x=%d,src1y=%d,dstx=%d,dsty=%d,dst1x=%d,dst1y=%d,top=%d,bottom=%d,left=%d,right=%d, bordertype=%s", roicols, roirows, src1x, src1y, dstx, dsty, dst1x, dst1y, top, bottom, left, right, borderstr[i]);
+#ifndef RANDOMROI
EXPECT_MAT_NEAR(dst_roi, cpu_cldst, 0.0, sss);
- #else
- //for(int i=0;i<dst.rows;i++)
- //{
- //for(int j=0;j<dst.cols;j++)
- //{
- // cout<< (int)dst.at<uchar>(i,j)<<" ";
- //}
- //cout<<endl;
- //}
- EXPECT_MAT_NEAR(dst, cpu_cldst, 0.0, sss);
- #endif
+#else
+ //for(int i=0;i<dst.rows;i++)
+ //{
+ //for(int j=0;j<dst.cols;j++)
+ //{
+ // cout<< (int)dst.at<uchar>(i,j)<<" ";
+ //}
+ //cout<<endl;
+ //}
+ EXPECT_MAT_NEAR(dst, cpu_cldst, 0.0, sss);
+#endif
}
}
}
}
void random_roi()
- {
+ {
#ifdef RANDOMROI
//randomize ROI
- cv::RNG &rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
src_roicols = rng.uniform(1, mat1.cols);
src_roirows = rng.uniform(1, mat1.rows);
dst_roicols = rng.uniform(1, dst.cols);
cv::Mat map2;
//std::vector<cv::ocl::Info> oclinfo;
-
+
int src_roicols;
int src_roirows;
int dst_roicols;
//int devnums = getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
//CV_Assert(devnums > 0);
- cv::RNG& rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
cv::Size srcSize = cv::Size(MWIDTH, MHEIGHT);
cv::Size dstSize = cv::Size(MWIDTH, MHEIGHT);
cv::Size map1Size = cv::Size(MWIDTH, MHEIGHT);
else
{
- cout<<"The wrong input type"<<endl;
+ cout << "The wrong input type" << endl;
return;
}
dst = randomMat(rng, map1Size, srcType, min, max, false);
switch (src.channels())
{
- case 1:
- val = cv::Scalar(rng.uniform(0.0, 10.0), 0, 0, 0);
- break;
- case 2:
- val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0, 0);
- break;
- case 3:
- val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0);
- break;
- case 4:
- val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0));
- break;
+ case 1:
+ val = cv::Scalar(rng.uniform(0.0, 10.0), 0, 0, 0);
+ break;
+ case 2:
+ val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0, 0);
+ break;
+ case 3:
+ val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0);
+ break;
+ case 4:
+ val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0));
+ break;
}
}
void random_roi()
{
- cv::RNG& rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
dst_roicols = rng.uniform(1, dst.cols);
dst_roirows = rng.uniform(1, dst.rows);
src_roicols = rng.uniform(1, src.cols);
src_roirows = rng.uniform(1, src.rows);
-
+
srcx = rng.uniform(0, src.cols - src_roicols);
srcy = rng.uniform(0, src.rows - src_roirows);
dstx = rng.uniform(0, dst.cols - dst_roicols);
if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2 && map2Type == nulltype))
{
- map1_roi = map1(Rect(map1x,map1y,map1_roicols,map1_roirows));
+ map1_roi = map1(Rect(map1x, map1y, map1_roicols, map1_roirows));
gmap1_roi = map1_roi;
}
else if (map1Type == CV_32FC1 && map2Type == CV_32FC1)
{
- map1_roi = map1(Rect(map1x,map1y,map1_roicols,map1_roirows));
+ map1_roi = map1(Rect(map1x, map1y, map1_roicols, map1_roirows));
gmap1_roi = map1_roi;
- map2_roi = map2(Rect(map2x,map2y,map2_roicols,map2_roirows));
+ map2_roi = map2(Rect(map2x, map2y, map2_roicols, map2_roirows));
gmap2_roi = map2_roi;
}
- src_roi = src(Rect(srcx,srcy,src_roicols,src_roirows));
- dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows));
+ src_roi = src(Rect(srcx, srcy, src_roicols, src_roirows));
+ dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows));
gsrc_roi = src_roi;
gdst = dst;
gdst_roi = gdst(Rect(dstx, dsty, dst_roicols, dst_roirows));
TEST_P(Remap, Mat)
{
- if((interpolation == 1 && map1Type == CV_16SC2) ||(map1Type == CV_32FC1 && map2Type == nulltype) || (map1Type == CV_16SC2 && map2Type == CV_32FC1) || (map1Type == CV_32FC2 && map2Type == CV_32FC1))
+ if((interpolation == 1 && map1Type == CV_16SC2) || (map1Type == CV_32FC1 && map2Type == nulltype) || (map1Type == CV_16SC2 && map2Type == CV_32FC1) || (map1Type == CV_32FC2 && map2Type == CV_32FC1))
{
cout << "Don't support the dataType" << endl;
- return;
+ return;
}
- int bordertype[] = {cv::BORDER_CONSTANT,cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/};
- const char* borderstr[]={"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/};
+ int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/};
+ const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/};
// for(int i = 0; i < sizeof(bordertype)/sizeof(int); i++)
- for(int j=0; j<100; j++)
+ for(int j = 0; j < 100; j++)
{
random_roi();
cv::remap(src_roi, dst_roi, map1_roi, map2_roi, interpolation, bordertype[0], val);
char sss[1024];
sprintf(sss, "src_roicols=%d,src_roirows=%d,dst_roicols=%d,dst_roirows=%d,src1x =%d,src1y=%d,dstx=%d,dsty=%d", src_roicols, src_roirows, dst_roicols, dst_roirows, srcx, srcy, dstx, dsty);
-
+
if(interpolation == 0)
EXPECT_MAT_NEAR(dst, cpu_dst, 1.0, sss);
EXPECT_MAT_NEAR(dst, cpu_dst, 2.0, sss);
-
+
}
}
}
void random_roi()
- {
+ {
#ifdef RANDOMROI
//randomize ROI
- cv::RNG &rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
src_roicols = rng.uniform(1, mat1.cols);
src_roirows = rng.uniform(1, mat1.rows);
- dst_roicols = (int)(src_roicols*fx);
- dst_roirows = (int)(src_roirows*fy);
+ dst_roicols = (int)(src_roicols * fx);
+ dst_roirows = (int)(src_roirows * fy);
src1x = rng.uniform(0, mat1.cols - src_roicols);
src1y = rng.uniform(0, mat1.rows - src_roirows);
dstx = rng.uniform(0, dst.cols - dst_roicols);
// cv::resize(mat1_roi, dst_roi, dsize, fx, fy, interpolation);
// cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation);
- if(dst_roicols<1||dst_roirows<1) continue;
+ if(dst_roicols < 1 || dst_roirows < 1) continue;
cv::resize(mat1_roi, dst_roi, dsize, fx, fy, interpolation);
cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation);
}
void random_roi()
- {
+ {
#ifdef RANDOMROI
//randomize ROI
- cv::RNG &rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
roicols = rng.uniform(1, mat1.cols);
roirows = rng.uniform(1, mat1.rows);
src1x = rng.uniform(0, mat1.cols - roicols);
///////////////////////////////////////////////////////////////////////////////////////
//hist
-void calcHistGold(const cv::Mat& src, cv::Mat& hist)
+void calcHistGold(const cv::Mat &src, cv::Mat &hist)
{
hist.create(1, 256, CV_32SC1);
hist.setTo(cv::Scalar::all(0));
- int* hist_row = hist.ptr<int>();
+ int *hist_row = hist.ptr<int>();
for (int y = 0; y < src.rows; ++y)
{
- const uchar* src_row = src.ptr(y);
+ const uchar *src_row = src.ptr(y);
for (int x = 0; x < src.cols; ++x)
++hist_row[src_row[x]];
cv::ocl::oclMat gdst_hist;
//ocl mat with roi
cv::ocl::oclMat gsrc_roi;
-// std::vector<cv::ocl::Info> oclinfo;
+ // std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
type_src = GET_PARAM(0);
-
+
cv::RNG &rng = TS::ptr()->get_rng();
cv::Size size = cv::Size(MWIDTH, MHEIGHT);
src = randomMat(rng, size, type_src, 0, 256, false);
-// int devnums = getDevice(oclinfo);
-// CV_Assert(devnums > 0);
+ // int devnums = getDevice(oclinfo);
+ // CV_Assert(devnums > 0);
//if you want to use undefault device, set it here
//setDevice(oclinfo[0]);
}
int N2 = y.rows;
int M2 = y.cols;
- int i,j;
- int m,n;
-
+ int i, j;
+ int m, n;
+
float *kerneldata = (float *)(x.data);
float *srcdata = (float *)(y.data);
float *dstdata = (float *)(z.data);
- for(i=0;i<N2;i++)
- for(j=0;j<M2;j++)
+ for(i = 0; i < N2; i++)
+ for(j = 0; j < M2; j++)
{
- float temp =0;
- for(m=0;m<N1;m++)
- for(n=0;n<M1;n++)
+ float temp = 0;
+ for(m = 0; m < N1; m++)
+ for(n = 0; n < M1; n++)
{
int r, c;
- r = min(max((i-N1/2+m), 0), N2-1);
- c = min(max((j-M1/2+n), 0), M2-1);
- temp += kerneldata[m*(x.step>>2)+n]*srcdata[r*(y.step>>2)+c];
+ r = min(max((i - N1 / 2 + m), 0), N2 - 1);
+ c = min(max((j - M1 / 2 + n), 0), M2 - 1);
+ temp += kerneldata[m * (x.step >> 2) + n] * srcdata[r * (y.step >> 2) + c];
}
- dstdata[i*(z.step >> 2)+j]=temp;
+ dstdata[i * (z.step >> 2) + j] = temp;
}
}
TEST_P(Convolve, Mat)
{
- if(mat1.type()!=CV_32FC1)
+ if(mat1.type() != CV_32FC1)
{
- cout<<"\tUnsupported type\t\n";
+ cout << "\tUnsupported type\t\n";
}
- for(int j=0;j<LOOP_TIMES;j++)
+ for(int j = 0; j < LOOP_TIMES; j++)
{
random_roi();
cv::ocl::oclMat temp1;
- cv::Mat kernel_cpu= mat2(Rect(0,0,7,7));
+ cv::Mat kernel_cpu = mat2(Rect(0, 0, 7, 7));
temp1 = kernel_cpu;
- conv2(kernel_cpu,mat1_roi,dst_roi);
- cv::ocl::convolve(gmat1,temp1,gdst);
-
+ conv2(kernel_cpu, mat1_roi, dst_roi);
+ cv::ocl::convolve(gmat1, temp1, gdst);
+
cv::Mat cpu_dst;
gdst_whole.download(cpu_dst);
// NULL_TYPE,
// NULL_TYPE,
// Values(false))); // Values(false) is the reserved parameter
+INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine(
+ Values(CV_8UC1, CV_8UC3),
+ NULL_TYPE,
+ Values(CV_8UC1, CV_8UC3),
+ NULL_TYPE,
+ NULL_TYPE,
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine(
- Values(CV_8UC1, CV_8UC4,CV_32SC1, CV_32SC4,CV_32FC1, CV_32FC4),
- NULL_TYPE,
- Values(CV_8UC1,CV_8UC4,CV_32SC1, CV_32SC4,CV_32FC1, CV_32FC4),
- NULL_TYPE,
- NULL_TYPE,
- Values(false))); // Values(false) is the reserved parameter
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
+ NULL_TYPE,
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
+ NULL_TYPE,
+ NULL_TYPE,
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerMinEigenVal, Combine(
- Values(CV_8UC1,CV_32FC1),
- NULL_TYPE,
- ONE_TYPE(CV_32FC1),
- NULL_TYPE,
- NULL_TYPE,
- Values(false))); // Values(false) is the reserved parameter
+ Values(CV_8UC1, CV_32FC1),
+ NULL_TYPE,
+ ONE_TYPE(CV_32FC1),
+ NULL_TYPE,
+ NULL_TYPE,
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerHarris, Combine(
- Values(CV_8UC1,CV_32FC1),
- NULL_TYPE,
- ONE_TYPE(CV_32FC1),
- NULL_TYPE,
- NULL_TYPE,
- Values(false))); // Values(false) is the reserved parameter
+ Values(CV_8UC1, CV_32FC1),
+ NULL_TYPE,
+ ONE_TYPE(CV_32FC1),
+ NULL_TYPE,
+ NULL_TYPE,
+ Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(ImgprocTestBase, integral, Combine(
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(Imgproc, WarpAffine, Combine(
- Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4),
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR,
- (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP),
- (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP))));
+ (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP),
+ (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP))));
INSTANTIATE_TEST_CASE_P(Imgproc, WarpPerspective, Combine
- (Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4),
+ (Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR,
(MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP),
(MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP))));
INSTANTIATE_TEST_CASE_P(Imgproc, Resize, Combine(
- Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4), Values(cv::Size()),
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), Values(cv::Size()),
Values(0.5, 1.5, 2), Values(0.5, 1.5, 2), Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR)));
Values(6),
Values(cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1))
));
-
+
INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftProc, Combine(
- ONE_TYPE(CV_8UC4),
- ONE_TYPE(CV_16SC2),
- Values(5),
- Values(6),
- Values(cv::TermCriteria(cv::TermCriteria::COUNT+cv::TermCriteria::EPS, 5, 1))
-));
+ ONE_TYPE(CV_8UC4),
+ ONE_TYPE(CV_16SC2),
+ Values(5),
+ Values(6),
+ Values(cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1))
+ ));
INSTANTIATE_TEST_CASE_P(Imgproc, Remap, Combine(
- Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32FC1, CV_32FC4),
- Values(CV_32FC1, CV_16SC2, CV_32FC2),Values(-1,CV_32FC1),
- Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR),
- Values((int)cv::BORDER_CONSTANT)));
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
+ Values(CV_32FC1, CV_16SC2, CV_32FC2), Values(-1, CV_32FC1),
+ Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR),
+ Values((int)cv::BORDER_CONSTANT)));
INSTANTIATE_TEST_CASE_P(histTestBase, calcHist, Combine(
- ONE_TYPE(CV_8UC1),
- ONE_TYPE(CV_32SC1) //no use
-));
+ ONE_TYPE(CV_8UC1),
+ ONE_TYPE(CV_32SC1) //no use
+ ));
INSTANTIATE_TEST_CASE_P(ConvolveTestBase, Convolve, Combine(
Values(CV_32FC1, CV_32FC1),
#include "precomp.hpp"
-
+#define PERF_TEST 0
+#ifdef HAVE_OPENCL
////////////////////////////////////////////////////////////////////////////////
// MatchTemplate
#define ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF_NORMED))
IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size);
-const char* TEMPLATE_METHOD_NAMES[6] = {"TM_SQDIFF", "TM_SQDIFF_NORMED", "TM_CCORR", "TM_CCORR_NORMED", "TM_CCOEFF", "TM_CCOEFF_NORMED"};
+const char *TEMPLATE_METHOD_NAMES[6] = {"TM_SQDIFF", "TM_SQDIFF_NORMED", "TM_CCORR", "TM_CCORR_NORMED", "TM_CCOEFF", "TM_CCOEFF_NORMED"};
#define MTEMP_SIZES testing::Values(cv::Size(128, 256), cv::Size(1024, 768))
cv::Size templ_size;
int cn;
int method;
- //std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
TEST_P(MatchTemplate8U, Accuracy)
{
- std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
- std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl;
- std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl;
- std::cout << "Channels: " << cn << std::endl;
+ std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
+ std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl;
+ std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl;
+ std::cout << "Channels: " << cn << std::endl;
- cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn));
+ cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn));
cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn));
cv::ocl::oclMat dst, ocl_image(image), ocl_templ(templ);
- cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
+ cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
cv::Mat dst_gold;
cv::matchTemplate(image, templ, dst_gold, method);
- char sss [100] = "";
+ char sss [100] = "";
- cv::Mat mat_dst;
- dst.download(mat_dst);
+ cv::Mat mat_dst;
+ dst.download(mat_dst);
EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss);
#if PERF_TEST
- {
- P_TEST_FULL({}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {});
- P_TEST_FULL({}, {cv::matchTemplate(image, templ, dst_gold, method);}, {});
- }
+ {
+ P_TEST_FULL( {}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {});
+ P_TEST_FULL( {}, {cv::matchTemplate(image, templ, dst_gold, method);}, {});
+ }
#endif // PERF_TEST
}
cv::Size templ_size;
int cn;
int method;
- //std::vector<cv::ocl::Info> oclinfo;
+ //std::vector<cv::ocl::Info> oclinfo;
virtual void SetUp()
{
cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn));
cv::ocl::oclMat dst, ocl_image(image), ocl_templ(templ);
- cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
+ cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);
cv::Mat dst_gold;
cv::matchTemplate(image, templ, dst_gold, method);
- char sss [100] = "";
+ char sss [100] = "";
- cv::Mat mat_dst;
- dst.download(mat_dst);
+ cv::Mat mat_dst;
+ dst.download(mat_dst);
EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss);
#if PERF_TEST
- {
- std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
- std::cout << "Image Size: (" << size.width << ", " << size.height << ")"<< std::endl;
- std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")"<< std::endl;
- std::cout << "Channels: " << cn << std::endl;
- P_TEST_FULL({}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {});
- P_TEST_FULL({}, {cv::matchTemplate(image, templ, dst_gold, method);}, {});
- }
+ {
+ std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl;
+ std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl;
+ std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl;
+ std::cout << "Channels: " << cn << std::endl;
+ P_TEST_FULL( {}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {});
+ P_TEST_FULL( {}, {cv::matchTemplate(image, templ, dst_gold, method);}, {});
+ }
#endif // PERF_TEST
}
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,
- testing::Combine(
- MTEMP_SIZES,
- testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
- testing::Values(Channels(1), Channels(3),Channels(4)),
- ALL_TEMPLATE_METHODS
- )
-);
-
-INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(
- MTEMP_SIZES,
- testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
- testing::Values(Channels(1), Channels(3),Channels(4)),
- testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
-
+//INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,
+// testing::Combine(
+// MTEMP_SIZES,
+// testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
+// testing::Values(Channels(1), Channels(3), Channels(4)),
+// ALL_TEMPLATE_METHODS
+// )
+// );
+//
+//INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine(
+// MTEMP_SIZES,
+// testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/),
+// testing::Values(Channels(1), Channels(3), Channels(4)),
+// testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR))));
+#endif
}
void random_roi()
- {
+ {
#ifdef RANDOMROI
//randomize ROI
- cv::RNG &rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
roicols = rng.uniform(1, mat.cols);
roirows = rng.uniform(1, mat.rows);
srcx = rng.uniform(0, mat.cols - roicols);
}
void random_roi()
- {
+ {
#ifdef RANDOMROI
//randomize ROI
- cv::RNG &rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
roicols = rng.uniform(1, mat.cols);
roirows = rng.uniform(1, mat.rows);
srcx = rng.uniform(0, mat.cols - roicols);
}
void random_roi()
- {
+ {
#ifdef RANDOMROI
//randomize ROI
- cv::RNG &rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
roicols = rng.uniform(1, mat.cols);
roirows = rng.uniform(1, mat.rows);
srcx = rng.uniform(0, mat.cols - roicols);
}
void random_roi()
- {
+ {
#ifdef RANDOMROI
//randomize ROI
- cv::RNG &rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
roicols = rng.uniform(2, mat1.cols);
roirows = rng.uniform(2, mat1.rows);
src1x = rng.uniform(0, mat1.cols - roicols);
for(int j = 0; j < LOOP_TIMES; j++)
{
//random_roi();
- int width = rng.uniform(2, MWIDTH);
- int height = rng.uniform(2, MHEIGHT);
+ int width = rng.uniform(2, MWIDTH);
+ int height = rng.uniform(2, MHEIGHT);
cv::Size size(width, height);
mat1 = randomMat(rng, size, type, 0, 40, false);
- gmat1 = mat1;
+ gmat1 = mat1;
cv::Mat cpu_dst;
gmat1.download(cpu_dst);
char sss[1024];
}
INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine(
- Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
- Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4)));
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4)));
INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine(
- Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine(
- Values(CV_8UC1, CV_8UC3,CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4),
+ Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32SC3, CV_32SC4, CV_32FC1, CV_32FC3, CV_32FC4),
Values(false))); // Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P(MatrixOperation, convertC3C4, Combine(
Values(CV_8UC3, CV_32SC3, CV_32FC3),
- Values(cv::Size())));
+ Values(cv::Size())));
#endif
PARAM_TEST_CASE(PyrDown, MatType, int)
{
- int type;
- int channels;
+ int type;
+ int channels;
virtual void SetUp()
{
type = GET_PARAM(0);
- channels = GET_PARAM(1);
+ channels = GET_PARAM(1);
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums > 0);
////setDevice(oclinfo[0]);
}
- void Cleanup()
- {
- }
+ void Cleanup()
+ {
+ }
};
for(int j = 0; j < LOOP_TIMES; j++)
{
cv::Size size(MWIDTH, MHEIGHT);
- cv::RNG &rng = TS::ptr()->get_rng();
- cv::Mat src=randomMat(rng, size, CV_MAKETYPE(type, channels), 0, 100, false);
+ cv::RNG &rng = TS::ptr()->get_rng();
+ cv::Mat src = randomMat(rng, size, CV_MAKETYPE(type, channels), 0, 100, false);
- cv::ocl::oclMat gsrc(src), gdst;
- cv::Mat dst_cpu;
- cv::pyrDown(src, dst_cpu);
- cv::ocl::pyrDown(gsrc, gdst);
+ cv::ocl::oclMat gsrc(src), gdst;
+ cv::Mat dst_cpu;
+ cv::pyrDown(src, dst_cpu);
+ cv::ocl::pyrDown(gsrc, gdst);
cv::Mat dst;
gdst.download(dst);
- char s[1024]={0};
+ char s[1024] = {0};
- EXPECT_MAT_NEAR(dst, dst_cpu, dst.depth() == CV_32F ? 1e-4f : 1.0f, s);
+ EXPECT_MAT_NEAR(dst, dst_cpu, dst.depth() == CV_32F ? 1e-4f : 1.0f, s);
- Cleanup();
+ Cleanup();
}
}
virtual void SetUp()
{
UseSmart = GET_PARAM(0);
- useGray = GET_PARAM(0);
+ useGray = GET_PARAM(0);
}
};
cv::goodFeaturesToTrack(gray_frame, pts, 1000, 0.01, 0.0);
cv::ocl::oclMat d_pts;
- cv::Mat pts_mat(1, (int)pts.size(), CV_32FC2, (void*)&pts[0]);
+ cv::Mat pts_mat(1, (int)pts.size(), CV_32FC2, (void *)&pts[0]);
d_pts.upload(pts_mat);
cv::ocl::PyrLKOpticalFlow pyrLK;
- cv::ocl::oclMat oclFrame0;
- cv::ocl::oclMat oclFrame1;
+ cv::ocl::oclMat oclFrame0;
+ cv::ocl::oclMat oclFrame1;
cv::ocl::oclMat d_nextPts;
cv::ocl::oclMat d_status;
cv::ocl::oclMat d_err;
- oclFrame0 = frame0;
- oclFrame1 = frame1;
+ oclFrame0 = frame0;
+ oclFrame1 = frame1;
pyrLK.sparse(oclFrame0, oclFrame1, d_pts, d_nextPts, d_status, &d_err);
std::vector<cv::Point2f> nextPts(d_nextPts.cols);
- cv::Mat nextPts_mat(1, d_nextPts.cols, CV_32FC2, (void*)&nextPts[0]);
+ cv::Mat nextPts_mat(1, d_nextPts.cols, CV_32FC2, (void *)&nextPts[0]);
d_nextPts.download(nextPts_mat);
std::vector<unsigned char> status(d_status.cols);
- cv::Mat status_mat(1, d_status.cols, CV_8UC1, (void*)&status[0]);
+ cv::Mat status_mat(1, d_status.cols, CV_8UC1, (void *)&status[0]);
d_status.download(status_mat);
//std::vector<float> err(d_err.cols);
double bad_ratio = static_cast<double>(mistmatch) / (nextPts.size() * 2);
ASSERT_LE(bad_ratio, 0.05f);
-
+
}
INSTANTIATE_TEST_CASE_P(Video, Sparse, Combine(
- Values(false, true),
- Values(false)));
+ Values(false, true),
+ Values(false)));
#endif // HAVE_OPENCL
PARAM_TEST_CASE(PyrUp, MatType, int)
{
- int type;
- int channels;
- //std::vector<cv::ocl::Info> oclinfo;
+ int type;
+ int channels;
+ //std::vector<cv::ocl::Info> oclinfo;
- virtual void SetUp()
- {
- //int devnums = cv::ocl::getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
- //CV_Assert(devnums > 0);
- type = GET_PARAM(0);
- channels = GET_PARAM(1);
- }
+ virtual void SetUp()
+ {
+ //int devnums = cv::ocl::getDevice(oclinfo, OPENCV_DEFAULT_OPENCL_DEVICE);
+ //CV_Assert(devnums > 0);
+ type = GET_PARAM(0);
+ channels = GET_PARAM(1);
+ }
};
-TEST_P(PyrUp,Accuracy)
+TEST_P(PyrUp, Accuracy)
{
- for(int j = 0; j < LOOP_TIMES; j++)
+ for(int j = 0; j < LOOP_TIMES; j++)
{
- Size size(MWIDTH, MHEIGHT);
- Mat src = randomMat(size,CV_MAKETYPE(type, channels));
- Mat dst_gold;
- pyrUp(src,dst_gold);
- ocl::oclMat dst;
- ocl::oclMat srcMat(src);
- ocl::pyrUp(srcMat,dst);
- Mat cpu_dst;
- dst.download(cpu_dst);
- char s[100]={0};
+ Size size(MWIDTH, MHEIGHT);
+ Mat src = randomMat(size, CV_MAKETYPE(type, channels));
+ Mat dst_gold;
+ pyrUp(src, dst_gold);
+ ocl::oclMat dst;
+ ocl::oclMat srcMat(src);
+ ocl::pyrUp(srcMat, dst);
+ Mat cpu_dst;
+ dst.download(cpu_dst);
+ char s[100] = {0};
+
+ EXPECT_MAT_NEAR(dst_gold, cpu_dst, (src.depth() == CV_32F ? 1e-4f : 1.0), s);
+ }
- EXPECT_MAT_NEAR(dst_gold, cpu_dst, (src.depth() == CV_32F ? 1e-4f : 1.0),s);
- }
-
}
}
void random_roi()
- {
+ {
#ifdef RANDOMROI
//randomize ROI
- cv::RNG &rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
roicols = rng.uniform(1, mat1.cols);
roirows = rng.uniform(1, mat1.rows);
src1x = rng.uniform(0, mat1.cols - roicols);
src2x = rng.uniform(0, mat2.cols - roicols);
src2y = rng.uniform(0, mat2.rows - roirows);
src3x = rng.uniform(0, mat3.cols - roicols);
- src3y = rng.uniform(0, mat3.cols - roirows);
- src4x = rng.uniform(0, mat4.rows - roicols);
+ src3y = rng.uniform(0, mat3.rows - roirows);
+ src4x = rng.uniform(0, mat4.cols - roicols);
src4y = rng.uniform(0, mat4.rows - roirows);
dstx = rng.uniform(0, dst.cols - roicols);
dsty = rng.uniform(0, dst.rows - roirows);
dev_gsrc.push_back(gmat1);
if(channels >= 2)
- dev_gsrc.push_back(gmat2);
+ dev_gsrc.push_back(gmat2);
if(channels >= 3)
- dev_gsrc.push_back(gmat3);
+ dev_gsrc.push_back(gmat3);
if(channels >= 4)
- dev_gsrc.push_back(gmat4);
+ dev_gsrc.push_back(gmat4);
cv::merge(dev_src, dst_roi);
cv::ocl::merge(dev_gsrc, gdst);
}
void random_roi()
- {
+ {
#ifdef RANDOMROI
//randomize ROI
- cv::RNG &rng = TS::ptr()->get_rng();
+ cv::RNG &rng = TS::ptr()->get_rng();
roicols = rng.uniform(1, mat.cols);
roirows = rng.uniform(1, mat.rows);
srcx = rng.uniform(0, mat.cols - roicols);
sprintf(sss, "roicols=%d,roirows=%d,dst1x =%d,dsty=%d,dst2x =%d,dst2y=%d,dst3x =%d,dst3y=%d,dst4x =%d,dst4y=%d,srcx=%d,srcy=%d", roicols, roirows, dst1x , dst1y, dst2x , dst2y, dst3x , dst3y, dst4x , dst4y, srcx, srcy);
if(channels >= 1)
- EXPECT_MAT_NEAR(dst1, cpu_dst1, 0.0, sss);
+ EXPECT_MAT_NEAR(dst1, cpu_dst1, 0.0, sss);
if(channels >= 2)
- EXPECT_MAT_NEAR(dst2, cpu_dst2, 0.0, sss);
+ EXPECT_MAT_NEAR(dst2, cpu_dst2, 0.0, sss);
if(channels >= 3)
- EXPECT_MAT_NEAR(dst3, cpu_dst3, 0.0, sss);
+ EXPECT_MAT_NEAR(dst3, cpu_dst3, 0.0, sss);
if(channels >= 4)
- EXPECT_MAT_NEAR(dst4, cpu_dst4, 0.0, sss);
+ EXPECT_MAT_NEAR(dst4, cpu_dst4, 0.0, sss);
}
}
INSTANTIATE_TEST_CASE_P(SplitMerge, Merge, Combine(
- Values(CV_8U, CV_32S, CV_32F), Values(1, 3,4)));
+ Values(CV_8U, CV_32S, CV_32F), Values(1, 3, 4)));
INSTANTIATE_TEST_CASE_P(SplitMerge, Split , Combine(
- Values(CV_8U, CV_32S, CV_32F), Values(1, 3,4)));
-
+ Values(CV_8U, CV_32S, CV_32F), Values(1, 3, 4)));
+
#endif // HAVE_OPENCL
return v;
}
-const vector<MatType>& all_types()
+const vector<MatType> &all_types()
{
static vector<MatType> v = types(CV_8U, CV_64F, 1, 4);
std::vector<MatType> types(int depth_start, int depth_end, int cn_start, int cn_end);
//! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4).
-const std::vector<MatType>& all_types();
+const std::vector<MatType> &all_types();
class Inverse
{