Added GPU exp, log, and magnitude functions based on NPP.
Updated setTo to use the new NPP set functions.
Minor fixes in tests and comments.
////////////////////////////// Arithmetics ///////////////////////////////////\r
\r
//! adds one matrix to another (c = a + b)\r
+ //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types\r
CV_EXPORTS void add(const GpuMat& a, const GpuMat& b, GpuMat& c);\r
+ //! adds scalar to a matrix (c = a + s)\r
+ //! supports only CV_32FC1 type\r
+ CV_EXPORTS void add(const GpuMat& a, const Scalar& sc, GpuMat& c);\r
//! subtracts one matrix from another (c = a - b)\r
+ //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types\r
CV_EXPORTS void subtract(const GpuMat& a, const GpuMat& b, GpuMat& c);\r
+ //! subtracts scalar from a matrix (c = a - s)\r
+ //! supports only CV_32FC1 type\r
+ CV_EXPORTS void subtract(const GpuMat& a, const Scalar& sc, GpuMat& c);\r
//! computes element-wise product of the two arrays (c = a * b)\r
+ //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types\r
CV_EXPORTS void multiply(const GpuMat& a, const GpuMat& b, GpuMat& c);\r
+ //! multiplies matrix by a scalar (c = a * s)\r
+ //! supports only CV_32FC1 type\r
+ CV_EXPORTS void multiply(const GpuMat& a, const Scalar& sc, GpuMat& c);\r
//! computes element-wise quotient of the two arrays (c = a / b)\r
+ //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types\r
CV_EXPORTS void divide(const GpuMat& a, const GpuMat& b, GpuMat& c);\r
+ //! computes element-wise quotient of matrix and scalar (c = a / s)\r
+ //! supports only CV_32FC1 type\r
+ CV_EXPORTS void divide(const GpuMat& a, const Scalar& sc, GpuMat& c);\r
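
A minimal usage sketch of the new matrix-scalar overloads, assuming the gpu module header is included and a CUDA device is available; the wrapper function name, sizes and values are purely illustrative (only CV_32FC1 inputs, as documented above):

    void scalarArithmeticSketch()
    {
        cv::Mat host(256, 256, CV_32FC1, cv::Scalar(2.0f));  // host data to upload
        cv::gpu::GpuMat a(host), b, c;                       // this constructor uploads to the device
        cv::gpu::add(a, cv::Scalar(1.0), b);                 // b = a + 1
        cv::gpu::multiply(b, cv::Scalar(0.5), c);            // c = b * 0.5
        cv::gpu::subtract(c, cv::Scalar(0.25), b);           // b = c - 0.25
        cv::gpu::divide(b, cv::Scalar(2.0), c);              // c = b / 2
        cv::Mat result;
        c.download(result);                                  // copy the result back to host memory
    }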
\r
//! transposes the matrix\r
+ //! supports only CV_8UC1 type\r
CV_EXPORTS void transpose(const GpuMat& src1, GpuMat& dst);\r
\r
//! computes element-wise absolute difference of two arrays (c = abs(a - b))\r
- CV_EXPORTS void absdiff(const GpuMat& a, const GpuMat& b, GpuMat& c); \r
+ //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types\r
+ CV_EXPORTS void absdiff(const GpuMat& a, const GpuMat& b, GpuMat& c);\r
+ //! computes element-wise absolute difference of array and scalar (c = abs(a - s))\r
+ //! supports only CV_32FC1 type\r
+ CV_EXPORTS void absdiff(const GpuMat& a, const Scalar& s, GpuMat& c);\r
\r
//! compares elements of two arrays (c = a <cmpop> b)\r
- //! Now doesn't support CMP_NE.\r
+ //! supports CV_8UC4, CV_32FC1 types\r
CV_EXPORTS void compare(const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop);\r
\r
//! computes mean value and standard deviation of all or selected array elements\r
+ //! supports only CV_8UC1 type\r
CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev);\r
\r
//! computes norm of array\r
- //! Supports NORM_INF, NORM_L1, NORM_L2\r
+ //! supports NORM_INF, NORM_L1, NORM_L2\r
+ //! supports only CV_8UC1 type\r
CV_EXPORTS double norm(const GpuMat& src1, int normType=NORM_L2);\r
\r
//! computes norm of the difference between two arrays\r
- //! Supports NORM_INF, NORM_L1, NORM_L2\r
+ //! supports NORM_INF, NORM_L1, NORM_L2\r
+ //! supports only CV_8UC1 type\r
CV_EXPORTS double norm(const GpuMat& src1, const GpuMat& src2, int normType=NORM_L2);\r
\r
//! reverses the order of the rows, columns or both in a matrix\r
+ //! supports CV_8UC1, CV_8UC4 types\r
CV_EXPORTS void flip(const GpuMat& a, GpuMat& b, int flipCode);\r
\r
//! computes sum of array elements\r
+ //! supports CV_8UC1, CV_8UC4 types\r
CV_EXPORTS Scalar sum(const GpuMat& m);\r
\r
//! finds global minimum and maximum array elements and returns their values\r
+ //! supports only CV_8UC1 type\r
CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal = 0);\r
\r
//! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i))\r
- //! supports only single channels source\r
- //! destination array will have the same type as source\r
- //! lut must hase CV_32S depth and the same number of channels as in the source array\r
+ //! destination array will have the same depth as lut and the same number of channels as the source\r
+ //! supports CV_8UC1, CV_8UC3 types\r
CV_EXPORTS void LUT(const GpuMat& src, const Mat& lut, GpuMat& dst);\r
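
A hedged sketch of the LUT behaviour described above (the inversion table and function name are illustrative): the destination takes the lookup table's depth and the source's channel count.

    void lutSketch()
    {
        cv::Mat lut(1, 256, CV_8UC1);                            // 8-bit table -> 8-bit destination depth
        for (int i = 0; i < 256; ++i)
            lut.at<uchar>(0, i) = (uchar)(255 - i);              // simple intensity inversion
        cv::gpu::GpuMat src(cv::Mat(64, 64, CV_8UC1, cv::Scalar(100))), dst;
        cv::gpu::LUT(src, lut, dst);                             // dst(i) = lut(src(i)), same size and channels as src
    }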
\r
//! makes multi-channel array out of several single-channel arrays\r
//! copies each plane of a multi-channel array to a dedicated array (async version)\r
CV_EXPORTS void split(const GpuMat& src, vector<GpuMat>& dst, const Stream& stream);\r
\r
+ //! computes exponent of each matrix element (b = e**a)\r
+ //! supports only CV_32FC1 type\r
+ CV_EXPORTS void exp(const GpuMat& a, GpuMat& b);\r
+ \r
+ //! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))\r
+ //! supports only CV_32FC1 type\r
+ CV_EXPORTS void log(const GpuMat& a, GpuMat& b);\r
+\r
+ //! computes magnitude of each (x(i), y(i)) vector: magnitude(i) = sqrt(x(i)^2 + y(i)^2)\r
+ //! supports only CV_32FC1 type\r
+ CV_EXPORTS void magnitude(const GpuMat& x, const GpuMat& y, GpuMat& magnitude);\r
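
An illustrative sketch combining the three new per-element functions (CV_32FC1 inputs assumed; the function name and values are hypothetical):

    void expLogMagnitudeSketch()
    {
        cv::gpu::GpuMat dx(cv::Mat(128, 128, CV_32FC1, cv::Scalar(3.0f)));
        cv::gpu::GpuMat dy(cv::Mat(128, 128, CV_32FC1, cv::Scalar(4.0f)));
        cv::gpu::GpuMat mag, logMag, restored;
        cv::gpu::magnitude(dx, dy, mag);      // mag(i) = sqrt(3^2 + 4^2) = 5
        cv::gpu::log(mag, logMag);            // logMag(i) = log(abs(mag(i)))
        cv::gpu::exp(logMag, restored);       // restored ~= mag, up to float precision
    }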
+\r
////////////////////////////// Image processing //////////////////////////////\r
\r
//! DST[x,y] = SRC[xmap[x,y],ymap[x,y]] with bilinear interpolation.\r
- //! xymap.type() == xymap.type() == CV_32FC1\r
+ //! supports CV_8UC1, CV_8UC3 source types and CV_32FC1 map type\r
CV_EXPORTS void remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap);\r
\r
//! Does mean shift filtering on GPU.\r
CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh);\r
\r
//! resizes the image\r
- //! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_LANCZOS4\r
+ //! supports INTER_NEAREST, INTER_LINEAR\r
+ //! supports CV_8UC1, CV_8UC4 types\r
CV_EXPORTS void resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx=0, double fy=0, int interpolation = INTER_LINEAR);\r
\r
//! warps the image using affine transformation\r
\r
//! rotate 8bit single or four channel image\r
//! Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC\r
+ //! supports CV_8UC1, CV_8UC4 types\r
CV_EXPORTS void rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, double xShift = 0, double yShift = 0, int interpolation = INTER_LINEAR);\r
\r
//! copies 2D array to a larger destination array and pads borders with user-specifiable constant\r
+ //! supports CV_8UC1, CV_8UC4, CV_32SC1 types\r
CV_EXPORTS void copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, const Scalar& value = Scalar());\r
\r
//! computes the integral image and integral for the squared image\r
//! sum will have CV_32S type, sqsum - CV_32F type\r
+ //! supports only CV_32FC1 source type\r
CV_EXPORTS void integral(GpuMat& src, GpuMat& sum, GpuMat& sqsum);\r
\r
//! smooths the image using the normalized box filter\r
+ //! supports CV_8UC1, CV_8UC4 types and kernel size 3, 5, 7\r
CV_EXPORTS void boxFilter(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor = Point(-1,-1));\r
\r
//! a synonym for normalized box filter\r
#if !defined (HAVE_CUDA)\r
\r
void cv::gpu::add(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }\r
+void cv::gpu::add(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }\r
void cv::gpu::subtract(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }\r
+void cv::gpu::subtract(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }\r
void cv::gpu::multiply(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }\r
+void cv::gpu::multiply(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }\r
void cv::gpu::divide(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }\r
+void cv::gpu::divide(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }\r
void cv::gpu::transpose(const GpuMat&, GpuMat&) { throw_nogpu(); }\r
void cv::gpu::absdiff(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }\r
+void cv::gpu::absdiff(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }\r
void cv::gpu::compare(const GpuMat&, const GpuMat&, GpuMat&, int) { throw_nogpu(); }\r
void cv::gpu::meanStdDev(const GpuMat&, Scalar&, Scalar&) { throw_nogpu(); }\r
double cv::gpu::norm(const GpuMat&, int) { throw_nogpu(); return 0.0; }\r
void cv::gpu::flip(const GpuMat&, GpuMat&, int) { throw_nogpu(); }\r
Scalar cv::gpu::sum(const GpuMat&) { throw_nogpu(); return Scalar(); }\r
void cv::gpu::minMax(const GpuMat&, double*, double*) { throw_nogpu(); }\r
-void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst) { throw_nogpu(); }\r
+void cv::gpu::LUT(const GpuMat&, const Mat&, GpuMat&) { throw_nogpu(); }\r
+void cv::gpu::exp(const GpuMat&, GpuMat&) { throw_nogpu(); }\r
+void cv::gpu::log(const GpuMat&, GpuMat&) { throw_nogpu(); }\r
+void cv::gpu::magnitude(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }\r
\r
#else /* !defined (HAVE_CUDA) */\r
\r
{\r
typedef NppStatus (*npp_arithm_8u_t)(const Npp8u* pSrc1, int nSrc1Step, const Npp8u* pSrc2, int nSrc2Step, Npp8u* pDst, int nDstStep, \r
NppiSize oSizeROI, int nScaleFactor);\r
+ typedef NppStatus (*npp_arithm_32s_t)(const Npp32s* pSrc1, int nSrc1Step, const Npp32s* pSrc2, int nSrc2Step, Npp32s* pDst, \r
+ int nDstStep, NppiSize oSizeROI); \r
typedef NppStatus (*npp_arithm_32f_t)(const Npp32f* pSrc1, int nSrc1Step, const Npp32f* pSrc2, int nSrc2Step, Npp32f* pDst, \r
- int nDstStep, NppiSize oSizeROI);\r
+ int nDstStep, NppiSize oSizeROI); \r
\r
- void nppFuncCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, \r
- npp_arithm_8u_t npp_func_8uc1, npp_arithm_8u_t npp_func_8uc4, npp_arithm_32f_t npp_func_32fc1)\r
+ void nppArithmCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, \r
+ npp_arithm_8u_t npp_func_8uc1, npp_arithm_8u_t npp_func_8uc4, \r
+ npp_arithm_32s_t npp_func_32sc1, npp_arithm_32f_t npp_func_32fc1)\r
{\r
CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());\r
\r
- CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32FC1);\r
+ CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);\r
\r
dst.create( src1.size(), src1.type() );\r
\r
src2.ptr<Npp8u>(), src2.step, \r
dst.ptr<Npp8u>(), dst.step, sz, 0) );\r
break;\r
+ case CV_32SC1:\r
+ nppSafeCall( npp_func_32sc1(src1.ptr<Npp32s>(), src1.step,\r
+ src2.ptr<Npp32s>(), src2.step,\r
+ dst.ptr<Npp32s>(), dst.step, sz) );\r
+ break;\r
case CV_32FC1:\r
nppSafeCall( npp_func_32fc1(src1.ptr<Npp32f>(), src1.step,\r
src2.ptr<Npp32f>(), src2.step,\r
CV_Assert(!"Unsupported source type");\r
}\r
}\r
+\r
+ typedef NppStatus (*npp_arithm_scalar_32f_t)(const Npp32f *pSrc, int nSrcStep, Npp32f nValue, Npp32f *pDst, \r
+ int nDstStep, NppiSize oSizeROI);\r
+\r
+ void nppArithmCaller(const GpuMat& src1, const Scalar& sc, GpuMat& dst, \r
+ npp_arithm_scalar_32f_t npp_func)\r
+ {\r
+ CV_Assert(src1.type() == CV_32FC1);\r
+\r
+ dst.create(src1.size(), src1.type());\r
+\r
+ NppiSize sz;\r
+ sz.width = src1.cols;\r
+ sz.height = src1.rows;\r
+\r
+ nppSafeCall( npp_func(src1.ptr<Npp32f>(), src1.step, (Npp32f)sc[0], dst.ptr<Npp32f>(), dst.step, sz) );\r
+ }\r
}\r
\r
void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)\r
{\r
- nppFuncCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32f_C1R);\r
+ nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32s_C1R, nppiAdd_32f_C1R);\r
}\r
\r
void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) \r
{\r
- nppFuncCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32f_C1R);\r
+ nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32s_C1R, nppiSub_32f_C1R);\r
}\r
\r
void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)\r
{\r
- nppFuncCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, nppiMul_32f_C1R);\r
+ nppArithmCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, nppiMul_32s_C1R, nppiMul_32f_C1R);\r
}\r
\r
void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)\r
{\r
- nppFuncCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32f_C1R);\r
+ nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32s_C1R, nppiDiv_32f_C1R);\r
+}\r
+\r
+void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst)\r
+{\r
+ nppArithmCaller(src, sc, dst, nppiAddC_32f_C1R);\r
+}\r
+\r
+void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst)\r
+{\r
+ nppArithmCaller(src, sc, dst, nppiSubC_32f_C1R);\r
+}\r
+\r
+void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst)\r
+{\r
+ nppArithmCaller(src, sc, dst, nppiMulC_32f_C1R);\r
+}\r
+\r
+void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst)\r
+{\r
+ nppArithmCaller(src, sc, dst, nppiDivC_32f_C1R);\r
}\r
\r
////////////////////////////////////////////////////////////////////////\r
{\r
CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());\r
\r
- CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_32FC1);\r
+ CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);\r
\r
dst.create( src1.size(), src1.type() );\r
\r
sz.width = src1.cols;\r
sz.height = src1.rows;\r
\r
- if (src1.type() == CV_8UC1)\r
+ switch (src1.type())\r
{\r
+ case CV_8UC1:\r
nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), src1.step, \r
src2.ptr<Npp8u>(), src2.step, \r
dst.ptr<Npp8u>(), dst.step, sz) );\r
- }\r
- else\r
- {\r
+ break;\r
+ case CV_8UC4:\r
+ nppSafeCall( nppiAbsDiff_8u_C4R(src1.ptr<Npp8u>(), src1.step, \r
+ src2.ptr<Npp8u>(), src2.step, \r
+ dst.ptr<Npp8u>(), dst.step, sz) );\r
+ break;\r
+ case CV_32SC1:\r
+ nppSafeCall( nppiAbsDiff_32s_C1R(src1.ptr<Npp32s>(), src1.step,\r
+ src2.ptr<Npp32s>(), src2.step,\r
+ dst.ptr<Npp32s>(), dst.step, sz) );\r
+ break;\r
+ case CV_32FC1:\r
nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), src1.step,\r
src2.ptr<Npp32f>(), src2.step,\r
dst.ptr<Npp32f>(), dst.step, sz) );\r
+ break;\r
+ default:\r
+ CV_Assert(!"Unsupported source type");\r
}\r
}\r
\r
+void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst)\r
+{\r
+ CV_Assert(src.type() == CV_32FC1);\r
+\r
+ dst.create( src.size(), src.type() );\r
+\r
+ NppiSize sz;\r
+ sz.width = src.cols;\r
+ sz.height = src.rows;\r
+\r
+ nppSafeCall( nppiAbsDiffC_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz, (Npp32f)s[0]) );\r
+}\r
+\r
////////////////////////////////////////////////////////////////////////\r
// compare\r
\r
}\r
}\r
\r
+////////////////////////////////////////////////////////////////////////\r
+// exp\r
+\r
+void cv::gpu::exp(const GpuMat& src, GpuMat& dst)\r
+{\r
+ CV_Assert(src.type() == CV_32FC1);\r
+\r
+ dst.create(src.size(), src.type());\r
+\r
+ NppiSize sz;\r
+ sz.width = src.cols;\r
+ sz.height = src.rows;\r
+\r
+ nppSafeCall( nppiExp_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );\r
+}\r
+\r
+////////////////////////////////////////////////////////////////////////\r
+// log\r
+\r
+void cv::gpu::log(const GpuMat& src, GpuMat& dst)\r
+{\r
+ CV_Assert(src.type() == CV_32FC1);\r
+\r
+ dst.create(src.size(), src.type());\r
+\r
+ NppiSize sz;\r
+ sz.width = src.cols;\r
+ sz.height = src.rows;\r
+\r
+ nppSafeCall( nppiLn_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );\r
+}\r
+\r
+////////////////////////////////////////////////////////////////////////\r
+// magnitude\r
+\r
+void cv::gpu::magnitude(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)\r
+{\r
+ CV_DbgAssert(src1.type() == src2.type() && src1.size() == src2.size());\r
+ CV_Assert(src1.type() == CV_32FC1);\r
+\r
+ GpuMat src(src1.size(), CV_32FC2);\r
+ GpuMat srcs[] = {src1, src2};\r
+ cv::gpu::merge(srcs, 2, src);\r
+\r
+ dst.create(src1.size(), src1.type());\r
+\r
+ NppiSize sz;\r
+ sz.width = src.cols;\r
+ sz.height = src.rows;\r
+\r
+ nppSafeCall( nppiMagnitude_32fc32f_C1R(src.ptr<Npp32fc>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );\r
+}\r
+\r
#endif /* !defined (HAVE_CUDA) */
\ No newline at end of file
int mm = ::max(width, height);\r
iters = mm / 100 + 2;\r
\r
- levels = (int)(log(static_cast<double>(mm)) + 1) * 4 / 5;\r
+ levels = (int)(::log(static_cast<double>(mm)) + 1) * 4 / 5;\r
if (levels == 0) levels++;\r
}\r
\r
int mm = ::max(width, height);\r
iters = mm / 100 + ((mm > 1200)? - 4 : 4);\r
\r
- levels = (int)log(static_cast<double>(mm)) * 2 / 3;\r
+ levels = (int)::log(static_cast<double>(mm)) * 2 / 3;\r
if (levels == 0) levels++;\r
\r
nr_plane = (int) ((float) ndisp / pow(2.0, levels + 1));\r
\r
void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation)\r
{\r
- static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS};\r
+ static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR/*, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS*/};\r
\r
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);\r
- CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4);\r
+ CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR/* || interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4*/);\r
\r
CV_Assert( src.size().area() > 0 );\r
CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );\r
\r
GpuMat& GpuMat::operator = (const Scalar& s)\r
{\r
- matrix_operations::set_to_without_mask( *this, depth(), s.val, channels());\r
+ setTo(s);\r
return *this;\r
}\r
\r
GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)\r
{\r
- //CV_Assert(mask.type() == CV_8U);\r
+ CV_Assert(mask.type() == CV_8UC1);\r
\r
CV_DbgAssert(!this->empty());\r
+ \r
+ NppiSize sz;\r
+ sz.width = cols;\r
+ sz.height = rows;\r
\r
if (mask.empty())\r
{\r
{\r
case CV_8UC1:\r
{\r
- NppiSize sz;\r
- sz.width = cols;\r
- sz.height = rows;\r
Npp8u nVal = (Npp8u)s[0];\r
- nppSafeCall( nppiSet_8u_C1R(nVal, (Npp8u*)ptr<char>(), step, sz) );\r
+ nppSafeCall( nppiSet_8u_C1R(nVal, ptr<Npp8u>(), step, sz) );\r
break;\r
}\r
case CV_8UC4:\r
{\r
- NppiSize sz;\r
- sz.width = cols;\r
- sz.height = rows;\r
- Npp8u nVal[] = {(Npp8u)s[0], (Npp8u)s[1], (Npp8u)s[2], (Npp8u)s[3]};\r
- nppSafeCall( nppiSet_8u_C4R(nVal, (Npp8u*)ptr<char>(), step, sz) );\r
+ Scalar_<Npp8u> nVal = s;\r
+ nppSafeCall( nppiSet_8u_C4R(nVal.val, ptr<Npp8u>(), step, sz) );\r
+ break;\r
+ }\r
+ case CV_16UC1:\r
+ {\r
+ Npp16u nVal = (Npp16u)s[0];\r
+ nppSafeCall( nppiSet_16u_C1R(nVal, ptr<Npp16u>(), step, sz) );\r
+ break;\r
+ }\r
+ /*case CV_16UC2:\r
+ {\r
+ Scalar_<Npp16u> nVal = s;\r
+ nppSafeCall( nppiSet_16u_C2R(nVal.val, ptr<Npp16u>(), step, sz) );\r
+ break;\r
+ }*/\r
+ case CV_16UC4:\r
+ {\r
+ Scalar_<Npp16u> nVal = s;\r
+ nppSafeCall( nppiSet_16u_C4R(nVal.val, ptr<Npp16u>(), step, sz) );\r
+ break;\r
+ }\r
+ case CV_16SC1:\r
+ {\r
+ Npp16s nVal = (Npp16s)s[0];\r
+ nppSafeCall( nppiSet_16s_C1R(nVal, ptr<Npp16s>(), step, sz) );\r
+ break;\r
+ }\r
+ /*case CV_16SC2:\r
+ {\r
+ Scalar_<Npp16s> nVal = s;\r
+ nppSafeCall( nppiSet_16s_C2R(nVal.val, ptr<Npp16s>(), step, sz) );\r
+ break;\r
+ }*/\r
+ case CV_16SC4:\r
+ {\r
+ Scalar_<Npp16s> nVal = s;\r
+ nppSafeCall( nppiSet_16s_C4R(nVal.val, ptr<Npp16s>(), step, sz) );\r
break;\r
}\r
case CV_32SC1:\r
{\r
- NppiSize sz;\r
- sz.width = cols;\r
- sz.height = rows;\r
Npp32s nVal = (Npp32s)s[0];\r
- nppSafeCall( nppiSet_32s_C1R(nVal, (Npp32s*)ptr<char>(), step, sz) );\r
+ nppSafeCall( nppiSet_32s_C1R(nVal, ptr<Npp32s>(), step, sz) );\r
+ break;\r
+ }\r
+ case CV_32SC4:\r
+ {\r
+ Scalar_<Npp32s> nVal = s;\r
+ nppSafeCall( nppiSet_32s_C4R(nVal.val, ptr<Npp32s>(), step, sz) );\r
break;\r
}\r
case CV_32FC1:\r
{\r
- NppiSize sz;\r
- sz.width = cols;\r
- sz.height = rows;\r
Npp32f nVal = (Npp32f)s[0];\r
- nppSafeCall( nppiSet_32f_C1R(nVal, (Npp32f*)ptr<char>(), step, sz) );\r
+ nppSafeCall( nppiSet_32f_C1R(nVal, ptr<Npp32f>(), step, sz) );\r
+ break;\r
+ }\r
+ case CV_32FC4:\r
+ {\r
+ Scalar_<Npp32f> nVal = s;\r
+ nppSafeCall( nppiSet_32f_C4R(nVal.val, ptr<Npp32f>(), step, sz) );\r
break;\r
}\r
default:\r
} \r
}\r
else\r
- matrix_operations::set_to_with_mask( *this, depth(), s.val, mask, channels());\r
+ {\r
+ switch (type())\r
+ {\r
+ case CV_8UC1:\r
+ {\r
+ Npp8u nVal = (Npp8u)s[0];\r
+ nppSafeCall( nppiSet_8u_C1MR(nVal, ptr<Npp8u>(), step, sz, mask.ptr<Npp8u>(), mask.step) );\r
+ break;\r
+ }\r
+ case CV_8UC4:\r
+ {\r
+ Scalar_<Npp8u> nVal = s;\r
+ nppSafeCall( nppiSet_8u_C4MR(nVal.val, ptr<Npp8u>(), step, sz, mask.ptr<Npp8u>(), mask.step) );\r
+ break;\r
+ }\r
+ case CV_16UC1:\r
+ {\r
+ Npp16u nVal = (Npp16u)s[0];\r
+ nppSafeCall( nppiSet_16u_C1MR(nVal, ptr<Npp16u>(), step, sz, mask.ptr<Npp8u>(), mask.step) );\r
+ break;\r
+ }\r
+ case CV_16UC4:\r
+ {\r
+ Scalar_<Npp16u> nVal = s;\r
+ nppSafeCall( nppiSet_16u_C4MR(nVal.val, ptr<Npp16u>(), step, sz, mask.ptr<Npp8u>(), mask.step) );\r
+ break;\r
+ }\r
+ case CV_16SC1:\r
+ {\r
+ Npp16s nVal = (Npp16s)s[0];\r
+ nppSafeCall( nppiSet_16s_C1MR(nVal, ptr<Npp16s>(), step, sz, mask.ptr<Npp8u>(), mask.step) );\r
+ break;\r
+ }\r
+ case CV_16SC4:\r
+ {\r
+ Scalar_<Npp16s> nVal = s;\r
+ nppSafeCall( nppiSet_16s_C4MR(nVal.val, ptr<Npp16s>(), step, sz, mask.ptr<Npp8u>(), mask.step) );\r
+ break;\r
+ }\r
+ case CV_32SC1:\r
+ {\r
+ Npp32s nVal = (Npp32s)s[0];\r
+ nppSafeCall( nppiSet_32s_C1MR(nVal, ptr<Npp32s>(), step, sz, mask.ptr<Npp8u>(), mask.step) );\r
+ break;\r
+ }\r
+ case CV_32SC4:\r
+ {\r
+ Scalar_<Npp32s> nVal = s;\r
+ nppSafeCall( nppiSet_32s_C4MR(nVal.val, ptr<Npp32s>(), step, sz, mask.ptr<Npp8u>(), mask.step) );\r
+ break;\r
+ }\r
+ case CV_32FC1:\r
+ {\r
+ Npp32f nVal = (Npp32f)s[0];\r
+ nppSafeCall( nppiSet_32f_C1MR(nVal, ptr<Npp32f>(), step, sz, mask.ptr<Npp8u>(), mask.step) );\r
+ break;\r
+ }\r
+ case CV_32FC4:\r
+ {\r
+ Scalar_<Npp32f> nVal = s;\r
+ nppSafeCall( nppiSet_32f_C4MR(nVal.val, ptr<Npp32f>(), step, sz, mask.ptr<Npp8u>(), mask.step) );\r
+ break;\r
+ }\r
+ default:\r
+ matrix_operations::set_to_with_mask( *this, depth(), s.val, mask, channels());\r
+ }\r
+ }\r
\r
return *this;\r
}\r
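
A hedged usage sketch of the reworked setTo (type and values are illustrative): masked fills of the listed types are now routed to the NPP masked set functions, and everything else falls back to the custom kernel.

    void setToSketch()
    {
        cv::gpu::GpuMat img(cv::Mat(48, 48, CV_16SC4, cv::Scalar::all(0)));
        cv::gpu::GpuMat mask(cv::Mat(48, 48, CV_8UC1, cv::Scalar(1)));   // non-zero pixels are filled
        img.setTo(cv::Scalar(1, 2, 3, 4), mask);                         // handled by nppiSet_16s_C4MR
        img.setTo(cv::Scalar::all(7));                                   // unmasked fill, nppiSet_16s_C4R
    }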
cv::Size sz(200, 200);\r
cv::Mat mat1(sz, type), mat2(sz, type);\r
cv::RNG rng(*ts->get_rng());\r
- rng.fill(mat1, cv::RNG::UNIFORM, cv::Scalar::all(10), cv::Scalar::all(100));\r
- rng.fill(mat2, cv::RNG::UNIFORM, cv::Scalar::all(10), cv::Scalar::all(100));\r
+ rng.fill(mat1, cv::RNG::UNIFORM, cv::Scalar::all(1), cv::Scalar::all(20));\r
+ rng.fill(mat2, cv::RNG::UNIFORM, cv::Scalar::all(1), cv::Scalar::all(20));\r
\r
return test(mat1, mat2);\r
}\r
int testResult = CvTS::OK;\r
try\r
{\r
- const int types[] = {CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1};\r
- const char* type_names[] = {"CV_8UC1", "CV_8UC3", "CV_8UC4", "CV_32FC1"};\r
+ const int types[] = {CV_8UC1, CV_8UC3, CV_8UC4, CV_32SC1, CV_32FC1};\r
+ const char* type_names[] = {"CV_8UC1", "CV_8UC3", "CV_8UC4", "CV_32SC1", "CV_32FC1"};\r
const int type_count = sizeof(types)/sizeof(types[0]);\r
\r
//run tests\r
\r
virtual int test(const Mat& mat1, const Mat& mat2)\r
{\r
- if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32FC1)\r
+ if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32SC1 && mat1.type() != CV_32FC1)\r
{\r
ts->printf(CvTS::LOG, "\nUnsupported type\n");\r
return CvTS::OK;\r
\r
int test( const Mat& mat1, const Mat& mat2 )\r
{\r
- if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32FC1)\r
+ if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32SC1 && mat1.type() != CV_32FC1)\r
{\r
ts->printf(CvTS::LOG, "\nUnsupported type\n");\r
return CvTS::OK;\r
\r
int test( const Mat& mat1, const Mat& mat2 )\r
{\r
- if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32FC1)\r
+ if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32SC1 && mat1.type() != CV_32FC1)\r
{\r
ts->printf(CvTS::LOG, "\nUnsupported type\n");\r
return CvTS::OK;\r
\r
int test( const Mat& mat1, const Mat& mat2 )\r
{\r
- if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32FC1)\r
+ if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32SC1 && mat1.type() != CV_32FC1)\r
{\r
ts->printf(CvTS::LOG, "\nUnsupported type\n");\r
return CvTS::OK;\r
\r
int test( const Mat& mat1, const Mat& mat2 )\r
{\r
- if (mat1.type() != CV_8UC1 && mat1.type() != CV_32FC1)\r
+ if (mat1.type() != CV_8UC1 && mat1.type() != CV_8UC4 && mat1.type() != CV_32SC1 && mat1.type() != CV_32FC1)\r
{\r
ts->printf(CvTS::LOG, "\nUnsupported type\n");\r
return CvTS::OK;\r
}\r
};\r
\r
+////////////////////////////////////////////////////////////////////////////////\r
+// exp\r
+struct CV_GpuNppImageExpTest : public CV_GpuArithmTest\r
+{\r
+ CV_GpuNppImageExpTest() : CV_GpuArithmTest( "GPU-NppImageExp", "exp" ) {}\r
+\r
+ int test( const Mat& mat1, const Mat& )\r
+ {\r
+ if (mat1.type() != CV_32FC1)\r
+ {\r
+ ts->printf(CvTS::LOG, "\nUnsupported type\n");\r
+ return CvTS::OK;\r
+ }\r
+\r
+ cv::Mat cpuRes;\r
+ cv::exp(mat1, cpuRes);\r
+\r
+ GpuMat gpu1(mat1);\r
+ GpuMat gpuRes;\r
+ cv::gpu::exp(gpu1, gpuRes);\r
+\r
+ return CheckNorm(cpuRes, gpuRes);\r
+ }\r
+};\r
+\r
+////////////////////////////////////////////////////////////////////////////////\r
+// log\r
+struct CV_GpuNppImageLogTest : public CV_GpuArithmTest\r
+{\r
+ CV_GpuNppImageLogTest() : CV_GpuArithmTest( "GPU-NppImageLog", "log" ) {}\r
+\r
+ int test( const Mat& mat1, const Mat& )\r
+ {\r
+ if (mat1.type() != CV_32FC1)\r
+ {\r
+ ts->printf(CvTS::LOG, "\nUnsupported type\n");\r
+ return CvTS::OK;\r
+ }\r
+\r
+ cv::Mat cpuRes;\r
+ cv::log(mat1, cpuRes);\r
+\r
+ GpuMat gpu1(mat1);\r
+ GpuMat gpuRes;\r
+ cv::gpu::log(gpu1, gpuRes);\r
+\r
+ return CheckNorm(cpuRes, gpuRes);\r
+ }\r
+};\r
+\r
+////////////////////////////////////////////////////////////////////////////////\r
+// magnitude\r
+struct CV_GpuNppImageMagnitudeTest : public CV_GpuArithmTest\r
+{\r
+ CV_GpuNppImageMagnitudeTest() : CV_GpuArithmTest( "GPU-NppImageMagnitude", "magnitude" ) {}\r
+\r
+ int test( const Mat& mat1, const Mat& mat2 )\r
+ {\r
+ if (mat1.type() != CV_32FC1)\r
+ {\r
+ ts->printf(CvTS::LOG, "\nUnsupported type\n");\r
+ return CvTS::OK;\r
+ }\r
+\r
+ cv::Mat cpuRes;\r
+ cv::magnitude(mat1, mat2, cpuRes);\r
+\r
+ GpuMat gpu1(mat1);\r
+ GpuMat gpu2(mat2);\r
+ GpuMat gpuRes;\r
+ cv::gpu::magnitude(gpu1, gpu2, gpuRes);\r
+\r
+ return CheckNorm(cpuRes, gpuRes);\r
+ }\r
+};\r
+\r
/////////////////////////////////////////////////////////////////////////////\r
/////////////////// tests registration /////////////////////////////////////\r
/////////////////////////////////////////////////////////////////////////////\r
CV_GpuNppImageFlipTest CV_GpuNppImageFlip_test;\r
CV_GpuNppImageSumTest CV_GpuNppImageSum_test;\r
CV_GpuNppImageMinNaxTest CV_GpuNppImageMinNax_test;\r
-CV_GpuNppImageLUTTest CV_GpuNppImageLUT_test;
\ No newline at end of file
+CV_GpuNppImageLUTTest CV_GpuNppImageLUT_test;\r
+CV_GpuNppImageExpTest CV_GpuNppImageExp_test;\r
+CV_GpuNppImageLogTest CV_GpuNppImageLog_test;\r
+CV_GpuNppImageMagnitudeTest CV_GpuNppImageMagnitude_test;\r
const char* blacklist[] =
{
- "GPU-NppImageSum",
- "GPU-MatOperatorAsyncCall",
- //"GPU-NppErode",
- //"GPU-NppDilate",
- //"GPU-NppMorphologyEx",
- //"GPU-NppImageDivide",
- //"GPU-NppImageMeanStdDev",
- //"GPU-NppImageMinNax",
- //"GPU-NppImageResize",
- //"GPU-NppImageWarpAffine",
- //"GPU-NppImageWarpPerspective",
- //"GPU-NppImageIntegral",
- //"GPU-NppImageBlur",
+ "GPU-NppImageSum", // crash
+ "GPU-MatOperatorAsyncCall", // crash
+ //"GPU-NppErode", // npp func returns error code (CUDA_KERNEL_LAUNCH_ERROR or TEXTURE_BIND_ERROR)
+ //"GPU-NppDilate", // npp func returns error code (CUDA_KERNEL_LAUNCH_ERROR or TEXTURE_BIND_ERROR)
+ //"GPU-NppMorphologyEx", // npp func returns error code (CUDA_KERNEL_LAUNCH_ERROR or TEXTURE_BIND_ERROR)
+ //"GPU-NppImageDivide", // different round mode
+ //"GPU-NppImageMeanStdDev", // different precision
+ //"GPU-NppImageMinNax", // npp bug
+ //"GPU-NppImageResize", // different precision in interpolation
+ //"GPU-NppImageWarpAffine", // different precision in interpolation
+ //"GPU-NppImageWarpPerspective", // different precision in interpolation
+ //"GPU-NppImageIntegral", // different precision
+ //"GPU-NppImageBlur", // different precision
+ //"GPU-NppImageExp", // different precision
+ //"GPU-NppImageLog", // different precision
+ //"GPU-NppImageMagnitude", // different precision
0
};
const int types[] = {CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F};
const int types_num = sizeof(types) / sizeof(int);
-
const char* types_str[] = {"CV_8U", "CV_8S", "CV_16U", "CV_16S", "CV_32S", "CV_32F", "CV_64F"};
bool passed = true;
{
for (int j = 0; j < types_num && passed; ++j)
{
- for (int c = 1; c < 2 && passed; ++c)
+ for (int c = 1; c < 5 && passed; ++c)
{
const int src_type = CV_MAKETYPE(types[i], c);
const int dst_type = types[j];
- const double alpha = (double)rand() / RAND_MAX * 2.0;
- const double beta = (double)rand() / RAND_MAX * 150.0 - 75;
cv::RNG rng(*ts->get_rng());
+ const double alpha = rng.uniform(0.0, 2.0);
+ const double beta = rng.uniform(-75.0, 75.0);
Mat cpumatsrc(img_size, src_type);
-
rng.fill(cpumatsrc, RNG::UNIFORM, Scalar::all(0), Scalar::all(300));
GpuMat gpumatsrc(cpumatsrc);
//M*/
#include "gputest.hpp"
-#include "highgui.h"
-
-#include <string>
-#include <iostream>
-#include <fstream>
-#include <iterator>
#include <limits>
-#include <numeric>
-#include <iomanip> // for cout << setw()
using namespace cv;
using namespace std;
protected:
void run(int);
- void print_mat(cv::Mat & mat, std::string name = "cpu mat");
- void print_mat(gpu::GpuMat & mat, std::string name = "gpu mat");
- bool compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat);
+
+ bool testSetTo(cv::Mat& cpumat, gpu::GpuMat& gpumat, const cv::Mat& cpumask = cv::Mat(), const cv::gpu::GpuMat& gpumask = cv::gpu::GpuMat());
private:
int rows;
CV_GpuMatOpSetToTest::CV_GpuMatOpSetToTest(): CvTest( "GPU-MatOperatorSetTo", "setTo" )
{
- rows = 256;
- cols = 124;
+ rows = 35;
+ cols = 67;
s.val[0] = 127.0;
s.val[1] = 127.0;
s.val[2] = 127.0;
s.val[3] = 127.0;
-
- //#define PRINT_MATRIX
}
-
-void CV_GpuMatOpSetToTest::print_mat(cv::Mat & mat, std::string name )
+bool CV_GpuMatOpSetToTest::testSetTo(cv::Mat& cpumat, gpu::GpuMat& gpumat, const cv::Mat& cpumask, const cv::gpu::GpuMat& gpumask)
{
- cv::imshow(name, mat);
-}
+ cpumat.setTo(s, cpumask);
+ gpumat.setTo(s, gpumask);
-void CV_GpuMatOpSetToTest::print_mat(gpu::GpuMat & mat, std::string name)
-{
- cv::Mat newmat;
- mat.download(newmat);
- print_mat(newmat, name);
-}
+ double ret = norm(cpumat, gpumat, NORM_INF);
-bool CV_GpuMatOpSetToTest::compare_matrix(cv::Mat & cpumat, gpu::GpuMat & gpumat)
-{
- //int64 time = getTickCount();
- cpumat.setTo(s);
- //int64 time1 = getTickCount();
- gpumat.setTo(s);
- //int64 time2 = getTickCount();
-
- //std::cout << "\ntime cpu: " << std::fixed << std::setprecision(12) << double((time1 - time) / (double)getTickFrequency());
- //std::cout << "\ntime gpu: " << std::fixed << std::setprecision(12) << double((time2 - time1) / (double)getTickFrequency());
- //std::cout << "\n";
-
-#ifdef PRINT_MATRIX
- print_mat(cpumat);
- print_mat(gpumat);
- cv::waitKey(0);
-#endif
-
- double ret = norm(cpumat, gpumat);
-
- if (ret < 1.0)
+ if (ret < std::numeric_limits<double>::epsilon())
return true;
else
{
try
{
+ cv::Mat cpumask(rows, cols, CV_8UC1);
+ cv::RNG rng(*ts->get_rng());
+ rng.fill(cpumask, RNG::UNIFORM, cv::Scalar::all(0.0), cv::Scalar(1.5));
+ cv::gpu::GpuMat gpumask(cpumask);
+
for (int i = 0; i < 7; i++)
{
- Mat cpumat(rows, cols, i, Scalar::all(0));
- GpuMat gpumat(cpumat);
- is_test_good &= compare_matrix(cpumat, gpumat);
+ for (int cn = 1; cn <= 4; ++cn)
+ {
+ int mat_type = CV_MAKETYPE(i, cn);
+ Mat cpumat(rows, cols, mat_type, Scalar::all(0));
+ GpuMat gpumat(cpumat);
+ is_test_good &= testSetTo(cpumat, gpumat, cpumask, gpumask);
+ }
}
}
catch(const cv::Exception& e)