From 0465b89e7ecbb9339babe79a8050f555479def29 Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Mon, 20 Dec 2010 09:07:19 +0000 Subject: [PATCH] gpu module refactoring: moved per-element operations into separated file --- modules/gpu/include/opencv2/gpu/gpu.hpp | 148 +++---- modules/gpu/src/arithm.cpp | 530 ------------------------- modules/gpu/src/cuda/element_operations.cu | 348 +++++++++++++++++ modules/gpu/src/cuda/mathfunc.cu | 330 ++-------------- modules/gpu/src/element_operations.cpp | 605 ++++++++++++++++++++++++++++- tests/gpu/src/bitwise_oper.cpp | 2 +- 6 files changed, 1048 insertions(+), 915 deletions(-) diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index a8329da..4827f85 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -362,46 +362,10 @@ namespace cv ////////////////////////////// Arithmetics /////////////////////////////////// - //! adds one matrix to another (c = a + b) - //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types - CV_EXPORTS void add(const GpuMat& a, const GpuMat& b, GpuMat& c); - //! adds scalar to a matrix (c = a + s) - //! supports CV_32FC1 and CV_32FC2 type - CV_EXPORTS void add(const GpuMat& a, const Scalar& sc, GpuMat& c); - //! subtracts one matrix from another (c = a - b) - //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types - CV_EXPORTS void subtract(const GpuMat& a, const GpuMat& b, GpuMat& c); - //! subtracts scalar from a matrix (c = a - s) - //! supports CV_32FC1 and CV_32FC2 type - CV_EXPORTS void subtract(const GpuMat& a, const Scalar& sc, GpuMat& c); - //! computes element-wise product of the two arrays (c = a * b) - //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types - CV_EXPORTS void multiply(const GpuMat& a, const GpuMat& b, GpuMat& c); - //! multiplies matrix to a scalar (c = a * s) - //! supports CV_32FC1 and CV_32FC2 type - CV_EXPORTS void multiply(const GpuMat& a, const Scalar& sc, GpuMat& c); - //! computes element-wise quotient of the two arrays (c = a / b) - //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types - CV_EXPORTS void divide(const GpuMat& a, const GpuMat& b, GpuMat& c); - //! computes element-wise quotient of matrix and scalar (c = a / s) - //! supports CV_32FC1 and CV_32FC2 type - CV_EXPORTS void divide(const GpuMat& a, const Scalar& sc, GpuMat& c); - //! transposes the matrix //! supports CV_8UC1, CV_8SC1, CV_8UC4, CV_8SC4, CV_16UC2, CV_16SC2, CV_32SC1, CV_32FC1 type CV_EXPORTS void transpose(const GpuMat& src1, GpuMat& dst); - //! computes element-wise absolute difference of two arrays (c = abs(a - b)) - //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types - CV_EXPORTS void absdiff(const GpuMat& a, const GpuMat& b, GpuMat& c); - //! computes element-wise absolute difference of array and scalar (c = abs(a - s)) - //! supports only CV_32FC1 type - CV_EXPORTS void absdiff(const GpuMat& a, const Scalar& s, GpuMat& c); - - //! compares elements of two arrays (c = a b) - //! supports CV_8UC4, CV_32FC1 types - CV_EXPORTS void compare(const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop); - //! computes mean value and standard deviation of all or selected array elements //! supports only CV_8UC1 type CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev); @@ -485,14 +449,6 @@ namespace cv //! copies each plane of a multi-channel array to a dedicated array (async version) CV_EXPORTS void split(const GpuMat& src, vector& dst, const Stream& stream); - //! 
computes exponent of each matrix element (b = e**a) - //! supports only CV_32FC1 type - CV_EXPORTS void exp(const GpuMat& a, GpuMat& b); - - //! computes natural logarithm of absolute value of each matrix element: b = log(abs(a)) - //! supports only CV_32FC1 type - CV_EXPORTS void log(const GpuMat& a, GpuMat& b); - //! computes magnitude of complex (x(i).re, x(i).im) vector //! supports only CV_32FC2 type CV_EXPORTS void magnitude(const GpuMat& x, GpuMat& magnitude); @@ -531,33 +487,6 @@ namespace cv //! async version CV_EXPORTS void polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, const Stream& stream); - - //! perfroms per-elements bit-wise inversion - CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask=GpuMat()); - //! async version - CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, const Stream& stream); - - //! calculates per-element bit-wise disjunction of two arrays - CV_EXPORTS void bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat()); - //! async version - CV_EXPORTS void bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream); - - //! calculates per-element bit-wise conjunction of two arrays - CV_EXPORTS void bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat()); - //! async version - CV_EXPORTS void bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream); - - //! calculates per-element bit-wise "exclusive or" operation - CV_EXPORTS void bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat()); - //! async version - CV_EXPORTS void bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream); - - //! Logical operators - CV_EXPORTS GpuMat operator ~ (const GpuMat& src); - CV_EXPORTS GpuMat operator | (const GpuMat& src1, const GpuMat& src2); - CV_EXPORTS GpuMat operator & (const GpuMat& src1, const GpuMat& src2); - CV_EXPORTS GpuMat operator ^ (const GpuMat& src1, const GpuMat& src2); - //! computes per-element minimum of two arrays (dst = min(src1, src2)) CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst); //! Async version @@ -578,6 +507,83 @@ namespace cv //! Async version CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream); + //////////////////////////// Per-element operations //////////////////////////////////// + + //! adds one matrix to another (c = a + b) + //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types + CV_EXPORTS void add(const GpuMat& a, const GpuMat& b, GpuMat& c); + //! adds scalar to a matrix (c = a + s) + //! supports CV_32FC1 and CV_32FC2 type + CV_EXPORTS void add(const GpuMat& a, const Scalar& sc, GpuMat& c); + + //! subtracts one matrix from another (c = a - b) + //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types + CV_EXPORTS void subtract(const GpuMat& a, const GpuMat& b, GpuMat& c); + //! subtracts scalar from a matrix (c = a - s) + //! supports CV_32FC1 and CV_32FC2 type + CV_EXPORTS void subtract(const GpuMat& a, const Scalar& sc, GpuMat& c); + + //! computes element-wise product of the two arrays (c = a * b) + //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types + CV_EXPORTS void multiply(const GpuMat& a, const GpuMat& b, GpuMat& c); + //! multiplies matrix to a scalar (c = a * s) + //! 
supports CV_32FC1 and CV_32FC2 types + CV_EXPORTS void multiply(const GpuMat& a, const Scalar& sc, GpuMat& c); + + //! computes element-wise quotient of the two arrays (c = a / b) + //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types + CV_EXPORTS void divide(const GpuMat& a, const GpuMat& b, GpuMat& c); + //! computes element-wise quotient of matrix and scalar (c = a / s) + //! supports CV_32FC1 and CV_32FC2 types + CV_EXPORTS void divide(const GpuMat& a, const Scalar& sc, GpuMat& c); + + //! computes exponent of each matrix element (b = e**a) + //! supports only CV_32FC1 type + CV_EXPORTS void exp(const GpuMat& a, GpuMat& b); + + //! computes natural logarithm of absolute value of each matrix element: b = log(abs(a)) + //! supports only CV_32FC1 type + CV_EXPORTS void log(const GpuMat& a, GpuMat& b); + + //! computes element-wise absolute difference of two arrays (c = abs(a - b)) + //! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types + CV_EXPORTS void absdiff(const GpuMat& a, const GpuMat& b, GpuMat& c); + //! computes element-wise absolute difference of array and scalar (c = abs(a - s)) + //! supports only CV_32FC1 type + CV_EXPORTS void absdiff(const GpuMat& a, const Scalar& s, GpuMat& c); + + //! compares elements of two arrays (c = a <cmpop> b) + //! supports CV_8UC4, CV_32FC1 types + CV_EXPORTS void compare(const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop); + + //! performs per-element bit-wise inversion + CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask=GpuMat()); + //! version without mask + CV_EXPORTS GpuMat operator ~ (const GpuMat& src); + //! async version + CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, const Stream& stream); + + //! calculates per-element bit-wise disjunction of two arrays + CV_EXPORTS void bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat()); + //! version without mask + CV_EXPORTS GpuMat operator | (const GpuMat& src1, const GpuMat& src2); + //! async version + CV_EXPORTS void bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream); + + //! calculates per-element bit-wise conjunction of two arrays + CV_EXPORTS void bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat()); + //! version without mask + CV_EXPORTS GpuMat operator & (const GpuMat& src1, const GpuMat& src2); + //! async version + CV_EXPORTS void bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream); + + //! calculates per-element bit-wise "exclusive or" operation + CV_EXPORTS void bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat()); + //! version without mask + CV_EXPORTS GpuMat operator ^ (const GpuMat& src1, const GpuMat& src2); + //! 
async version + CV_EXPORTS void bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream); + ////////////////////////////// Image processing ////////////////////////////// diff --git a/modules/gpu/src/arithm.cpp b/modules/gpu/src/arithm.cpp index 8a7abb8..56ba525 100644 --- a/modules/gpu/src/arithm.cpp +++ b/modules/gpu/src/arithm.cpp @@ -48,18 +48,7 @@ using namespace std; #if !defined (HAVE_CUDA) -void cv::gpu::add(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); } -void cv::gpu::add(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); } -void cv::gpu::subtract(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); } -void cv::gpu::subtract(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); } -void cv::gpu::multiply(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); } -void cv::gpu::multiply(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); } -void cv::gpu::divide(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); } -void cv::gpu::divide(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); } void cv::gpu::transpose(const GpuMat&, GpuMat&) { throw_nogpu(); } -void cv::gpu::absdiff(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); } -void cv::gpu::absdiff(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); } -void cv::gpu::compare(const GpuMat&, const GpuMat&, GpuMat&, int) { throw_nogpu(); } void cv::gpu::meanStdDev(const GpuMat&, Scalar&, Scalar&) { throw_nogpu(); } double cv::gpu::norm(const GpuMat&, int) { throw_nogpu(); return 0.0; } double cv::gpu::norm(const GpuMat&, const GpuMat&, int) { throw_nogpu(); return 0.0; } @@ -89,18 +78,6 @@ void cv::gpu::cartToPolar(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool) void cv::gpu::cartToPolar(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, const Stream&) { throw_nogpu(); } void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool) { throw_nogpu(); } void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, const Stream&) { throw_nogpu(); } -void cv::gpu::bitwise_not(const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); } -void cv::gpu::bitwise_not(const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); } -void cv::gpu::bitwise_or(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); } -void cv::gpu::bitwise_or(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); } -void cv::gpu::bitwise_and(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); } -void cv::gpu::bitwise_and(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); } -void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); } -void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); } -cv::gpu::GpuMat cv::gpu::operator ~ (const GpuMat&) { throw_nogpu(); return GpuMat(); } -cv::gpu::GpuMat cv::gpu::operator | (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); } -cv::gpu::GpuMat cv::gpu::operator & (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); } -cv::gpu::GpuMat cv::gpu::operator ^ (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); } void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); } void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); } void cv::gpu::min(const GpuMat&, double, GpuMat&) { throw_nogpu(); } @@ -113,164 +90,6 
@@ void cv::gpu::max(const GpuMat&, double, GpuMat&, const Stream&) { throw_nogpu() #else /* !defined (HAVE_CUDA) */ //////////////////////////////////////////////////////////////////////// -// add subtract multiply divide - -namespace -{ - typedef NppStatus (*npp_arithm_8u_t)(const Npp8u* pSrc1, int nSrc1Step, const Npp8u* pSrc2, int nSrc2Step, Npp8u* pDst, int nDstStep, - NppiSize oSizeROI, int nScaleFactor); - typedef NppStatus (*npp_arithm_32s_t)(const Npp32s* pSrc1, int nSrc1Step, const Npp32s* pSrc2, int nSrc2Step, Npp32s* pDst, - int nDstStep, NppiSize oSizeROI); - typedef NppStatus (*npp_arithm_32f_t)(const Npp32f* pSrc1, int nSrc1Step, const Npp32f* pSrc2, int nSrc2Step, Npp32f* pDst, - int nDstStep, NppiSize oSizeROI); - - void nppArithmCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, - npp_arithm_8u_t npp_func_8uc1, npp_arithm_8u_t npp_func_8uc4, - npp_arithm_32s_t npp_func_32sc1, npp_arithm_32f_t npp_func_32fc1) - { - CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type()); - - CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1); - - dst.create( src1.size(), src1.type() ); - - NppiSize sz; - sz.width = src1.cols; - sz.height = src1.rows; - - switch (src1.type()) - { - case CV_8UC1: - nppSafeCall( npp_func_8uc1(src1.ptr(), src1.step, - src2.ptr(), src2.step, - dst.ptr(), dst.step, sz, 0) ); - break; - case CV_8UC4: - nppSafeCall( npp_func_8uc4(src1.ptr(), src1.step, - src2.ptr(), src2.step, - dst.ptr(), dst.step, sz, 0) ); - break; - case CV_32SC1: - nppSafeCall( npp_func_32sc1(src1.ptr(), src1.step, - src2.ptr(), src2.step, - dst.ptr(), dst.step, sz) ); - break; - case CV_32FC1: - nppSafeCall( npp_func_32fc1(src1.ptr(), src1.step, - src2.ptr(), src2.step, - dst.ptr(), dst.step, sz) ); - break; - default: - CV_Assert(!"Unsupported source type"); - } - } - - template struct NppArithmScalarFunc; - template<> struct NppArithmScalarFunc<1> - { - typedef NppStatus (*func_ptr)(const Npp32f *pSrc, int nSrcStep, Npp32f nValue, Npp32f *pDst, - int nDstStep, NppiSize oSizeROI); - }; - template<> struct NppArithmScalarFunc<2> - { - typedef NppStatus (*func_ptr)(const Npp32fc *pSrc, int nSrcStep, Npp32fc nValue, Npp32fc *pDst, - int nDstStep, NppiSize oSizeROI); - }; - - template::func_ptr func> struct NppArithmScalar; - template::func_ptr func> struct NppArithmScalar<1, func> - { - static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst) - { - dst.create(src.size(), src.type()); - - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - - nppSafeCall( func(src.ptr(), src.step, (Npp32f)sc[0], dst.ptr(), dst.step, sz) ); - } - }; - template::func_ptr func> struct NppArithmScalar<2, func> - { - static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst) - { - dst.create(src.size(), src.type()); - - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - - Npp32fc nValue; - nValue.re = (Npp32f)sc[0]; - nValue.im = (Npp32f)sc[1]; - - nppSafeCall( func(src.ptr(), src.step, nValue, dst.ptr(), dst.step, sz) ); - } - }; -} - -void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) -{ - nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32s_C1R, nppiAdd_32f_C1R); -} - -void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) -{ - nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32s_C1R, nppiSub_32f_C1R); -} - -void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) -{ - 
nppArithmCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, nppiMul_32s_C1R, nppiMul_32f_C1R); -} - -void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) -{ - nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32s_C1R, nppiDiv_32f_C1R); -} - -void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst) -{ - typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst); - static const caller_t callers[] = {0, NppArithmScalar<1, nppiAddC_32f_C1R>::calc, NppArithmScalar<2, nppiAddC_32fc_C1R>::calc}; - - CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2); - - callers[src.channels()](src, sc, dst); -} - -void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst) -{ - typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst); - static const caller_t callers[] = {0, NppArithmScalar<1, nppiSubC_32f_C1R>::calc, NppArithmScalar<2, nppiSubC_32fc_C1R>::calc}; - - CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2); - - callers[src.channels()](src, sc, dst); -} - -void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst) -{ - typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst); - static const caller_t callers[] = {0, NppArithmScalar<1, nppiMulC_32f_C1R>::calc, NppArithmScalar<2, nppiMulC_32fc_C1R>::calc}; - - CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2); - - callers[src.channels()](src, sc, dst); -} - -void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst) -{ - typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst); - static const caller_t callers[] = {0, NppArithmScalar<1, nppiDivC_32f_C1R>::calc, NppArithmScalar<2, nppiDivC_32fc_C1R>::calc}; - - CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2); - - callers[src.channels()](src, sc, dst); -} - -//////////////////////////////////////////////////////////////////////// // transpose namespace cv { namespace gpu { namespace mathfunc @@ -300,112 +119,6 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst) } //////////////////////////////////////////////////////////////////////// -// absdiff - -void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst) -{ - CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type()); - - CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1); - - dst.create( src1.size(), src1.type() ); - - NppiSize sz; - sz.width = src1.cols; - sz.height = src1.rows; - - switch (src1.type()) - { - case CV_8UC1: - nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr(), src1.step, - src2.ptr(), src2.step, - dst.ptr(), dst.step, sz) ); - break; - case CV_8UC4: - nppSafeCall( nppiAbsDiff_8u_C4R(src1.ptr(), src1.step, - src2.ptr(), src2.step, - dst.ptr(), dst.step, sz) ); - break; - case CV_32SC1: - nppSafeCall( nppiAbsDiff_32s_C1R(src1.ptr(), src1.step, - src2.ptr(), src2.step, - dst.ptr(), dst.step, sz) ); - break; - case CV_32FC1: - nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr(), src1.step, - src2.ptr(), src2.step, - dst.ptr(), dst.step, sz) ); - break; - default: - CV_Assert(!"Unsupported source type"); - } -} - -void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst) -{ - CV_Assert(src.type() == CV_32FC1); - - dst.create( src.size(), src.type() ); - - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - - nppSafeCall( nppiAbsDiffC_32f_C1R(src.ptr(), src.step, dst.ptr(), dst.step, sz, (Npp32f)s[0]) ); -} - 
-//////////////////////////////////////////////////////////////////////// -// compare - -namespace cv { namespace gpu { namespace mathfunc -{ - void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst); - void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst); -}}} - -void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop) -{ - CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type()); - - CV_Assert(src1.type() == CV_8UC4 || src1.type() == CV_32FC1); - - dst.create( src1.size(), CV_8UC1 ); - - static const NppCmpOp nppCmpOp[] = { NPP_CMP_EQ, NPP_CMP_GREATER, NPP_CMP_GREATER_EQ, NPP_CMP_LESS, NPP_CMP_LESS_EQ }; - - NppiSize sz; - sz.width = src1.cols; - sz.height = src1.rows; - - if (src1.type() == CV_8UC4) - { - if (cmpop != CMP_NE) - { - nppSafeCall( nppiCompare_8u_C4R(src1.ptr(), src1.step, - src2.ptr(), src2.step, - dst.ptr(), dst.step, sz, nppCmpOp[cmpop]) ); - } - else - { - mathfunc::compare_ne_8uc4(src1, src2, dst); - } - } - else - { - if (cmpop != CMP_NE) - { - nppSafeCall( nppiCompare_32f_C1R(src1.ptr(), src1.step, - src2.ptr(), src2.step, - dst.ptr(), dst.step, sz, nppCmpOp[cmpop]) ); - } - else - { - mathfunc::compare_ne_32f(src1, src2, dst); - } - } -} - -//////////////////////////////////////////////////////////////////////// // meanStdDev void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev) @@ -997,249 +710,6 @@ void cv::gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& ::polarToCart_caller(magnitude, angle, x, y, angleInDegrees, StreamAccessor::getStream(stream)); } -////////////////////////////////////////////////////////////////////////////// -// Per-element bit-wise logical matrix operations - -namespace cv { namespace gpu { namespace mathfunc -{ - void bitwise_not_caller(int rows, int cols, int elem_size1, int cn, const PtrStep src, PtrStep dst, cudaStream_t stream); - - template - void bitwise_mask_not_caller(int rows, int cols, int cn, const PtrStep src, const PtrStep mask, PtrStep dst, cudaStream_t stream); - - void bitwise_or_caller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream); - - template - void bitwise_mask_or_caller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream); - - void bitwise_and_caller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream); - - template - void bitwise_mask_and_caller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream); - - void bitwise_xor_caller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream); - - template - void bitwise_mask_xor_caller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream); -}}} - -namespace -{ - void bitwise_not_caller(const GpuMat& src, GpuMat& dst, cudaStream_t stream) - { - dst.create(src.size(), src.type()); - - cv::gpu::mathfunc::bitwise_not_caller(src.rows, src.cols, src.elemSize1(), - dst.channels(), src, dst, stream); - } - - - void bitwise_not_caller(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream) - { - using namespace cv::gpu; - - typedef void (*Caller)(int, int, int, const PtrStep, const PtrStep, PtrStep, cudaStream_t); - static Caller 
callers[] = {mathfunc::bitwise_mask_not_caller, mathfunc::bitwise_mask_not_caller, - mathfunc::bitwise_mask_not_caller, mathfunc::bitwise_mask_not_caller, - mathfunc::bitwise_mask_not_caller, mathfunc::bitwise_mask_not_caller, - mathfunc::bitwise_mask_not_caller}; - - CV_Assert(mask.type() == CV_8U && mask.size() == src.size()); - dst.create(src.size(), src.type()); - - Caller caller = callers[src.depth()]; - CV_Assert(caller); - - int cn = src.depth() != CV_64F ? src.channels() : src.channels() * (sizeof(double) / sizeof(unsigned int)); - caller(src.rows, src.cols, cn, src, mask, dst, stream); - } - - - void bitwise_or_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream) - { - CV_Assert(src1.size() == src2.size() && src1.type() == src2.type()); - dst.create(src1.size(), src1.type()); - - cv::gpu::mathfunc::bitwise_or_caller(dst.rows, dst.cols, dst.elemSize1(), - dst.channels(), src1, src2, dst, stream); - } - - - void bitwise_or_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream) - { - using namespace cv::gpu; - - typedef void (*Caller)(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t); - static Caller callers[] = {mathfunc::bitwise_mask_or_caller, mathfunc::bitwise_mask_or_caller, - mathfunc::bitwise_mask_or_caller, mathfunc::bitwise_mask_or_caller, - mathfunc::bitwise_mask_or_caller, mathfunc::bitwise_mask_or_caller, - mathfunc::bitwise_mask_or_caller}; - - CV_Assert(src1.size() == src2.size() && src1.type() == src2.type()); - dst.create(src1.size(), src1.type()); - - Caller caller = callers[src1.depth()]; - CV_Assert(caller); - - int cn = dst.depth() != CV_64F ? dst.channels() : dst.channels() * (sizeof(double) / sizeof(unsigned int)); - caller(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream); - } - - - void bitwise_and_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream) - { - CV_Assert(src1.size() == src2.size() && src1.type() == src2.type()); - dst.create(src1.size(), src1.type()); - - cv::gpu::mathfunc::bitwise_and_caller(dst.rows, dst.cols, dst.elemSize1(), - dst.channels(), src1, src2, dst, stream); - } - - - void bitwise_and_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream) - { - using namespace cv::gpu; - - typedef void (*Caller)(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t); - static Caller callers[] = {mathfunc::bitwise_mask_and_caller, mathfunc::bitwise_mask_and_caller, - mathfunc::bitwise_mask_and_caller, mathfunc::bitwise_mask_and_caller, - mathfunc::bitwise_mask_and_caller, mathfunc::bitwise_mask_and_caller, - mathfunc::bitwise_mask_and_caller}; - - CV_Assert(src1.size() == src2.size() && src1.type() == src2.type()); - dst.create(src1.size(), src1.type()); - - Caller caller = callers[src1.depth()]; - CV_Assert(caller); - - int cn = dst.depth() != CV_64F ? 
dst.channels() : dst.channels() * (sizeof(double) / sizeof(unsigned int)); - caller(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream); - } - - - void bitwise_xor_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream) - { - CV_Assert(src1.size() == src2.size() && src1.type() == src2.type()); - dst.create(src1.size(), src1.type()); - - cv::gpu::mathfunc::bitwise_xor_caller(dst.rows, dst.cols, dst.elemSize1(), - dst.channels(), src1, src2, dst, stream); - } - - - void bitwise_xor_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream) - { - using namespace cv::gpu; - - typedef void (*Caller)(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t); - static Caller callers[] = {mathfunc::bitwise_mask_xor_caller, mathfunc::bitwise_mask_xor_caller, - mathfunc::bitwise_mask_xor_caller, mathfunc::bitwise_mask_xor_caller, - mathfunc::bitwise_mask_xor_caller, mathfunc::bitwise_mask_xor_caller, - mathfunc::bitwise_mask_xor_caller}; - - CV_Assert(src1.size() == src2.size() && src1.type() == src2.type()); - dst.create(src1.size(), src1.type()); - - Caller caller = callers[src1.depth()]; - CV_Assert(caller); - - int cn = dst.depth() != CV_64F ? dst.channels() : dst.channels() * (sizeof(double) / sizeof(unsigned int)); - caller(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream); - } -} - -void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask) -{ - if (mask.empty()) - ::bitwise_not_caller(src, dst, 0); - else - ::bitwise_not_caller(src, dst, mask, 0); -} - -void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, const Stream& stream) -{ - if (mask.empty()) - ::bitwise_not_caller(src, dst, StreamAccessor::getStream(stream)); - else - ::bitwise_not_caller(src, dst, mask, StreamAccessor::getStream(stream)); -} - -void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask) -{ - if (mask.empty()) - ::bitwise_or_caller(src1, src2, dst, 0); - else - ::bitwise_or_caller(src1, src2, dst, mask, 0); -} - -void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream) -{ - if (mask.empty()) - ::bitwise_or_caller(src1, src2, dst, StreamAccessor::getStream(stream)); - else - ::bitwise_or_caller(src1, src2, dst, mask, StreamAccessor::getStream(stream)); -} - -void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask) -{ - if (mask.empty()) - ::bitwise_and_caller(src1, src2, dst, 0); - else - ::bitwise_and_caller(src1, src2, dst, mask, 0); -} - -void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream) -{ - if (mask.empty()) - ::bitwise_and_caller(src1, src2, dst, StreamAccessor::getStream(stream)); - else - ::bitwise_and_caller(src1, src2, dst, mask, StreamAccessor::getStream(stream)); -} - -void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask) -{ - if (mask.empty()) - ::bitwise_xor_caller(src1, src2, dst, 0); - else - ::bitwise_xor_caller(src1, src2, dst, mask, 0); -} - -void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream) -{ - if (mask.empty()) - ::bitwise_xor_caller(src1, src2, dst, StreamAccessor::getStream(stream)); - else - ::bitwise_xor_caller(src1, src2, dst, mask, StreamAccessor::getStream(stream)); - -} - -cv::gpu::GpuMat cv::gpu::operator ~ (const GpuMat& 
src) -{ - GpuMat dst; - bitwise_not(src, dst); - return dst; -} - -cv::gpu::GpuMat cv::gpu::operator | (const GpuMat& src1, const GpuMat& src2) -{ - GpuMat dst; - bitwise_or(src1, src2, dst); - return dst; -} - -cv::gpu::GpuMat cv::gpu::operator & (const GpuMat& src1, const GpuMat& src2) -{ - GpuMat dst; - bitwise_and(src1, src2, dst); - return dst; -} - -cv::gpu::GpuMat cv::gpu::operator ^ (const GpuMat& src1, const GpuMat& src2) -{ - GpuMat dst; - bitwise_xor(src1, src2, dst); - return dst; -} ////////////////////////////////////////////////////////////////////////////// // min/max diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu index e69de29..ba9d011 100644 --- a/modules/gpu/src/cuda/element_operations.cu +++ b/modules/gpu/src/cuda/element_operations.cu @@ -0,0 +1,348 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "opencv2/gpu/device/vecmath.hpp" +#include "transform.hpp" +#include "internal_shared.hpp" + +using namespace cv::gpu; +using namespace cv::gpu::device; + +namespace cv { namespace gpu { namespace mathfunc +{ + + ////////////////////////////////////////////////////////////////////////////////////// + // Compare + + template + struct NotEqual + { + __device__ uchar operator()(const T1& src1, const T2& src2) + { + return static_cast(static_cast(src1 != src2) * 255); + } + }; + + template + inline void compare_ne(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst) + { + NotEqual op; + transform(static_cast< DevMem2D_ >(src1), static_cast< DevMem2D_ >(src2), dst, op, 0); + } + + void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst) + { + compare_ne(src1, src2, dst); + } + void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst) + { + compare_ne(src1, src2, dst); + } + + + ////////////////////////////////////////////////////////////////////////// + // Unary bitwise logical matrix operations + + enum { UN_OP_NOT }; + + template + struct UnOp; + + template + struct UnOp + { + static __device__ T call(T v) { return ~v; } + }; + + + template + __global__ void bitwiseUnOpKernel(int rows, int width, const PtrStep src, PtrStep dst) + { + const int x = (blockDim.x * blockIdx.x + threadIdx.x) * 4; + const int y = blockDim.y * blockIdx.y + threadIdx.y; + + if (y < rows) + { + uchar* dst_ptr = dst.ptr(y) + x; + const uchar* src_ptr = src.ptr(y) + x; + if (x + sizeof(uint) - 1 < width) + { + *(uint*)dst_ptr = UnOp::call(*(uint*)src_ptr); + } + else + { + const uchar* src_end = src.ptr(y) + width; + while (src_ptr < src_end) + { + *dst_ptr++ = UnOp::call(*src_ptr++); + } + } + } + } + + + template + void bitwiseUnOp(int rows, int width, const PtrStep src, PtrStep dst, + cudaStream_t stream) + { + dim3 threads(16, 16); + dim3 grid(divUp(width, threads.x * sizeof(uint)), + divUp(rows, threads.y)); + + bitwiseUnOpKernel<<>>(rows, width, src, dst); + + if (stream == 0) + cudaSafeCall(cudaThreadSynchronize()); + } + + + template + __global__ void bitwiseUnOpKernel(int rows, int cols, int cn, const PtrStep src, + const PtrStep mask, PtrStep dst) + { + const int x = blockDim.x * blockIdx.x + threadIdx.x; + const int y = blockDim.y * blockIdx.y + threadIdx.y; + + if (x < cols && y < rows && mask.ptr(y)[x / cn]) + { + T* dst_row = (T*)dst.ptr(y); + const T* src_row = (const T*)src.ptr(y); + + dst_row[x] = UnOp::call(src_row[x]); + } + } + + + template + void bitwiseUnOp(int rows, int cols, int cn, const PtrStep src, + const PtrStep mask, PtrStep dst, cudaStream_t stream) + { + dim3 threads(16, 16); + dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y)); + + bitwiseUnOpKernel<<>>(rows, cols, cn, src, mask, dst); + + if (stream == 0) + cudaSafeCall(cudaThreadSynchronize()); + } + + + void bitwiseNotCaller(int rows, int cols, int elem_size1, int cn, + const PtrStep src, PtrStep dst, cudaStream_t stream) + { + bitwiseUnOp(rows, cols * elem_size1 * cn, src, dst, stream); + } + + + template + void bitwiseMaskNotCaller(int rows, int cols, int cn, const PtrStep src, + const PtrStep mask, PtrStep dst, cudaStream_t stream) + { + bitwiseUnOp(rows, cols * cn, cn, src, mask, dst, stream); + } + + template void bitwiseMaskNotCaller(int, int, int, const PtrStep, const PtrStep, PtrStep, cudaStream_t); + template void bitwiseMaskNotCaller(int, int, int, const PtrStep, const PtrStep, PtrStep, cudaStream_t); + template void 
bitwiseMaskNotCaller(int, int, int, const PtrStep, const PtrStep, PtrStep, cudaStream_t); + + + ////////////////////////////////////////////////////////////////////////// + // Binary bitwise logical matrix operations + + enum { BIN_OP_OR, BIN_OP_AND, BIN_OP_XOR }; + + template + struct BinOp; + + template + struct BinOp + { + static __device__ T call(T a, T b) { return a | b; } + }; + + + template + struct BinOp + { + static __device__ T call(T a, T b) { return a & b; } + }; + + template + struct BinOp + { + static __device__ T call(T a, T b) { return a ^ b; } + }; + + + template + __global__ void bitwiseBinOpKernel(int rows, int width, const PtrStep src1, + const PtrStep src2, PtrStep dst) + { + const int x = (blockDim.x * blockIdx.x + threadIdx.x) * 4; + const int y = blockDim.y * blockIdx.y + threadIdx.y; + + if (y < rows) + { + uchar* dst_ptr = dst.ptr(y) + x; + const uchar* src1_ptr = src1.ptr(y) + x; + const uchar* src2_ptr = src2.ptr(y) + x; + + if (x + sizeof(uint) - 1 < width) + { + *(uint*)dst_ptr = BinOp::call(*(uint*)src1_ptr, *(uint*)src2_ptr); + } + else + { + const uchar* src1_end = src1.ptr(y) + width; + while (src1_ptr < src1_end) + { + *dst_ptr++ = BinOp::call(*src1_ptr++, *src2_ptr++); + } + } + } + } + + + template + void bitwiseBinOp(int rows, int width, const PtrStep src1, const PtrStep src2, + PtrStep dst, cudaStream_t stream) + { + dim3 threads(16, 16); + dim3 grid(divUp(width, threads.x * sizeof(uint)), divUp(rows, threads.y)); + + bitwiseBinOpKernel<<>>(rows, width, src1, src2, dst); + + if (stream == 0) + cudaSafeCall(cudaThreadSynchronize()); + } + + + template + __global__ void bitwiseBinOpKernel( + int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, + const PtrStep mask, PtrStep dst) + { + const int x = blockDim.x * blockIdx.x + threadIdx.x; + const int y = blockDim.y * blockIdx.y + threadIdx.y; + + if (x < cols && y < rows && mask.ptr(y)[x / cn]) + { + T* dst_row = (T*)dst.ptr(y); + const T* src1_row = (const T*)src1.ptr(y); + const T* src2_row = (const T*)src2.ptr(y); + + dst_row[x] = BinOp::call(src1_row[x], src2_row[x]); + } + } + + + template + void bitwiseBinOp(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, + const PtrStep mask, PtrStep dst, cudaStream_t stream) + { + dim3 threads(16, 16); + dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y)); + + bitwiseBinOpKernel<<>>(rows, cols, cn, src1, src2, mask, dst); + + if (stream == 0) + cudaSafeCall(cudaThreadSynchronize()); + } + + + void bitwiseOrCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, + const PtrStep src2, PtrStep dst, cudaStream_t stream) + { + bitwiseBinOp(rows, cols * elem_size1 * cn, src1, src2, dst, stream); + } + + + template + void bitwiseMaskOrCaller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, + const PtrStep mask, PtrStep dst, cudaStream_t stream) + { + bitwiseBinOp(rows, cols * cn, cn, src1, src2, mask, dst, stream); + } + + template void bitwiseMaskOrCaller(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t); + template void bitwiseMaskOrCaller(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t); + template void bitwiseMaskOrCaller(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t); + + + void bitwiseAndCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, + const PtrStep src2, PtrStep dst, cudaStream_t stream) + { + bitwiseBinOp(rows, cols * elem_size1 * cn, src1, src2, dst, stream); + } + 
+ + template + void bitwiseMaskAndCaller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, + const PtrStep mask, PtrStep dst, cudaStream_t stream) + { + bitwiseBinOp(rows, cols * cn, cn, src1, src2, mask, dst, stream); + } + + template void bitwiseMaskAndCaller(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t); + template void bitwiseMaskAndCaller(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t); + template void bitwiseMaskAndCaller(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t); + + + void bitwiseXorCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, + const PtrStep src2, PtrStep dst, cudaStream_t stream) + { + bitwiseBinOp(rows, cols * elem_size1 * cn, src1, src2, dst, stream); + } + + + template + void bitwiseMaskXorCaller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, + const PtrStep mask, PtrStep dst, cudaStream_t stream) + { + bitwiseBinOp(rows, cols * cn, cn, src1, src2, mask, dst, stream); + } + + template void bitwiseMaskXorCaller(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t); + template void bitwiseMaskXorCaller(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t); + template void bitwiseMaskXorCaller(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t); + +}}} diff --git a/modules/gpu/src/cuda/mathfunc.cu b/modules/gpu/src/cuda/mathfunc.cu index 387d49d..fe5a0e2 100644 --- a/modules/gpu/src/cuda/mathfunc.cu +++ b/modules/gpu/src/cuda/mathfunc.cu @@ -78,6 +78,29 @@ namespace cv { namespace gpu { namespace mathfunc } } + + struct Mask8U + { + explicit Mask8U(PtrStep mask): mask(mask) {} + + __device__ bool operator()(int y, int x) const + { + return mask.ptr(y)[x]; + } + + PtrStep mask; + }; + + + struct MaskTrue + { + __device__ bool operator()(int y, int x) const + { + return true; + } + }; + + struct Nothing { static __device__ void calc(int, int, float, float, float*, size_t, float) @@ -235,313 +258,6 @@ namespace cv { namespace gpu { namespace mathfunc callers[mag.data == 0](mag, angle, x, y, angleInDegrees, stream); } -////////////////////////////////////////////////////////////////////////////////////// -// Compare - - template - struct NotEqual - { - __device__ uchar operator()(const T1& src1, const T2& src2) - { - return static_cast(static_cast(src1 != src2) * 255); - } - }; - - template - inline void compare_ne(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst) - { - NotEqual op; - transform(static_cast< DevMem2D_ >(src1), static_cast< DevMem2D_ >(src2), dst, op, 0); - } - - void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst) - { - compare_ne(src1, src2, dst); - } - void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst) - { - compare_ne(src1, src2, dst); - } - - -////////////////////////////////////////////////////////////////////////////// -// Per-element bit-wise logical matrix operations - - struct Mask8U - { - explicit Mask8U(PtrStep mask): mask(mask) {} - - __device__ bool operator()(int y, int x) const - { - return mask.ptr(y)[x]; - } - - PtrStep mask; - }; - - - struct MaskTrue - { - __device__ bool operator()(int y, int x) const - { - return true; - } - }; - - //------------------------------------------------------------------------ - // Unary operations - - enum { UN_OP_NOT }; - - template - struct UnOp; - - template - struct UnOp 
- { - static __device__ T call(T v) { return ~v; } - }; - - - template - __global__ void bitwise_un_op_kernel(int rows, int width, const PtrStep src, PtrStep dst) - { - const int x = (blockDim.x * blockIdx.x + threadIdx.x) * 4; - const int y = blockDim.y * blockIdx.y + threadIdx.y; - - if (y < rows) - { - uchar* dst_ptr = dst.ptr(y) + x; - const uchar* src_ptr = src.ptr(y) + x; - if (x + sizeof(uint) - 1 < width) - { - *(uint*)dst_ptr = UnOp::call(*(uint*)src_ptr); - } - else - { - const uchar* src_end = src.ptr(y) + width; - while (src_ptr < src_end) - { - *dst_ptr++ = UnOp::call(*src_ptr++); - } - } - } - } - - - template - void bitwise_un_op(int rows, int width, const PtrStep src, PtrStep dst, cudaStream_t stream) - { - dim3 threads(16, 16); - dim3 grid(divUp(width, threads.x * sizeof(uint)), - divUp(rows, threads.y)); - - bitwise_un_op_kernel<<>>(rows, width, src, dst); - - if (stream == 0) - cudaSafeCall(cudaThreadSynchronize()); - } - - - template - __global__ void bitwise_un_op_kernel(int rows, int cols, int cn, const PtrStep src, const PtrStep mask, PtrStep dst) - { - const int x = blockDim.x * blockIdx.x + threadIdx.x; - const int y = blockDim.y * blockIdx.y + threadIdx.y; - - if (x < cols && y < rows && mask.ptr(y)[x / cn]) - { - T* dst_row = (T*)dst.ptr(y); - const T* src_row = (const T*)src.ptr(y); - - dst_row[x] = UnOp::call(src_row[x]); - } - } - - - template - void bitwise_un_op(int rows, int cols, int cn, const PtrStep src, const PtrStep mask, PtrStep dst, cudaStream_t stream) - { - dim3 threads(16, 16); - dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y)); - - bitwise_un_op_kernel<<>>(rows, cols, cn, src, mask, dst); - - if (stream == 0) - cudaSafeCall(cudaThreadSynchronize()); - } - - - void bitwise_not_caller(int rows, int cols, int elem_size1, int cn, const PtrStep src, PtrStep dst, cudaStream_t stream) - { - bitwise_un_op(rows, cols * elem_size1 * cn, src, dst, stream); - } - - - template - void bitwise_mask_not_caller(int rows, int cols, int cn, const PtrStep src, const PtrStep mask, PtrStep dst, cudaStream_t stream) - { - bitwise_un_op(rows, cols * cn, cn, src, mask, dst, stream); - } - - template void bitwise_mask_not_caller(int, int, int, const PtrStep, const PtrStep, PtrStep, cudaStream_t); - template void bitwise_mask_not_caller(int, int, int, const PtrStep, const PtrStep, PtrStep, cudaStream_t); - template void bitwise_mask_not_caller(int, int, int, const PtrStep, const PtrStep, PtrStep, cudaStream_t); - - //------------------------------------------------------------------------ - // Binary operations - - enum { BIN_OP_OR, BIN_OP_AND, BIN_OP_XOR }; - - template - struct BinOp; - - template - struct BinOp - { - static __device__ T call(T a, T b) { return a | b; } - }; - - - template - struct BinOp - { - static __device__ T call(T a, T b) { return a & b; } - }; - - template - struct BinOp - { - static __device__ T call(T a, T b) { return a ^ b; } - }; - - - template - __global__ void bitwise_bin_op_kernel(int rows, int width, const PtrStep src1, const PtrStep src2, PtrStep dst) - { - const int x = (blockDim.x * blockIdx.x + threadIdx.x) * 4; - const int y = blockDim.y * blockIdx.y + threadIdx.y; - - if (y < rows) - { - uchar* dst_ptr = dst.ptr(y) + x; - const uchar* src1_ptr = src1.ptr(y) + x; - const uchar* src2_ptr = src2.ptr(y) + x; - - if (x + sizeof(uint) - 1 < width) - { - *(uint*)dst_ptr = BinOp::call(*(uint*)src1_ptr, *(uint*)src2_ptr); - } - else - { - const uchar* src1_end = src1.ptr(y) + width; - while (src1_ptr < src1_end) - { - *dst_ptr++ = 
BinOp::call(*src1_ptr++, *src2_ptr++); - } - } - } - } - - - template - void bitwise_bin_op(int rows, int width, const PtrStep src1, const PtrStep src2, PtrStep dst, - cudaStream_t stream) - { - dim3 threads(16, 16); - dim3 grid(divUp(width, threads.x * sizeof(uint)), divUp(rows, threads.y)); - - bitwise_bin_op_kernel<<>>(rows, width, src1, src2, dst); - - if (stream == 0) - cudaSafeCall(cudaThreadSynchronize()); - } - - - template - __global__ void bitwise_bin_op_kernel( - int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, - const PtrStep mask, PtrStep dst) - { - const int x = blockDim.x * blockIdx.x + threadIdx.x; - const int y = blockDim.y * blockIdx.y + threadIdx.y; - - if (x < cols && y < rows && mask.ptr(y)[x / cn]) - { - T* dst_row = (T*)dst.ptr(y); - const T* src1_row = (const T*)src1.ptr(y); - const T* src2_row = (const T*)src2.ptr(y); - - dst_row[x] = BinOp::call(src1_row[x], src2_row[x]); - } - } - - - template - void bitwise_bin_op(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, - const PtrStep mask, PtrStep dst, cudaStream_t stream) - { - dim3 threads(16, 16); - dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y)); - - bitwise_bin_op_kernel<<>>(rows, cols, cn, src1, src2, mask, dst); - - if (stream == 0) - cudaSafeCall(cudaThreadSynchronize()); - } - - - void bitwise_or_caller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream) - { - bitwise_bin_op(rows, cols * elem_size1 * cn, src1, src2, dst, stream); - } - - - template - void bitwise_mask_or_caller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream) - { - bitwise_bin_op(rows, cols * cn, cn, src1, src2, mask, dst, stream); - } - - template void bitwise_mask_or_caller(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t); - template void bitwise_mask_or_caller(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t); - template void bitwise_mask_or_caller(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t); - - - void bitwise_and_caller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream) - { - bitwise_bin_op(rows, cols * elem_size1 * cn, src1, src2, dst, stream); - } - - - template - void bitwise_mask_and_caller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream) - { - bitwise_bin_op(rows, cols * cn, cn, src1, src2, mask, dst, stream); - } - - template void bitwise_mask_and_caller(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t); - template void bitwise_mask_and_caller(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t); - template void bitwise_mask_and_caller(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t); - - - void bitwise_xor_caller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream) - { - bitwise_bin_op(rows, cols * elem_size1 * cn, src1, src2, dst, stream); - } - - - template - void bitwise_mask_xor_caller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream) - { - bitwise_bin_op(rows, cols * cn, cn, src1, src2, mask, dst, stream); - } - - template void bitwise_mask_xor_caller(int, int, int, const 
PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t); - template void bitwise_mask_xor_caller(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t); - template void bitwise_mask_xor_caller(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t); - - ////////////////////////////////////////////////////////////////////////////// // Min max diff --git a/modules/gpu/src/element_operations.cpp b/modules/gpu/src/element_operations.cpp index ea3f832..5d802bd 100644 --- a/modules/gpu/src/element_operations.cpp +++ b/modules/gpu/src/element_operations.cpp @@ -1,16 +1,609 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other GpuMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or bpied warranties, including, but not limited to, the bpied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ +#include "precomp.hpp" +using namespace cv; +using namespace cv::gpu; +#if !defined (HAVE_CUDA) +void cv::gpu::add(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); } +void cv::gpu::add(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); } +void cv::gpu::subtract(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); } +void cv::gpu::subtract(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); } +void cv::gpu::multiply(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); } +void cv::gpu::multiply(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); } +void cv::gpu::divide(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); } +void cv::gpu::divide(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); } +void cv::gpu::absdiff(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); } +void cv::gpu::absdiff(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); } +void cv::gpu::compare(const GpuMat&, const GpuMat&, GpuMat&, int) { throw_nogpu(); } +void cv::gpu::bitwise_not(const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); } +void cv::gpu::bitwise_not(const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); } +void cv::gpu::bitwise_or(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); } +void cv::gpu::bitwise_or(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); } +void cv::gpu::bitwise_and(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); } +void cv::gpu::bitwise_and(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); } +void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); } +void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); } +cv::gpu::GpuMat cv::gpu::operator ~ (const GpuMat&) { throw_nogpu(); return GpuMat(); } +cv::gpu::GpuMat cv::gpu::operator | (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); } +cv::gpu::GpuMat cv::gpu::operator & (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); } +cv::gpu::GpuMat cv::gpu::operator ^ (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); } +#else -////////////////////////////////////////////////////////////////////////////////////////////////// -////////////////////////// Unary per-element operations ///////////////////////////////////////// -// operation(GpuMat src, GpuMat dst) +//////////////////////////////////////////////////////////////////////// +// Basic arithmetical operations (add subtract multiply divide) +namespace +{ + typedef NppStatus (*npp_arithm_8u_t)(const Npp8u* pSrc1, int nSrc1Step, const Npp8u* pSrc2, int nSrc2Step, Npp8u* pDst, int nDstStep, + NppiSize oSizeROI, int nScaleFactor); + typedef NppStatus (*npp_arithm_32s_t)(const Npp32s* pSrc1, int nSrc1Step, const Npp32s* pSrc2, int nSrc2Step, Npp32s* pDst, + int nDstStep, NppiSize oSizeROI); + typedef NppStatus (*npp_arithm_32f_t)(const Npp32f* pSrc1, int nSrc1Step, const Npp32f* pSrc2, int nSrc2Step, Npp32f* pDst, + int nDstStep, NppiSize oSizeROI); + void nppArithmCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, + npp_arithm_8u_t npp_func_8uc1, npp_arithm_8u_t npp_func_8uc4, + npp_arithm_32s_t npp_func_32sc1, npp_arithm_32f_t npp_func_32fc1) + { + CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type()); + CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1); 
-//////////////////////////////////////////////////////////////////////////////////////////////////
-////////////////////////// Binary per-element operations ////////////////////////////////////////
-// operation(GpuMat src1, GpuMat src2, GpuMat dst)
+        dst.create( src1.size(), src1.type() );
+
+        NppiSize sz;
+        sz.width  = src1.cols;
+        sz.height = src1.rows;
+
+        switch (src1.type())
+        {
+        case CV_8UC1:
+            nppSafeCall( npp_func_8uc1(src1.ptr<Npp8u>(), src1.step,
+                src2.ptr<Npp8u>(), src2.step,
+                dst.ptr<Npp8u>(), dst.step, sz, 0) );
+            break;
+        case CV_8UC4:
+            nppSafeCall( npp_func_8uc4(src1.ptr<Npp8u>(), src1.step,
+                src2.ptr<Npp8u>(), src2.step,
+                dst.ptr<Npp8u>(), dst.step, sz, 0) );
+            break;
+        case CV_32SC1:
+            nppSafeCall( npp_func_32sc1(src1.ptr<Npp32s>(), src1.step,
+                src2.ptr<Npp32s>(), src2.step,
+                dst.ptr<Npp32s>(), dst.step, sz) );
+            break;
+        case CV_32FC1:
+            nppSafeCall( npp_func_32fc1(src1.ptr<Npp32f>(), src1.step,
+                src2.ptr<Npp32f>(), src2.step,
+                dst.ptr<Npp32f>(), dst.step, sz) );
+            break;
+        default:
+            CV_Assert(!"Unsupported source type");
+        }
+    }
+
+    template<int SCN> struct NppArithmScalarFunc;
+    template<> struct NppArithmScalarFunc<1>
+    {
+        typedef NppStatus (*func_ptr)(const Npp32f *pSrc, int nSrcStep, Npp32f nValue, Npp32f *pDst,
+            int nDstStep, NppiSize oSizeROI);
+    };
+    template<> struct NppArithmScalarFunc<2>
+    {
+        typedef NppStatus (*func_ptr)(const Npp32fc *pSrc, int nSrcStep, Npp32fc nValue, Npp32fc *pDst,
+            int nDstStep, NppiSize oSizeROI);
+    };
+
+    template<int SCN, typename NppArithmScalarFunc<SCN>::func_ptr func> struct NppArithmScalar;
+    template<typename NppArithmScalarFunc<1>::func_ptr func> struct NppArithmScalar<1, func>
+    {
+        static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst)
+        {
+            dst.create(src.size(), src.type());
+
+            NppiSize sz;
+            sz.width  = src.cols;
+            sz.height = src.rows;
+
+            nppSafeCall( func(src.ptr<Npp32f>(), src.step, (Npp32f)sc[0], dst.ptr<Npp32f>(), dst.step, sz) );
+        }
+    };
+    template<typename NppArithmScalarFunc<2>::func_ptr func> struct NppArithmScalar<2, func>
+    {
+        static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst)
+        {
+            dst.create(src.size(), src.type());
+
+            NppiSize sz;
+            sz.width  = src.cols;
+            sz.height = src.rows;
+
+            Npp32fc nValue;
+            nValue.re = (Npp32f)sc[0];
+            nValue.im = (Npp32f)sc[1];
+
+            nppSafeCall( func(src.ptr<Npp32fc>(), src.step, nValue, dst.ptr<Npp32fc>(), dst.step, sz) );
+        }
+    };
+}
+
+void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
+{
+    nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32s_C1R, nppiAdd_32f_C1R);
+}
+
+void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
+{
+    nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32s_C1R, nppiSub_32f_C1R);
+}
+
+void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
+{
+    nppArithmCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, nppiMul_32s_C1R, nppiMul_32f_C1R);
+}
+
+void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
+{
+    nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32s_C1R, nppiDiv_32f_C1R);
+}
+
+void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst)
+{
+    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
+    static const caller_t callers[] = {0, NppArithmScalar<1, nppiAddC_32f_C1R>::calc, NppArithmScalar<2, nppiAddC_32fc_C1R>::calc};
+
+    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
+
+    callers[src.channels()](src, sc, dst);
+}
+
+void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst)
+{
+    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
+    static const caller_t callers[] = {0, NppArithmScalar<1, nppiSubC_32f_C1R>::calc, NppArithmScalar<2, nppiSubC_32fc_C1R>::calc};
+
+    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
+
+    callers[src.channels()](src, sc, dst);
+}
+
+void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst)
+{
+    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
+    static const caller_t callers[] = {0, NppArithmScalar<1, nppiMulC_32f_C1R>::calc, NppArithmScalar<2, nppiMulC_32fc_C1R>::calc};
+
+    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
+
+    callers[src.channels()](src, sc, dst);
+}
+
+void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst)
+{
+    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
+    static const caller_t callers[] = {0, NppArithmScalar<1, nppiDivC_32f_C1R>::calc, NppArithmScalar<2, nppiDivC_32fc_C1R>::calc};
+
+    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);
+
+    callers[src.channels()](src, sc, dst);
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Absolute difference
+
+void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
+{
+    CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
+
+    CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);
+
+    dst.create( src1.size(), src1.type() );
+
+    NppiSize sz;
+    sz.width  = src1.cols;
+    sz.height = src1.rows;
+
+    switch (src1.type())
+    {
+    case CV_8UC1:
+        nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), src1.step,
+            src2.ptr<Npp8u>(), src2.step,
+            dst.ptr<Npp8u>(), dst.step, sz) );
+        break;
+    case CV_8UC4:
+        nppSafeCall( nppiAbsDiff_8u_C4R(src1.ptr<Npp8u>(), src1.step,
+            src2.ptr<Npp8u>(), src2.step,
+            dst.ptr<Npp8u>(), dst.step, sz) );
+        break;
+    case CV_32SC1:
+        nppSafeCall( nppiAbsDiff_32s_C1R(src1.ptr<Npp32s>(), src1.step,
+            src2.ptr<Npp32s>(), src2.step,
+            dst.ptr<Npp32s>(), dst.step, sz) );
+        break;
+    case CV_32FC1:
+        nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), src1.step,
+            src2.ptr<Npp32f>(), src2.step,
+            dst.ptr<Npp32f>(), dst.step, sz) );
+        break;
+    default:
+        CV_Assert(!"Unsupported source type");
+    }
+}
+
+void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst)
+{
+    CV_Assert(src.type() == CV_32FC1);
+
+    dst.create( src.size(), src.type() );
+
+    NppiSize sz;
+    sz.width  = src.cols;
+    sz.height = src.rows;
+
+    nppSafeCall( nppiAbsDiffC_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz, (Npp32f)s[0]) );
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Comparison of two matrices
+
+namespace cv { namespace gpu { namespace mathfunc
+{
+    void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);
+    void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);
+}}}
+
+void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop)
+{
+    CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
+
+    CV_Assert(src1.type() == CV_8UC4 || src1.type() == CV_32FC1);
+
+    dst.create( src1.size(), CV_8UC1 );
+
+    static const NppCmpOp nppCmpOp[] = { NPP_CMP_EQ, NPP_CMP_GREATER, NPP_CMP_GREATER_EQ, NPP_CMP_LESS, NPP_CMP_LESS_EQ };
+
+    NppiSize sz;
+    sz.width  = src1.cols;
+    sz.height = src1.rows;
+
+    if (src1.type() == CV_8UC4)
+    {
+        if (cmpop != CMP_NE)
+        {
+            nppSafeCall( nppiCompare_8u_C4R(src1.ptr<Npp8u>(), src1.step,
+                src2.ptr<Npp8u>(), src2.step,
+                dst.ptr<Npp8u>(), dst.step, sz, nppCmpOp[cmpop]) );
+        }
+        else
+        {
+            mathfunc::compare_ne_8uc4(src1, src2, dst);
+        }
+    }
+    else
+    {
+        if (cmpop != CMP_NE)
+        {
+            nppSafeCall( nppiCompare_32f_C1R(src1.ptr<Npp32f>(), src1.step,
+                src2.ptr<Npp32f>(), src2.step,
+                dst.ptr<Npp8u>(), dst.step, sz, nppCmpOp[cmpop]) );
+        }
+        else
+        {
+            mathfunc::compare_ne_32f(src1, src2, dst);
+        }
+    }
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Unary bitwise logical operations
+
+namespace cv { namespace gpu { namespace mathfunc
+{
+    void bitwiseNotCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src, PtrStep dst, cudaStream_t stream);
+
+    template <typename T>
+    void bitwiseMaskNotCaller(int rows, int cols, int cn, const PtrStep src, const PtrStep mask, PtrStep dst, cudaStream_t stream);
+}}}
+
+namespace
+{
+    void bitwiseNotCaller(const GpuMat& src, GpuMat& dst, cudaStream_t stream)
+    {
+        dst.create(src.size(), src.type());
+
+        cv::gpu::mathfunc::bitwiseNotCaller(src.rows, src.cols, src.elemSize1(),
+                                            dst.channels(), src, dst, stream);
+    }
+
+
+    void bitwiseNotCaller(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
+    {
+        using namespace cv::gpu;
+
+        typedef void (*Caller)(int, int, int, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
+        static Caller callers[] = {mathfunc::bitwiseMaskNotCaller<unsigned char>, mathfunc::bitwiseMaskNotCaller<unsigned char>,
+                                   mathfunc::bitwiseMaskNotCaller<unsigned short>, mathfunc::bitwiseMaskNotCaller<unsigned short>,
+                                   mathfunc::bitwiseMaskNotCaller<unsigned int>, mathfunc::bitwiseMaskNotCaller<unsigned int>,
+                                   mathfunc::bitwiseMaskNotCaller<unsigned int>};
+
+        CV_Assert(mask.type() == CV_8U && mask.size() == src.size());
+        dst.create(src.size(), src.type());
+
+        Caller caller = callers[src.depth()];
+        CV_Assert(caller);
+
+        int cn = src.depth() != CV_64F ? src.channels() : src.channels() * (sizeof(double) / sizeof(unsigned int));
+        caller(src.rows, src.cols, cn, src, mask, dst, stream);
+    }
+
+}
+
+
+void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask)
+{
+    if (mask.empty())
+        ::bitwiseNotCaller(src, dst, 0);
+    else
+        ::bitwiseNotCaller(src, dst, mask, 0);
+}
+
+
+void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, const Stream& stream)
+{
+    if (mask.empty())
+        ::bitwiseNotCaller(src, dst, StreamAccessor::getStream(stream));
+    else
+        ::bitwiseNotCaller(src, dst, mask, StreamAccessor::getStream(stream));
+}
+
+
+cv::gpu::GpuMat cv::gpu::operator ~ (const GpuMat& src)
+{
+    GpuMat dst;
+    bitwise_not(src, dst);
+    return dst;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Binary bitwise logical operations
+
+namespace cv { namespace gpu { namespace mathfunc
+{
+    void bitwiseOrCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream);
+
+    template <typename T>
+    void bitwiseMaskOrCaller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream);
+
+    void bitwiseAndCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream);
+
+    template <typename T>
+    void bitwiseMaskAndCaller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream);
+
+    void bitwiseXorCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream);
+
+    template <typename T>
+    void bitwiseMaskXorCaller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream);
+}}}
+
+
+namespace
+{
+    void bitwiseOrCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
+    {
+        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+        dst.create(src1.size(), src1.type());
+
+        cv::gpu::mathfunc::bitwiseOrCaller(dst.rows, dst.cols, dst.elemSize1(),
+                                           dst.channels(), src1, src2, dst, stream);
+    }
+
+
+    void bitwiseOrCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
+    {
+        using namespace cv::gpu;
+
+        typedef void (*Caller)(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
+        static Caller callers[] = {mathfunc::bitwiseMaskOrCaller<unsigned char>, mathfunc::bitwiseMaskOrCaller<unsigned char>,
+                                   mathfunc::bitwiseMaskOrCaller<unsigned short>, mathfunc::bitwiseMaskOrCaller<unsigned short>,
+                                   mathfunc::bitwiseMaskOrCaller<unsigned int>, mathfunc::bitwiseMaskOrCaller<unsigned int>,
+                                   mathfunc::bitwiseMaskOrCaller<unsigned int>};
+
+        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+        dst.create(src1.size(), src1.type());
+
+        Caller caller = callers[src1.depth()];
+        CV_Assert(caller);
+
+        int cn = dst.depth() != CV_64F ? dst.channels() : dst.channels() * (sizeof(double) / sizeof(unsigned int));
+        caller(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
+    }
+
+
+    void bitwiseAndCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
+    {
+        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+        dst.create(src1.size(), src1.type());
+
+        cv::gpu::mathfunc::bitwiseAndCaller(dst.rows, dst.cols, dst.elemSize1(),
+                                            dst.channels(), src1, src2, dst, stream);
+    }
+
+
+    void bitwiseAndCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
+    {
+        using namespace cv::gpu;
+
+        typedef void (*Caller)(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
+        static Caller callers[] = {mathfunc::bitwiseMaskAndCaller<unsigned char>, mathfunc::bitwiseMaskAndCaller<unsigned char>,
+                                   mathfunc::bitwiseMaskAndCaller<unsigned short>, mathfunc::bitwiseMaskAndCaller<unsigned short>,
+                                   mathfunc::bitwiseMaskAndCaller<unsigned int>, mathfunc::bitwiseMaskAndCaller<unsigned int>,
+                                   mathfunc::bitwiseMaskAndCaller<unsigned int>};
+
+        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+        dst.create(src1.size(), src1.type());
+
+        Caller caller = callers[src1.depth()];
+        CV_Assert(caller);
+
+        int cn = dst.depth() != CV_64F ? dst.channels() : dst.channels() * (sizeof(double) / sizeof(unsigned int));
+        caller(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
+    }
+
+
+    void bitwiseXorCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
+    {
+        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+        dst.create(src1.size(), src1.type());
+
+        cv::gpu::mathfunc::bitwiseXorCaller(dst.rows, dst.cols, dst.elemSize1(),
+                                            dst.channels(), src1, src2, dst, stream);
+    }
+
+
+    void bitwiseXorCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
+    {
+        using namespace cv::gpu;
+
+        typedef void (*Caller)(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
+        static Caller callers[] = {mathfunc::bitwiseMaskXorCaller<unsigned char>, mathfunc::bitwiseMaskXorCaller<unsigned char>,
+                                   mathfunc::bitwiseMaskXorCaller<unsigned short>, mathfunc::bitwiseMaskXorCaller<unsigned short>,
+                                   mathfunc::bitwiseMaskXorCaller<unsigned int>, mathfunc::bitwiseMaskXorCaller<unsigned int>,
+                                   mathfunc::bitwiseMaskXorCaller<unsigned int>};
+
+        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+        dst.create(src1.size(), src1.type());
+
+        Caller caller = callers[src1.depth()];
+        CV_Assert(caller);
+
+        int cn = dst.depth() != CV_64F ? dst.channels() : dst.channels() * (sizeof(double) / sizeof(unsigned int));
+        caller(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
+    }
+}
+
+
+void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask)
+{
+    if (mask.empty())
+        ::bitwiseOrCaller(src1, src2, dst, 0);
+    else
+        ::bitwiseOrCaller(src1, src2, dst, mask, 0);
+}
+
+
+void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream)
+{
+    if (mask.empty())
+        ::bitwiseOrCaller(src1, src2, dst, StreamAccessor::getStream(stream));
+    else
+        ::bitwiseOrCaller(src1, src2, dst, mask, StreamAccessor::getStream(stream));
+}
+
+
+void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask)
+{
+    if (mask.empty())
+        ::bitwiseAndCaller(src1, src2, dst, 0);
+    else
+        ::bitwiseAndCaller(src1, src2, dst, mask, 0);
+}
+
+
+void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream)
+{
+    if (mask.empty())
+        ::bitwiseAndCaller(src1, src2, dst, StreamAccessor::getStream(stream));
+    else
+        ::bitwiseAndCaller(src1, src2, dst, mask, StreamAccessor::getStream(stream));
+}
+
+
+void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask)
+{
+    if (mask.empty())
+        ::bitwiseXorCaller(src1, src2, dst, 0);
+    else
+        ::bitwiseXorCaller(src1, src2, dst, mask, 0);
+}
+
+
+void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream)
+{
+    if (mask.empty())
+        ::bitwiseXorCaller(src1, src2, dst, StreamAccessor::getStream(stream));
+    else
+        ::bitwiseXorCaller(src1, src2, dst, mask, StreamAccessor::getStream(stream));
+}
+
+
+cv::gpu::GpuMat cv::gpu::operator | (const GpuMat& src1, const GpuMat& src2)
+{
+    GpuMat dst;
+    bitwise_or(src1, src2, dst);
+    return dst;
+}
+
+
+cv::gpu::GpuMat cv::gpu::operator & (const GpuMat& src1, const GpuMat& src2)
+{
+    GpuMat dst;
+    bitwise_and(src1, src2, dst);
+    return dst;
+}
+
+
+cv::gpu::GpuMat cv::gpu::operator ^ (const GpuMat& src1, const GpuMat& src2)
+{
+    GpuMat dst;
+    bitwise_xor(src1, src2, dst);
+    return dst;
+}
+
+#endif
\ No newline at end of file
diff --git a/tests/gpu/src/bitwise_oper.cpp b/tests/gpu/src/bitwise_oper.cpp
index 9e52e76..d391ff9 100644
--- a/tests/gpu/src/bitwise_oper.cpp
+++ b/tests/gpu/src/bitwise_oper.cpp
@@ -53,7 +53,7 @@ using namespace std;
 
 struct CV_GpuBitwiseTest: public CvTest
 {
-    CV_GpuBitwiseTest(): CvTest("GPU-BitwiseOpers", "bitwiseMatOperators") {}
+    CV_GpuBitwiseTest(): CvTest("GPU-BitwiseOpersTest", "bitwiseMatOperators") {}
 
     void run(int)
     {
-- 
2.7.4
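
Not part of the patch: a minimal usage sketch of the per-element API after the move, assuming an OpenCV 2.x build with the gpu module compiled against CUDA (without CUDA the stubs above simply throw). Matrix sizes, fill values, and variable names are illustrative only.

    #include <opencv2/core/core.hpp>
    #include <opencv2/gpu/gpu.hpp>

    int main()
    {
        // Host inputs; CV_32FC1 is accepted by both the matrix and the scalar arithmetic paths.
        cv::Mat a(256, 256, CV_32FC1, cv::Scalar(3.0f));
        cv::Mat b(256, 256, CV_32FC1, cv::Scalar(2.0f));

        // Upload to the device.
        cv::gpu::GpuMat ga(a), gb(b), gc;

        cv::gpu::add(ga, gb, gc);                   // c = a + b via the NPP-backed caller
        cv::gpu::subtract(ga, cv::Scalar(1.0), gc); // c = a - 1, CV_32FC1 scalar path
        cv::gpu::absdiff(ga, gb, gc);               // c = |a - b|
        cv::gpu::compare(ga, gb, gc, cv::CMP_NE);   // CV_8UC1 mask; CMP_NE uses the custom kernel

        // Bitwise operations; the masked overloads dispatch by element depth.
        cv::Mat i1(256, 256, CV_8UC1, cv::Scalar(0x0F));
        cv::Mat i2(256, 256, CV_8UC1, cv::Scalar(0xF0));
        cv::gpu::GpuMat gi(i1), gj(i2), gk;
        cv::gpu::bitwise_or(gi, gj, gk);            // equivalent to gk = gi | gj

        // Download a result for inspection on the host.
        cv::Mat result;
        gk.download(result);
        return 0;
    }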