From 69be49bac1c2f19adee806cf39e802df436ecaad Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 16 Apr 2013 13:03:34 +0400 Subject: [PATCH] refactored GpuMat: * switch to InputArray/OutputArray * add Stream support --- modules/core/include/opencv2/core/gpu.hpp | 61 +++++-- modules/core/include/opencv2/core/gpu.inl.hpp | 40 ++++- modules/core/src/gpu_mat.cpp | 218 ++++++++++++++++++-------- modules/core/src/gpu_stream.cpp | 80 +--------- modules/core/src/matrix.cpp | 3 + 5 files changed, 240 insertions(+), 162 deletions(-) diff --git a/modules/core/include/opencv2/core/gpu.hpp b/modules/core/include/opencv2/core/gpu.hpp index f0050f2..cab7df5 100644 --- a/modules/core/include/opencv2/core/gpu.hpp +++ b/modules/core/include/opencv2/core/gpu.hpp @@ -84,8 +84,8 @@ public: GpuMat(const GpuMat& m, Range rowRange, Range colRange); GpuMat(const GpuMat& m, Rect roi); - //! builds GpuMat from Mat. Perfom blocking upload to device - explicit GpuMat(const Mat& m); + //! builds GpuMat from host memory (Blocking call) + explicit GpuMat(InputArray arr); //! destructor - calls release() ~GpuMat(); @@ -103,26 +103,59 @@ public: //! swaps with other smart pointer void swap(GpuMat& mat); - //! pefroms blocking upload data to GpuMat - void upload(const Mat& m); + //! pefroms upload data to GpuMat (Blocking call) + void upload(InputArray arr); - //! downloads data from device to host memory (Blocking calls) - void download(Mat& m) const; + //! pefroms upload data to GpuMat (Non-Blocking call) + void upload(InputArray arr, Stream& stream); + + //! pefroms download data from device to host memory (Blocking call) + void download(OutputArray dst) const; + + //! pefroms download data from device to host memory (Non-Blocking call) + void download(OutputArray dst, Stream& stream) const; //! returns deep copy of the GpuMat, i.e. the data is copied GpuMat clone() const; - //! copies the GpuMat content to "m" - void copyTo(GpuMat& m) const; + //! copies the GpuMat content to device memory (Blocking call) + void copyTo(OutputArray dst) const; + + //! copies the GpuMat content to device memory (Non-Blocking call) + void copyTo(OutputArray dst, Stream& stream) const; + + //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Blocking call) + void copyTo(OutputArray dst, InputArray mask) const; + + //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Non-Blocking call) + void copyTo(OutputArray dst, InputArray mask, Stream& stream) const; + + //! sets some of the GpuMat elements to s (Blocking call) + GpuMat& setTo(Scalar s); + + //! sets some of the GpuMat elements to s (Non-Blocking call) + GpuMat& setTo(Scalar s, Stream& stream); + + //! sets some of the GpuMat elements to s, according to the mask (Blocking call) + GpuMat& setTo(Scalar s, InputArray mask); + + //! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call) + GpuMat& setTo(Scalar s, InputArray mask, Stream& stream); + + //! converts GpuMat to another datatype (Blocking call) + void convertTo(OutputArray dst, int rtype) const; + + //! converts GpuMat to another datatype (Non-Blocking call) + void convertTo(OutputArray dst, int rtype, Stream& stream) const; - //! copies those GpuMat elements to "m" that are marked with non-zero mask elements - void copyTo(GpuMat& m, const GpuMat& mask) const; + //! converts GpuMat to another datatype with scaling (Blocking call) + void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const; - //! sets some of the GpuMat elements to s, according to the mask - GpuMat& setTo(Scalar s, const GpuMat& mask = GpuMat()); + //! converts GpuMat to another datatype with scaling (Non-Blocking call) + void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const; - //! converts GpuMat to another datatype with optional scaling - void convertTo(GpuMat& m, int rtype, double alpha = 1, double beta = 0) const; + //! converts GpuMat to another datatype with scaling (Non-Blocking call) + void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const; void assignTo(GpuMat& m, int type=-1) const; diff --git a/modules/core/include/opencv2/core/gpu.inl.hpp b/modules/core/include/opencv2/core/gpu.inl.hpp index cf295a0..acc1f2d 100644 --- a/modules/core/include/opencv2/core/gpu.inl.hpp +++ b/modules/core/include/opencv2/core/gpu.inl.hpp @@ -103,10 +103,10 @@ GpuMat::GpuMat(const GpuMat& m) } inline -GpuMat::GpuMat(const Mat& m) : +GpuMat::GpuMat(InputArray arr) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) { - upload(m); + upload(arr); } inline @@ -155,6 +155,42 @@ GpuMat GpuMat::clone() const } inline +void GpuMat::copyTo(OutputArray dst, InputArray mask) const +{ + copyTo(dst, mask, Stream::Null()); +} + +inline +GpuMat& GpuMat::setTo(Scalar s) +{ + return setTo(s, Stream::Null()); +} + +inline +GpuMat& GpuMat::setTo(Scalar s, InputArray mask) +{ + return setTo(s, mask, Stream::Null()); +} + +inline +void GpuMat::convertTo(OutputArray dst, int rtype) const +{ + convertTo(dst, rtype, Stream::Null()); +} + +inline +void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, double beta) const +{ + convertTo(dst, rtype, alpha, beta, Stream::Null()); +} + +inline +void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const +{ + convertTo(dst, rtype, alpha, 0.0, stream); +} + +inline void GpuMat::assignTo(GpuMat& m, int _type) const { if (_type < 0) diff --git a/modules/core/src/gpu_mat.cpp b/modules/core/src/gpu_mat.cpp index 144828b..fb8251c 100644 --- a/modules/core/src/gpu_mat.cpp +++ b/modules/core/src/gpu_mat.cpp @@ -328,18 +328,9 @@ namespace // Dispatcher -namespace cv { namespace gpu -{ - void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0); - void convert(const GpuMat& src, GpuMat& dst, cudaStream_t stream = 0); - void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream = 0); - void set(GpuMat& m, Scalar s, cudaStream_t stream = 0); - void set(GpuMat& m, Scalar s, const GpuMat& mask, cudaStream_t stream = 0); -}} - -namespace cv { namespace gpu +namespace { - void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream) + void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0) { CV_DbgAssert( src.size() == dst.size() && src.type() == dst.type() ); @@ -368,7 +359,7 @@ namespace cv { namespace gpu func(src, dst, mask, stream); } - void convert(const GpuMat& src, GpuMat& dst, cudaStream_t stream) + void convert(const GpuMat& src, GpuMat& dst, cudaStream_t stream = 0) { CV_DbgAssert( src.size() == dst.size() && src.channels() == dst.channels() ); @@ -461,7 +452,7 @@ namespace cv { namespace gpu func(src, dst, stream); } - void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream) + void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream = 0) { CV_DbgAssert( src.size() == dst.size() && src.channels() == dst.channels() ); @@ -476,7 +467,7 @@ namespace cv { namespace gpu cudaConvert(src, dst, alpha, beta, stream); } - void set(GpuMat& m, Scalar s, cudaStream_t stream) + void set(GpuMat& m, Scalar s, cudaStream_t stream = 0) { if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0) { @@ -524,7 +515,7 @@ namespace cv { namespace gpu funcs[m.depth()][m.channels() - 1](m, s, stream); } - void set(GpuMat& m, Scalar s, const GpuMat& mask, cudaStream_t stream) + void set(GpuMat& m, Scalar s, const GpuMat& mask, cudaStream_t stream = 0) { CV_DbgAssert( !mask.empty() ); @@ -549,7 +540,7 @@ namespace cv { namespace gpu funcs[m.depth()][m.channels() - 1](m, s, mask, stream); } -}} +} #endif // HAVE_CUDA @@ -723,127 +714,216 @@ void cv::gpu::GpuMat::release() #endif } -void cv::gpu::GpuMat::upload(const Mat& m) +void cv::gpu::GpuMat::upload(InputArray arr) +{ +#ifndef HAVE_CUDA + (void) arr; + throw_no_cuda(); +#else + Mat mat = arr.getMat(); + + CV_DbgAssert( !mat.empty() ); + + create(mat.size(), mat.type()); + + cudaSafeCall( cudaMemcpy2D(data, step, mat.data, mat.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) ); +#endif +} + +void cv::gpu::GpuMat::upload(InputArray arr, Stream& _stream) { #ifndef HAVE_CUDA - (void) m; + (void) arr; + (void) _stream; throw_no_cuda(); #else - CV_DbgAssert( !m.empty() ); + Mat mat = arr.getMat(); - create(m.size(), m.type()); + CV_DbgAssert( !mat.empty() ); - cudaSafeCall( cudaMemcpy2D(data, step, m.data, m.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) ); + create(mat.size(), mat.type()); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + cudaSafeCall( cudaMemcpy2DAsync(data, step, mat.data, mat.step, cols * elemSize(), rows, cudaMemcpyHostToDevice, stream) ); #endif } -void cv::gpu::GpuMat::download(Mat& m) const +void cv::gpu::GpuMat::download(OutputArray _dst) const { #ifndef HAVE_CUDA - (void) m; + (void) _dst; throw_no_cuda(); #else CV_DbgAssert( !empty() ); - m.create(size(), type()); + _dst.create(size(), type()); + Mat dst = _dst.getMat(); - cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) ); + cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) ); #endif } -void cv::gpu::GpuMat::copyTo(GpuMat& m) const +void cv::gpu::GpuMat::download(OutputArray _dst, Stream& _stream) const { #ifndef HAVE_CUDA - (void) m; + (void) _dst; + (void) _stream; throw_no_cuda(); #else CV_DbgAssert( !empty() ); - m.create(size(), type()); + _dst.create(size(), type()); + Mat dst = _dst.getMat(); - cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice) ); + cudaStream_t stream = StreamAccessor::getStream(_stream); + cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost, stream) ); #endif } -void cv::gpu::GpuMat::copyTo(GpuMat& mat, const GpuMat& mask) const +void cv::gpu::GpuMat::copyTo(OutputArray _dst) const { #ifndef HAVE_CUDA - (void) mat; - (void) mask; + (void) _dst; throw_no_cuda(); #else CV_DbgAssert( !empty() ); - if (mask.empty()) - { - copyTo(mat); - } - else - { - mat.create(size(), type()); + _dst.create(size(), type()); + GpuMat dst = _dst.getGpuMat(); - copyWithMask(*this, mat, mask); - } + cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice) ); +#endif +} + +void cv::gpu::GpuMat::copyTo(OutputArray _dst, Stream& _stream) const +{ +#ifndef HAVE_CUDA + (void) _dst; + (void) _stream; + throw_no_cuda(); +#else + CV_DbgAssert( !empty() ); + + _dst.create(size(), type()); + GpuMat dst = _dst.getGpuMat(); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice, stream) ); +#endif +} + +void cv::gpu::GpuMat::copyTo(OutputArray _dst, InputArray _mask, Stream& _stream) const +{ +#ifndef HAVE_CUDA + (void) _dst; + (void) _mask; + (void) _stream; + throw_no_cuda(); +#else + CV_DbgAssert( !empty() ); + + _dst.create(size(), type()); + GpuMat dst = _dst.getGpuMat(); + + GpuMat mask = _mask.getGpuMat(); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + ::copyWithMask(*this, dst, mask, stream); #endif } -GpuMat& cv::gpu::GpuMat::setTo(Scalar s, const GpuMat& mask) +GpuMat& cv::gpu::GpuMat::setTo(Scalar s, Stream& _stream) { #ifndef HAVE_CUDA (void) s; - (void) mask; + (void) _stream; throw_no_cuda(); - return *this; #else CV_DbgAssert( !empty() ); - if (mask.empty()) - set(*this, s); - else - set(*this, s, mask); + cudaStream_t stream = StreamAccessor::getStream(_stream); + ::set(*this, s, stream); +#endif return *this; +} + +GpuMat& cv::gpu::GpuMat::setTo(Scalar s, InputArray _mask, Stream& _stream) +{ +#ifndef HAVE_CUDA + (void) s; + (void) _mask; + (void) _stream; + throw_no_cuda(); +#else + CV_DbgAssert( !empty() ); + + GpuMat mask = _mask.getGpuMat(); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + ::set(*this, s, mask, stream); #endif + + return *this; } -void cv::gpu::GpuMat::convertTo(GpuMat& dst, int rtype, double alpha, double beta) const +void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, Stream& _stream) const { #ifndef HAVE_CUDA - (void) dst; + (void) _dst; (void) rtype; - (void) alpha; - (void) beta; + (void) _stream; throw_no_cuda(); #else - bool noScale = fabs(alpha - 1) < std::numeric_limits::epsilon() && fabs(beta) < std::numeric_limits::epsilon(); - if (rtype < 0) rtype = type(); else rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels()); - int sdepth = depth(); - int ddepth = CV_MAT_DEPTH(rtype); - if (sdepth == ddepth && noScale) + const int sdepth = depth(); + const int ddepth = CV_MAT_DEPTH(rtype); + if (sdepth == ddepth) { - copyTo(dst); + if (_stream) + copyTo(_dst, _stream); + else + copyTo(_dst); + return; } - GpuMat temp; - const GpuMat* psrc = this; - if (sdepth != ddepth && psrc == &dst) - { - temp = *this; - psrc = &temp; - } + GpuMat src = *this; - dst.create(size(), rtype); + _dst.create(size(), rtype); + GpuMat dst = _dst.getGpuMat(); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + ::convert(src, dst, stream); +#endif +} - if (noScale) - convert(*psrc, dst); +void cv::gpu::GpuMat::convertTo(OutputArray _dst, int rtype, double alpha, double beta, Stream& _stream) const +{ +#ifndef HAVE_CUDA + (void) _dst; + (void) rtype; + (void) alpha; + (void) beta; + (void) _stream; + throw_no_cuda(); +#else + if (rtype < 0) + rtype = type(); else - convert(*psrc, dst, alpha, beta); + rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels()); + + GpuMat src = *this; + + _dst.create(size(), rtype); + GpuMat dst = _dst.getGpuMat(); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + ::convert(src, dst, alpha, beta, stream); #endif } diff --git a/modules/core/src/gpu_stream.cpp b/modules/core/src/gpu_stream.cpp index cebaaa3..4a911fe 100644 --- a/modules/core/src/gpu_stream.cpp +++ b/modules/core/src/gpu_stream.cpp @@ -70,14 +70,6 @@ void cv::gpu::Stream::release() { throw_no_cuda(); } #else /* !defined (HAVE_CUDA) */ -namespace cv { namespace gpu -{ - void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0); - void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream = 0); - void set(GpuMat& m, Scalar s, cudaStream_t stream = 0); - void set(GpuMat& m, Scalar s, const GpuMat& mask, cudaStream_t stream = 0); -}} - struct Stream::Impl { static cudaStream_t getStream(const Impl* impl) @@ -189,83 +181,17 @@ void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst) void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val) { - const int sdepth = src.depth(); - - if (sdepth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); - } - - cudaStream_t stream = Impl::getStream(impl); - - if (val[0] == 0.0 && val[1] == 0.0 && val[2] == 0.0 && val[3] == 0.0) - { - cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, stream) ); - return; - } - - if (sdepth == CV_8U) - { - int cn = src.channels(); - - if (cn == 1 || (cn == 2 && val[0] == val[1]) || (cn == 3 && val[0] == val[1] && val[0] == val[2]) || (cn == 4 && val[0] == val[1] && val[0] == val[2] && val[0] == val[3])) - { - int ival = saturate_cast(val[0]); - cudaSafeCall( cudaMemset2DAsync(src.data, src.step, ival, src.cols * src.elemSize(), src.rows, stream) ); - return; - } - } - - set(src, val, stream); + src.setTo(val, *this); } void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask) { - const int sdepth = src.depth(); - - if (sdepth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); - } - - CV_Assert(mask.type() == CV_8UC1); - - cudaStream_t stream = Impl::getStream(impl); - - set(src, val, mask, stream); + src.setTo(val, mask, *this); } void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int dtype, double alpha, double beta) { - if (dtype < 0) - dtype = src.type(); - else - dtype = CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()); - - const int sdepth = src.depth(); - const int ddepth = CV_MAT_DEPTH(dtype); - - if (sdepth == CV_64F || ddepth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); - } - - bool noScale = fabs(alpha - 1) < std::numeric_limits::epsilon() - && fabs(beta) < std::numeric_limits::epsilon(); - - if (sdepth == ddepth && noScale) - { - enqueueCopy(src, dst); - return; - } - - dst.create(src.size(), dtype); - - cudaStream_t stream = Impl::getStream(impl); - convert(src, dst, alpha, beta, stream); + src.convertTo(dst, dtype, alpha, beta, *this); } #if CUDART_VERSION >= 5000 diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 2f3e002..053dd1c 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -1132,6 +1132,9 @@ gpu::GpuMat _InputArray::getGpuMat() const return gpu::GpuMat(); } + if (k == NONE) + return gpu::GpuMat(); + CV_Error(cv::Error::StsNotImplemented, "getGpuMat is available only for gpu::GpuMat and gpu::CudaMem"); return gpu::GpuMat(); } -- 2.7.4