From: Vladislav Vinogradov Date: Wed, 24 Dec 2014 10:38:02 +0000 (+0300) Subject: use new getInputMat/getOutputMat/syncOutput methods in cudaarithm routines X-Git-Tag: accepted/tizen/6.0/unified/20201030.111113~2738^2~3 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=7454189c2a9fadadb7f3ddfc8b24a2070b70cda6;p=platform%2Fupstream%2Fopencv.git use new getInputMat/getOutputMat/syncOutput methods in cudaarithm routines --- diff --git a/modules/cudaarithm/src/arithm.cpp b/modules/cudaarithm/src/arithm.cpp index 63246ab..b2107dd 100644 --- a/modules/cudaarithm/src/arithm.cpp +++ b/modules/cudaarithm/src/arithm.cpp @@ -169,9 +169,9 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray #else // CUBLAS works with column-major matrices - GpuMat src1 = _src1.getGpuMat(); - GpuMat src2 = _src2.getGpuMat(); - GpuMat src3 = _src3.getGpuMat(); + GpuMat src1 = getInputMat(_src1, stream); + GpuMat src2 = getInputMat(_src2, stream); + GpuMat src3 = getInputMat(_src3, stream); CV_Assert( src1.type() == CV_32FC1 || src1.type() == CV_32FC2 || src1.type() == CV_64FC1 || src1.type() == CV_64FC2 ); CV_Assert( src2.type() == src1.type() && (src3.empty() || src3.type() == src1.type()) ); @@ -200,8 +200,7 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray CV_Assert( src1Size.width == src2Size.height ); CV_Assert( src3.empty() || src3Size == dstSize ); - _dst.create(dstSize, src1.type()); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, dstSize, src1.type(), stream); if (beta != 0) { @@ -281,6 +280,8 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray } cublasSafeCall( cublasDestroy_v2(handle) ); + + syncOutput(dst, _dst, stream); #endif } @@ -297,7 +298,7 @@ void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags, (void) stream; throw_no_cuda(); #else - GpuMat src = _src.getGpuMat(); + GpuMat src = getInputMat(_src, stream); CV_Assert( src.type() == CV_32FC1 || src.type() == CV_32FC2 ); @@ -462,16 +463,15 @@ namespace void ConvolutionImpl::convolve(InputArray _image, InputArray _templ, OutputArray _result, bool ccorr, Stream& _stream) { - GpuMat image = _image.getGpuMat(); - GpuMat templ = _templ.getGpuMat(); + GpuMat image = getInputMat(_image, _stream); + GpuMat templ = getInputMat(_templ, _stream); CV_Assert( image.type() == CV_32FC1 ); CV_Assert( templ.type() == CV_32FC1 ); create(image.size(), templ.size()); - _result.create(result_size, CV_32FC1); - GpuMat result = _result.getGpuMat(); + GpuMat result = getOutputMat(_result, result_size, CV_32FC1, _stream); cudaStream_t stream = StreamAccessor::getStream(_stream); @@ -520,6 +520,8 @@ namespace cufftSafeCall( cufftDestroy(planR2C) ); cufftSafeCall( cufftDestroy(planC2R) ); + + syncOutput(result, _result, _stream); } } diff --git a/modules/cudaarithm/src/core.cpp b/modules/cudaarithm/src/core.cpp index eb71d6a..7dd51f9 100644 --- a/modules/cudaarithm/src/core.cpp +++ b/modules/cudaarithm/src/core.cpp @@ -119,15 +119,17 @@ void cv::cuda::flip(InputArray _src, OutputArray _dst, int flipCode, Stream& str {NppMirror::call, 0, NppMirror::call, NppMirror::call} }; - GpuMat src = _src.getGpuMat(); + GpuMat src = getInputMat(_src, stream); CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F); CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4); _dst.create(src.size(), src.type()); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream); funcs[src.depth()][src.channels() - 1](src, dst, flipCode, StreamAccessor::getStream(stream)); + + syncOutput(dst, _dst, stream); } #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/cudaarithm/src/cuda/add_weighted.cu b/modules/cudaarithm/src/cuda/add_weighted.cu index d5c00f6..9293010 100644 --- a/modules/cudaarithm/src/cuda/add_weighted.cu +++ b/modules/cudaarithm/src/cuda/add_weighted.cu @@ -50,7 +50,10 @@ #include "opencv2/cudaarithm.hpp" #include "opencv2/cudev.hpp" +#include "opencv2/core/private.cuda.hpp" +using namespace cv; +using namespace cv::cuda; using namespace cv::cudev; namespace @@ -63,7 +66,7 @@ namespace __device__ __forceinline__ D operator ()(T1 a, T2 b) const { - return saturate_cast(a * alpha + b * beta + gamma); + return cudev::saturate_cast(a * alpha + b * beta + gamma); } }; @@ -555,8 +558,8 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou } }; - GpuMat src1 = _src1.getGpuMat(); - GpuMat src2 = _src2.getGpuMat(); + GpuMat src1 = getInputMat(_src1, stream); + GpuMat src2 = getInputMat(_src2, stream); int sdepth1 = src1.depth(); int sdepth2 = src2.depth(); @@ -564,19 +567,18 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou ddepth = ddepth >= 0 ? CV_MAT_DEPTH(ddepth) : std::max(sdepth1, sdepth2); const int cn = src1.channels(); - CV_DbgAssert( src2.size() == src1.size() && src2.channels() == cn ); - CV_DbgAssert( sdepth1 <= CV_64F && sdepth2 <= CV_64F && ddepth <= CV_64F ); + CV_Assert( src2.size() == src1.size() && src2.channels() == cn ); + CV_Assert( sdepth1 <= CV_64F && sdepth2 <= CV_64F && ddepth <= CV_64F ); - _dst.create(src1.size(), CV_MAKE_TYPE(ddepth, cn)); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, src1.size(), CV_MAKE_TYPE(ddepth, cn), stream); - GpuMat src1_ = src1.reshape(1); - GpuMat src2_ = src2.reshape(1); - GpuMat dst_ = dst.reshape(1); + GpuMat src1_single = src1.reshape(1); + GpuMat src2_single = src2.reshape(1); + GpuMat dst_single = dst.reshape(1); if (sdepth1 > sdepth2) { - src1_.swap(src2_); + src1_single.swap(src2_single); std::swap(alpha, beta); std::swap(sdepth1, sdepth2); } @@ -586,7 +588,9 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou if (!func) CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); - func(src1_, alpha, src2_, beta, gamma, dst_, stream); + func(src1_single, alpha, src2_single, beta, gamma, dst_single, stream); + + syncOutput(dst, _dst, stream); } #endif diff --git a/modules/cudaarithm/src/cuda/bitwise_mat.cu b/modules/cudaarithm/src/cuda/bitwise_mat.cu index b2bf288..f151c1a 100644 --- a/modules/cudaarithm/src/cuda/bitwise_mat.cu +++ b/modules/cudaarithm/src/cuda/bitwise_mat.cu @@ -50,7 +50,10 @@ #include "opencv2/cudaarithm.hpp" #include "opencv2/cudev.hpp" +#include "opencv2/core/private.cuda.hpp" +using namespace cv; +using namespace cv::cuda; using namespace cv::cudev; void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op); @@ -60,16 +63,15 @@ void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& m void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream) { - GpuMat src = _src.getGpuMat(); - GpuMat mask = _mask.getGpuMat(); + GpuMat src = getInputMat(_src, stream); + GpuMat mask = getInputMat(_mask, stream); const int depth = src.depth(); CV_DbgAssert( depth <= CV_32F ); CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) ); - _dst.create(src.size(), src.type()); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream); if (mask.empty()) { @@ -125,6 +127,8 @@ void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask, gridTransformUnary(vsrc, vdst, bit_not(), singleMaskChannels(globPtr(mask), src.channels()), stream); } } + + syncOutput(dst, _dst, stream); } ////////////////////////////////////////////////////////////////////////////// diff --git a/modules/cudaarithm/src/cuda/copy_make_border.cu b/modules/cudaarithm/src/cuda/copy_make_border.cu index f7dd91f..ce9cda3 100644 --- a/modules/cudaarithm/src/cuda/copy_make_border.cu +++ b/modules/cudaarithm/src/cuda/copy_make_border.cu @@ -50,7 +50,10 @@ #include "opencv2/cudaarithm.hpp" #include "opencv2/cudev.hpp" +#include "opencv2/core/private.cuda.hpp" +using namespace cv; +using namespace cv::cuda; using namespace cv::cudev; namespace @@ -133,7 +136,7 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo { copyMakeBorderImpl , 0 /*copyMakeBorderImpl*/, copyMakeBorderImpl , copyMakeBorderImpl } }; - GpuMat src = _src.getGpuMat(); + GpuMat src = getInputMat(_src, stream); const int depth = src.depth(); const int cn = src.channels(); @@ -141,8 +144,7 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo CV_Assert( depth <= CV_32F && cn <= 4 ); CV_Assert( borderType == BORDER_REFLECT_101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP ); - _dst.create(src.rows + top + bottom, src.cols + left + right, src.type()); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, src.rows + top + bottom, src.cols + left + right, src.type(), stream); const func_t func = funcs[depth][cn - 1]; @@ -150,6 +152,8 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); func(src, dst, top, left, borderType, value, stream); + + syncOutput(dst, _dst, stream); } #endif diff --git a/modules/cudaarithm/src/cuda/lut.cu b/modules/cudaarithm/src/cuda/lut.cu index 0b1fe8b..56efb8f 100644 --- a/modules/cudaarithm/src/cuda/lut.cu +++ b/modules/cudaarithm/src/cuda/lut.cu @@ -50,8 +50,10 @@ #include "opencv2/cudaarithm.hpp" #include "opencv2/cudev.hpp" +#include "opencv2/core/private.cuda.hpp" using namespace cv; +using namespace cv::cuda; using namespace cv::cudev; namespace @@ -165,7 +167,7 @@ namespace void LookUpTableImpl::transform(InputArray _src, OutputArray _dst, Stream& stream) { - GpuMat src = _src.getGpuMat(); + GpuMat src = getInputMat(_src, stream); const int cn = src.channels(); const int lut_cn = d_lut.channels(); @@ -173,8 +175,7 @@ namespace CV_Assert( src.type() == CV_8UC1 || src.type() == CV_8UC3 ); CV_Assert( lut_cn == 1 || lut_cn == cn ); - _dst.create(src.size(), src.type()); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream); if (lut_cn == 1) { @@ -196,6 +197,8 @@ namespace dst3.assign(lut_(src3, tbl), stream); } + + syncOutput(dst, _dst, stream); } } diff --git a/modules/cudaarithm/src/cuda/math.cu b/modules/cudaarithm/src/cuda/math.cu index 39f8220..41d762f 100644 --- a/modules/cudaarithm/src/cuda/math.cu +++ b/modules/cudaarithm/src/cuda/math.cu @@ -50,7 +50,10 @@ #include "opencv2/cudaarithm.hpp" #include "opencv2/cudev.hpp" +#include "opencv2/core/private.cuda.hpp" +using namespace cv; +using namespace cv::cuda; using namespace cv::cudev; namespace @@ -92,16 +95,15 @@ void cv::cuda::abs(InputArray _src, OutputArray _dst, Stream& stream) absMat }; - GpuMat src = _src.getGpuMat(); + GpuMat src = getInputMat(_src, stream); - const int depth = src.depth(); + CV_Assert( src.depth() <= CV_64F ); - CV_DbgAssert( depth <= CV_64F ); + GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream); - _dst.create(src.size(), src.type()); - GpuMat dst = _dst.getGpuMat(); + funcs[src.depth()](src.reshape(1), dst.reshape(1), stream); - funcs[depth](src.reshape(1), dst.reshape(1), stream); + syncOutput(dst, _dst, stream); } ////////////////////////////////////////////////////////////////////////////// @@ -113,7 +115,7 @@ namespace { __device__ __forceinline__ T operator ()(T x) const { - return saturate_cast(x * x); + return cudev::saturate_cast(x * x); } }; @@ -138,16 +140,15 @@ void cv::cuda::sqr(InputArray _src, OutputArray _dst, Stream& stream) sqrMat }; - GpuMat src = _src.getGpuMat(); + GpuMat src = getInputMat(_src, stream); - const int depth = src.depth(); + CV_Assert( src.depth() <= CV_64F ); - CV_DbgAssert( depth <= CV_64F ); + GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream); - _dst.create(src.size(), src.type()); - GpuMat dst = _dst.getGpuMat(); + funcs[src.depth()](src.reshape(1), dst.reshape(1), stream); - funcs[depth](src.reshape(1), dst.reshape(1), stream); + syncOutput(dst, _dst, stream); } ////////////////////////////////////////////////////////////////////////////// @@ -176,16 +177,15 @@ void cv::cuda::sqrt(InputArray _src, OutputArray _dst, Stream& stream) sqrtMat }; - GpuMat src = _src.getGpuMat(); + GpuMat src = getInputMat(_src, stream); - const int depth = src.depth(); + CV_Assert( src.depth() <= CV_64F ); - CV_DbgAssert( depth <= CV_64F ); + GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream); - _dst.create(src.size(), src.type()); - GpuMat dst = _dst.getGpuMat(); + funcs[src.depth()](src.reshape(1), dst.reshape(1), stream); - funcs[depth](src.reshape(1), dst.reshape(1), stream); + syncOutput(dst, _dst, stream); } //////////////////////////////////////////////////////////////////////// @@ -198,7 +198,7 @@ namespace __device__ __forceinline__ T operator ()(T x) const { exp_func f; - return saturate_cast(f(x)); + return cudev::saturate_cast(f(x)); } }; @@ -223,16 +223,15 @@ void cv::cuda::exp(InputArray _src, OutputArray _dst, Stream& stream) expMat }; - GpuMat src = _src.getGpuMat(); + GpuMat src = getInputMat(_src, stream); - const int depth = src.depth(); + CV_Assert( src.depth() <= CV_64F ); - CV_DbgAssert( depth <= CV_64F ); + GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream); - _dst.create(src.size(), src.type()); - GpuMat dst = _dst.getGpuMat(); + funcs[src.depth()](src.reshape(1), dst.reshape(1), stream); - funcs[depth](src.reshape(1), dst.reshape(1), stream); + syncOutput(dst, _dst, stream); } //////////////////////////////////////////////////////////////////////// @@ -261,16 +260,15 @@ void cv::cuda::log(InputArray _src, OutputArray _dst, Stream& stream) logMat }; - GpuMat src = _src.getGpuMat(); + GpuMat src = getInputMat(_src, stream); - const int depth = src.depth(); + CV_Assert( src.depth() <= CV_64F ); - CV_DbgAssert( depth <= CV_64F ); + GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream); - _dst.create(src.size(), src.type()); - GpuMat dst = _dst.getGpuMat(); + funcs[src.depth()](src.reshape(1), dst.reshape(1), stream); - funcs[depth](src.reshape(1), dst.reshape(1), stream); + syncOutput(dst, _dst, stream); } //////////////////////////////////////////////////////////////////////// @@ -284,7 +282,7 @@ namespace __device__ __forceinline__ T operator()(T e) const { - return saturate_cast(__powf((float)e, power)); + return cudev::saturate_cast(__powf((float)e, power)); } }; template struct PowOp : unary_function @@ -293,7 +291,7 @@ namespace __device__ __forceinline__ T operator()(T e) const { - T res = saturate_cast(__powf((float)e, power)); + T res = cudev::saturate_cast(__powf((float)e, power)); if ((e < 0) && (1 & static_cast(power))) res *= -1; @@ -344,16 +342,15 @@ void cv::cuda::pow(InputArray _src, double power, OutputArray _dst, Stream& stre powMat }; - GpuMat src = _src.getGpuMat(); + GpuMat src = getInputMat(_src, stream); - const int depth = src.depth(); + CV_Assert( src.depth() <= CV_64F ); - CV_DbgAssert(depth <= CV_64F); + GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream); - _dst.create(src.size(), src.type()); - GpuMat dst = _dst.getGpuMat(); + funcs[src.depth()](src.reshape(1), power, dst.reshape(1), stream); - funcs[depth](src.reshape(1), power, dst.reshape(1), stream); + syncOutput(dst, _dst, stream); } #endif diff --git a/modules/cudaarithm/src/cuda/mul_spectrums.cu b/modules/cudaarithm/src/cuda/mul_spectrums.cu index b060904..bd62f99 100644 --- a/modules/cudaarithm/src/cuda/mul_spectrums.cu +++ b/modules/cudaarithm/src/cuda/mul_spectrums.cu @@ -50,7 +50,10 @@ #include "opencv2/cudaarithm.hpp" #include "opencv2/cudev.hpp" +#include "opencv2/core/private.cuda.hpp" +using namespace cv; +using namespace cv::cuda; using namespace cv::cudev; ////////////////////////////////////////////////////////////////////////////// @@ -120,33 +123,33 @@ void cv::cuda::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst { (void) flags; - GpuMat src1 = _src1.getGpuMat(); - GpuMat src2 = _src2.getGpuMat(); + GpuMat src1 = getInputMat(_src1, stream); + GpuMat src2 = getInputMat(_src2, stream); CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2 ); CV_Assert( src1.size() == src2.size() ); - _dst.create(src1.size(), CV_32FC2); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, src1.size(), CV_32FC2, stream); if (conjB) gridTransformBinary(globPtr(src1), globPtr(src2), globPtr(dst), comlex_mul_conj(), stream); else gridTransformBinary(globPtr(src1), globPtr(src2), globPtr(dst), comlex_mul(), stream); + + syncOutput(dst, _dst, stream); } void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, float scale, bool conjB, Stream& stream) { (void) flags; - GpuMat src1 = _src1.getGpuMat(); - GpuMat src2 = _src2.getGpuMat(); + GpuMat src1 = getInputMat(_src1, stream); + GpuMat src2 = getInputMat(_src2, stream); CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2); CV_Assert( src1.size() == src2.size() ); - _dst.create(src1.size(), CV_32FC2); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, src1.size(), CV_32FC2, stream); if (conjB) { @@ -160,6 +163,8 @@ void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputAr op.scale = scale; gridTransformBinary(globPtr(src1), globPtr(src2), globPtr(dst), op, stream); } + + syncOutput(dst, _dst, stream); } #endif diff --git a/modules/cudaarithm/src/cuda/polar_cart.cu b/modules/cudaarithm/src/cuda/polar_cart.cu index 200b79c..0a949b4 100644 --- a/modules/cudaarithm/src/cuda/polar_cart.cu +++ b/modules/cudaarithm/src/cuda/polar_cart.cu @@ -50,55 +50,59 @@ #include "opencv2/cudaarithm.hpp" #include "opencv2/cudev.hpp" +#include "opencv2/core/private.cuda.hpp" +using namespace cv; +using namespace cv::cuda; using namespace cv::cudev; void cv::cuda::magnitude(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream) { - GpuMat x = _x.getGpuMat(); - GpuMat y = _y.getGpuMat(); + GpuMat x = getInputMat(_x, stream); + GpuMat y = getInputMat(_y, stream); - CV_DbgAssert( x.depth() == CV_32F ); - CV_DbgAssert( y.type() == x.type() && y.size() == x.size() ); + CV_Assert( x.depth() == CV_32F ); + CV_Assert( y.type() == x.type() && y.size() == x.size() ); - _dst.create(x.size(), CV_32FC1); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream); GpuMat_ xc(x.reshape(1)); GpuMat_ yc(y.reshape(1)); GpuMat_ magc(dst.reshape(1)); gridTransformBinary(xc, yc, magc, magnitude_func(), stream); + + syncOutput(dst, _dst, stream); } void cv::cuda::magnitudeSqr(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream) { - GpuMat x = _x.getGpuMat(); - GpuMat y = _y.getGpuMat(); + GpuMat x = getInputMat(_x, stream); + GpuMat y = getInputMat(_y, stream); - CV_DbgAssert( x.depth() == CV_32F ); - CV_DbgAssert( y.type() == x.type() && y.size() == x.size() ); + CV_Assert( x.depth() == CV_32F ); + CV_Assert( y.type() == x.type() && y.size() == x.size() ); - _dst.create(x.size(), CV_32FC1); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream); GpuMat_ xc(x.reshape(1)); GpuMat_ yc(y.reshape(1)); GpuMat_ magc(dst.reshape(1)); gridTransformBinary(xc, yc, magc, magnitude_sqr_func(), stream); + + syncOutput(dst, _dst, stream); } void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleInDegrees, Stream& stream) { - GpuMat x = _x.getGpuMat(); - GpuMat y = _y.getGpuMat(); + GpuMat x = getInputMat(_x, stream); + GpuMat y = getInputMat(_y, stream); - CV_DbgAssert( x.depth() == CV_32F ); - CV_DbgAssert( y.type() == x.type() && y.size() == x.size() ); + CV_Assert( x.depth() == CV_32F ); + CV_Assert( y.type() == x.type() && y.size() == x.size() ); - _dst.create(x.size(), CV_32FC1); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream); GpuMat_ xc(x.reshape(1)); GpuMat_ yc(y.reshape(1)); @@ -108,21 +112,20 @@ void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleI gridTransformBinary(xc, yc, anglec, direction_func(), stream); else gridTransformBinary(xc, yc, anglec, direction_func(), stream); + + syncOutput(dst, _dst, stream); } void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, OutputArray _angle, bool angleInDegrees, Stream& stream) { - GpuMat x = _x.getGpuMat(); - GpuMat y = _y.getGpuMat(); - - CV_DbgAssert( x.depth() == CV_32F ); - CV_DbgAssert( y.type() == x.type() && y.size() == x.size() ); + GpuMat x = getInputMat(_x, stream); + GpuMat y = getInputMat(_y, stream); - _mag.create(x.size(), CV_32FC1); - GpuMat mag = _mag.getGpuMat(); + CV_Assert( x.depth() == CV_32F ); + CV_Assert( y.type() == x.type() && y.size() == x.size() ); - _angle.create(x.size(), CV_32FC1); - GpuMat angle = _angle.getGpuMat(); + GpuMat mag = getOutputMat(_mag, x.size(), CV_32FC1, stream); + GpuMat angle = getOutputMat(_angle, x.size(), CV_32FC1, stream); GpuMat_ xc(x.reshape(1)); GpuMat_ yc(y.reshape(1)); @@ -147,6 +150,9 @@ void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, Outpu binaryTupleAdapter<0, 1>(direction_func())), stream); } + + syncOutput(mag, _mag, stream); + syncOutput(angle, _angle, stream); } namespace @@ -173,17 +179,14 @@ namespace void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, OutputArray _y, bool angleInDegrees, Stream& _stream) { - GpuMat mag = _mag.getGpuMat(); - GpuMat angle = _angle.getGpuMat(); - - CV_DbgAssert( angle.depth() == CV_32F ); - CV_DbgAssert( mag.empty() || (mag.type() == angle.type() && mag.size() == angle.size()) ); + GpuMat mag = getInputMat(_mag, _stream); + GpuMat angle = getInputMat(_angle, _stream); - _x.create(angle.size(), CV_32FC1); - GpuMat x = _x.getGpuMat(); + CV_Assert( angle.depth() == CV_32F ); + CV_Assert( mag.empty() || (mag.type() == angle.type() && mag.size() == angle.size()) ); - _y.create(angle.size(), CV_32FC1); - GpuMat y = _y.getGpuMat(); + GpuMat x = getOutputMat(_x, angle.size(), CV_32FC1, _stream); + GpuMat y = getOutputMat(_y, angle.size(), CV_32FC1, _stream); GpuMat_ xc(x.reshape(1)); GpuMat_ yc(y.reshape(1)); @@ -204,6 +207,9 @@ void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, O CV_CUDEV_SAFE_CALL( cudaGetLastError() ); + syncOutput(x, _x, _stream); + syncOutput(y, _y, _stream); + if (stream == 0) CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); } diff --git a/modules/cudaarithm/src/cuda/reduce.cu b/modules/cudaarithm/src/cuda/reduce.cu index 2cb2dac..5fb9028 100644 --- a/modules/cudaarithm/src/cuda/reduce.cu +++ b/modules/cudaarithm/src/cuda/reduce.cu @@ -50,7 +50,10 @@ #include "opencv2/cudaarithm.hpp" #include "opencv2/cudev.hpp" +#include "opencv2/core/private.cuda.hpp" +using namespace cv; +using namespace cv::cuda; using namespace cv::cudev; namespace @@ -125,7 +128,7 @@ namespace void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp, int dtype, Stream& stream) { - GpuMat src = _src.getGpuMat(); + GpuMat src = getInputMat(_src, stream); CV_Assert( src.channels() <= 4 ); CV_Assert( dim == 0 || dim == 1 ); @@ -134,8 +137,7 @@ void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp, if (dtype < 0) dtype = src.depth(); - _dst.create(1, dim == 0 ? src.cols : src.rows, CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels())); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, 1, dim == 0 ? src.cols : src.rows, CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()), stream); if (dim == 0) { @@ -292,6 +294,8 @@ void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp, func(src, dst, reduceOp, stream); } + + syncOutput(dst, _dst, stream); } #endif diff --git a/modules/cudaarithm/src/cuda/split_merge.cu b/modules/cudaarithm/src/cuda/split_merge.cu index 13d6a34..5b3af10 100644 --- a/modules/cudaarithm/src/cuda/split_merge.cu +++ b/modules/cudaarithm/src/cuda/split_merge.cu @@ -50,7 +50,10 @@ #include "opencv2/cudaarithm.hpp" #include "opencv2/cudev.hpp" +#include "opencv2/core/private.cuda.hpp" +using namespace cv; +using namespace cv::cuda; using namespace cv::cudev; //////////////////////////////////////////////////////////////////////// @@ -92,20 +95,18 @@ namespace void mergeImpl(const GpuMat* src, size_t n, cv::OutputArray _dst, Stream& stream) { - CV_DbgAssert( src != 0 ); - CV_DbgAssert( n > 0 && n <= 4 ); + CV_Assert( src != 0 ); + CV_Assert( n > 0 && n <= 4 ); const int depth = src[0].depth(); const cv::Size size = src[0].size(); -#ifdef _DEBUG for (size_t i = 0; i < n; ++i) { CV_Assert( src[i].size() == size ); CV_Assert( src[i].depth() == depth ); CV_Assert( src[i].channels() == 1 ); } -#endif if (n == 1) { @@ -123,8 +124,7 @@ namespace const int channels = static_cast(n); - _dst.create(size, CV_MAKE_TYPE(depth, channels)); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, size, CV_MAKE_TYPE(depth, channels), stream); const func_t func = funcs[channels - 2][CV_ELEM_SIZE(depth) / 2]; @@ -132,6 +132,8 @@ namespace CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported channel count or data type"); func(src, dst, stream); + + syncOutput(dst, _dst, stream); } } } @@ -203,12 +205,12 @@ namespace {SplitFunc<4, uchar>::call, SplitFunc<4, ushort>::call, SplitFunc<4, int>::call, 0, SplitFunc<4, double>::call} }; - CV_DbgAssert( dst != 0 ); + CV_Assert( dst != 0 ); const int depth = src.depth(); const int channels = src.channels(); - CV_DbgAssert( channels <= 4 ); + CV_Assert( channels <= 4 ); if (channels == 0) return; @@ -233,13 +235,13 @@ namespace void cv::cuda::split(InputArray _src, GpuMat* dst, Stream& stream) { - GpuMat src = _src.getGpuMat(); + GpuMat src = getInputMat(_src, stream); splitImpl(src, dst, stream); } void cv::cuda::split(InputArray _src, std::vector& dst, Stream& stream) { - GpuMat src = _src.getGpuMat(); + GpuMat src = getInputMat(_src, stream); dst.resize(src.channels()); if (src.channels() > 0) splitImpl(src, &dst[0], stream); diff --git a/modules/cudaarithm/src/cuda/threshold.cu b/modules/cudaarithm/src/cuda/threshold.cu index 21665cb..a5b8f07 100644 --- a/modules/cudaarithm/src/cuda/threshold.cu +++ b/modules/cudaarithm/src/cuda/threshold.cu @@ -52,6 +52,8 @@ #include "opencv2/cudev.hpp" #include "opencv2/core/private.cuda.hpp" +using namespace cv; +using namespace cv::cuda; using namespace cv::cudev; namespace @@ -95,15 +97,14 @@ namespace double cv::cuda::threshold(InputArray _src, OutputArray _dst, double thresh, double maxVal, int type, Stream& stream) { - GpuMat src = _src.getGpuMat(); + GpuMat src = getInputMat(_src, stream); const int depth = src.depth(); - CV_DbgAssert( src.channels() == 1 && depth <= CV_64F ); - CV_DbgAssert( type <= 4 /*THRESH_TOZERO_INV*/ ); + CV_Assert( src.channels() == 1 && depth <= CV_64F ); + CV_Assert( type <= 4 /*THRESH_TOZERO_INV*/ ); - _dst.create(src.size(), src.type()); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream); if (depth == CV_32F && type == 2 /*THRESH_TRUNC*/) { @@ -142,6 +143,8 @@ double cv::cuda::threshold(InputArray _src, OutputArray _dst, double thresh, dou funcs[depth](src, dst, thresh, maxVal, type, stream); } + syncOutput(dst, _dst, stream); + return thresh; } diff --git a/modules/cudaarithm/src/cuda/transpose.cu b/modules/cudaarithm/src/cuda/transpose.cu index aa85004..bfe50bd 100644 --- a/modules/cudaarithm/src/cuda/transpose.cu +++ b/modules/cudaarithm/src/cuda/transpose.cu @@ -52,18 +52,19 @@ #include "opencv2/cudev.hpp" #include "opencv2/core/private.cuda.hpp" +using namespace cv; +using namespace cv::cuda; using namespace cv::cudev; void cv::cuda::transpose(InputArray _src, OutputArray _dst, Stream& stream) { - GpuMat src = _src.getGpuMat(); + GpuMat src = getInputMat(_src, stream); const size_t elemSize = src.elemSize(); CV_Assert( elemSize == 1 || elemSize == 4 || elemSize == 8 ); - _dst.create( src.cols, src.rows, src.type() ); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, src.cols, src.rows, src.type(), stream); if (elemSize == 1) { @@ -87,6 +88,8 @@ void cv::cuda::transpose(InputArray _src, OutputArray _dst, Stream& stream) { gridTranspose(globPtr(src), globPtr(dst), stream); } + + syncOutput(dst, _dst, stream); } #endif diff --git a/modules/cudaarithm/src/element_operations.cpp b/modules/cudaarithm/src/element_operations.cpp index 795d7ff..f881195 100644 --- a/modules/cudaarithm/src/element_operations.cpp +++ b/modules/cudaarithm/src/element_operations.cpp @@ -107,11 +107,11 @@ namespace GpuMat src1; if (!isScalar1) - src1 = _src1.getGpuMat(); + src1 = getInputMat(_src1, stream); GpuMat src2; if (!isScalar2) - src2 = _src2.getGpuMat(); + src2 = getInputMat(_src2, stream); Mat scalar; if (isScalar1) @@ -126,7 +126,7 @@ namespace scalar.convertTo(Mat_(scalar.rows, scalar.cols, &val[0]), CV_64F); } - GpuMat mask = _mask.getGpuMat(); + GpuMat mask = getInputMat(_mask, stream); const int sdepth = src1.empty() ? src2.depth() : src1.depth(); const int cn = src1.empty() ? src2.channels() : src1.channels(); @@ -147,8 +147,7 @@ namespace CV_Error(Error::StsUnsupportedFormat, "The device doesn't support double"); } - _dst.create(size, CV_MAKE_TYPE(ddepth, cn)); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, size, CV_MAKE_TYPE(ddepth, cn), stream); if (isScalar1) mat_scalar_func(src2, val, true, dst, mask, scale, stream, op); @@ -156,6 +155,8 @@ namespace mat_scalar_func(src1, val, false, dst, mask, scale, stream, op); else mat_mat_func(src1, src2, dst, mask, scale, stream, op); + + syncOutput(dst, _dst, stream); } } @@ -196,27 +197,29 @@ void cv::cuda::multiply(InputArray _src1, InputArray _src2, OutputArray _dst, do { if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1) { - GpuMat src1 = _src1.getGpuMat(); - GpuMat src2 = _src2.getGpuMat(); + GpuMat src1 = getInputMat(_src1, stream); + GpuMat src2 = getInputMat(_src2, stream); CV_Assert( src1.size() == src2.size() ); - _dst.create(src1.size(), src1.type()); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream); mulMat_8uc4_32f(src1, src2, dst, stream); + + syncOutput(dst, _dst, stream); } else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1) { - GpuMat src1 = _src1.getGpuMat(); - GpuMat src2 = _src2.getGpuMat(); + GpuMat src1 = getInputMat(_src1, stream); + GpuMat src2 = getInputMat(_src2, stream); CV_Assert( src1.size() == src2.size() ); - _dst.create(src1.size(), src1.type()); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream); mulMat_16sc4_32f(src1, src2, dst, stream); + + syncOutput(dst, _dst, stream); } else { @@ -237,27 +240,29 @@ void cv::cuda::divide(InputArray _src1, InputArray _src2, OutputArray _dst, doub { if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1) { - GpuMat src1 = _src1.getGpuMat(); - GpuMat src2 = _src2.getGpuMat(); + GpuMat src1 = getInputMat(_src1, stream); + GpuMat src2 = getInputMat(_src2, stream); CV_Assert( src1.size() == src2.size() ); - _dst.create(src1.size(), src1.type()); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream); divMat_8uc4_32f(src1, src2, dst, stream); + + syncOutput(dst, _dst, stream); } else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1) { - GpuMat src1 = _src1.getGpuMat(); - GpuMat src2 = _src2.getGpuMat(); + GpuMat src1 = getInputMat(_src1, stream); + GpuMat src2 = getInputMat(_src2, stream); CV_Assert( src1.size() == src2.size() ); - _dst.create(src1.size(), src1.type()); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream); divMat_16sc4_32f(src1, src2, dst, stream); + + syncOutput(dst, _dst, stream); } else { @@ -389,15 +394,16 @@ void cv::cuda::rshift(InputArray _src, Scalar_ val, OutputArray _dst, Strea {NppShift::call, 0, NppShift::call, NppShift::call}, }; - GpuMat src = _src.getGpuMat(); + GpuMat src = getInputMat(_src, stream); CV_Assert( src.depth() < CV_32F ); CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 ); - _dst.create(src.size(), src.type()); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream); funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream)); + + syncOutput(dst, _dst, stream); } void cv::cuda::lshift(InputArray _src, Scalar_ val, OutputArray _dst, Stream& stream) @@ -412,15 +418,16 @@ void cv::cuda::lshift(InputArray _src, Scalar_ val, OutputArray _dst, Strea {NppShift::call, 0, NppShift::call, NppShift::call}, }; - GpuMat src = _src.getGpuMat(); + GpuMat src = getInputMat(_src, stream); CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S ); CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 ); - _dst.create(src.size(), src.type()); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream); funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream)); + + syncOutput(dst, _dst, stream); } ////////////////////////////////////////////////////////////////////////////// @@ -475,22 +482,24 @@ namespace void cv::cuda::magnitude(InputArray _src, OutputArray _dst, Stream& stream) { - GpuMat src = _src.getGpuMat(); + GpuMat src = getInputMat(_src, stream); - _dst.create(src.size(), CV_32FC1); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, src.size(), CV_32FC1, stream); npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R, StreamAccessor::getStream(stream)); + + syncOutput(dst, _dst, stream); } void cv::cuda::magnitudeSqr(InputArray _src, OutputArray _dst, Stream& stream) { - GpuMat src = _src.getGpuMat(); + GpuMat src = getInputMat(_src, stream); - _dst.create(src.size(), CV_32FC1); - GpuMat dst = _dst.getGpuMat(); + GpuMat dst = getOutputMat(_dst, src.size(), CV_32FC1, stream); npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R, StreamAccessor::getStream(stream)); + + syncOutput(dst, _dst, stream); } #endif