From: Vladislav Vinogradov
Date: Mon, 26 Mar 2012 11:02:03 +0000 (+0000)
Subject: fixed gpu arithm functions (mismatch with cpu version)
X-Git-Tag: accepted/2.0/20130307.220821~1016
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9034a2d6d7d2309094d773fe9f6f49816d230857;p=profile%2Fivi%2Fopencv.git

fixed gpu arithm functions (mismatch with cpu version)
---
diff --git a/modules/gpu/src/cuda/element_operations.cu b/modules/gpu/src/cuda/element_operations.cu
index 4f4efc2..71f6a21 100644
--- a/modules/gpu/src/cuda/element_operations.cu
+++ b/modules/gpu/src/cuda/element_operations.cu
@@ -488,11 +488,29 @@ namespace cv { namespace gpu { namespace device
 
     template <typename T, typename D> struct Multiply : binary_function<T, T, D>
     {
-        Multiply(double scale_) : scale(scale_) {}
+        Multiply(float scale_) : scale(scale_) {}
         __device__ __forceinline__ D operator ()(T a, T b) const
         {
             return saturate_cast<D>(scale * a * b);
         }
+        const float scale;
+    };
+    template <typename T> struct Multiply<T, double> : binary_function<T, T, double>
+    {
+        Multiply(double scale_) : scale(scale_) {}
+        __device__ __forceinline__ double operator ()(T a, T b) const
+        {
+            return scale * a * b;
+        }
+        const double scale;
+    };
+    template <> struct Multiply<int, int> : binary_function<int, int, int>
+    {
+        Multiply(double scale_) : scale(scale_) {}
+        __device__ __forceinline__ int operator ()(int a, int b) const
+        {
+            return saturate_cast<int>(scale * a * b);
+        }
         const double scale;
     };
 
@@ -517,11 +535,36 @@ namespace cv { namespace gpu { namespace device
         enum { smart_shift = 4 };
     };
 
+    template <typename T, typename D> struct MultiplyCaller
+    {
+        static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream)
+        {
+            Multiply<T, D> op(static_cast<float>(scale));
+            cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
+        }
+    };
+    template <typename T> struct MultiplyCaller<T, double>
+    {
+        static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream)
+        {
+            cudaSafeCall( cudaSetDoubleForDevice(&scale) );
+            Multiply<T, double> op(scale);
+            cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<double>)dst, op, WithOutMask(), stream);
+        }
+    };
+    template <> struct MultiplyCaller<int, int>
+    {
+        static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream)
+        {
+            cudaSafeCall( cudaSetDoubleForDevice(&scale) );
+            Multiply<int, int> op(scale);
+            cv::gpu::device::transform((DevMem2D_<int>)src1, (DevMem2D_<int>)src2, (DevMem2D_<int>)dst, op, WithOutMask(), stream);
+        }
+    };
+
     template <typename T, typename D> void multiply_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream)
     {
-        cudaSafeCall( cudaSetDoubleForDevice(&scale) );
-        Multiply<T, D> op(scale);
-        cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
+        MultiplyCaller<T, D>::call(src1, src2, dst, scale, stream);
    }
 
     template void multiply_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
@@ -729,7 +772,7 @@ namespace cv { namespace gpu { namespace device
         Divide(double scale_) : scale(scale_) {}
         __device__ __forceinline__ D operator ()(T a, T b) const
         {
-            return b != 0 ? saturate_cast<D>(scale * a / b) : 0;
+            return b != 0 ?
saturate_cast(a * scale / b) : 0; } const double scale; }; diff --git a/modules/gpu/src/element_operations.cpp b/modules/gpu/src/element_operations.cpp index 94eefe9..1d00a3e 100644 --- a/modules/gpu/src/element_operations.cpp +++ b/modules/gpu/src/element_operations.cpp @@ -115,7 +115,7 @@ namespace { typedef typename NppArithmFunc::npp_t npp_t; - static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream) + static void call(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream) { NppStreamHandler h(stream); @@ -124,21 +124,17 @@ namespace sz.height = src1.rows; nppSafeCall( func((const npp_t*)src1.data, static_cast(src1.step), (const npp_t*)src2.data, static_cast(src2.step), - (npp_t*)dst.data, static_cast(dst.step), sz, 0) ); + (npp_t*)dst.data, static_cast(dst.step), sz, 0) ); if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); } - static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream) - { - call(src1, src2, dst, PtrStepb(), stream); - } }; template ::func_t func> struct NppArithm { typedef typename NppArithmFunc::npp_t npp_t; - static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream) + static void call(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream) { NppStreamHandler h(stream); @@ -147,83 +143,13 @@ namespace sz.height = src1.rows; nppSafeCall( func((const npp_t*)src1.data, static_cast(src1.step), (const npp_t*)src2.data, static_cast(src2.step), - (npp_t*)dst.data, static_cast(dst.step), sz) ); + (npp_t*)dst.data, static_cast(dst.step), sz) ); if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); } - static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream) - { - call(src1, src2, dst, PtrStepb(), stream); - } - }; -} - -//////////////////////////////////////////////////////////////////////// -// add - -namespace cv { namespace gpu { namespace device -{ - template - void add_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream); - - template - void add_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream); -}}} - -void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s) -{ - using namespace ::cv::gpu::device; - - typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream); - - static const func_t funcs[7][7] = - { - {add_gpu, 0/*add_gpu*/, add_gpu, add_gpu, add_gpu, add_gpu, add_gpu}, - {0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/}, - {0/*add_gpu*/, 0/*add_gpu*/, add_gpu, 0/*add_gpu*/, add_gpu, add_gpu, add_gpu}, - {0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, add_gpu, add_gpu, add_gpu, add_gpu}, - {0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, add_gpu, add_gpu, add_gpu}, - {0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, add_gpu, add_gpu}, - {0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, add_gpu} }; - static const func_t npp_funcs[7] = - { - NppArithm::call, - 0, - NppArithm::call, - NppArithm::call, - NppArithm::call, - NppArithm::call, - add_gpu - }; - - CV_Assert(src1.type() != CV_8S); - 
CV_Assert(src1.type() == src2.type() && src1.size() == src2.size()); - CV_Assert(mask.empty() || (src1.channels() == 1 && mask.size() == src1.size() && mask.type() == CV_8U)); - - if (dtype < 0) - dtype = src1.depth(); - - dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels())); - - cudaStream_t stream = StreamAccessor::getStream(s); - - if (mask.empty() && dst.type() == src1.type()) - { - npp_funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), PtrStepb(), stream); - return; - } - - const func_t func = funcs[src1.depth()][dst.depth()]; - CV_Assert(func != 0); - - func(src1.reshape(1), src2.reshape(1), dst.reshape(1), mask, stream); -} - -namespace -{ template struct NppArithmScalarFunc { typedef typename NppTypeTraits::npp_t npp_t; @@ -262,7 +188,7 @@ namespace { typedef typename NppTypeTraits::npp_t npp_t; - static void call(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream) + static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream) { NppStreamHandler h(stream); @@ -272,7 +198,7 @@ namespace const npp_t pConstants[] = { saturate_cast(sc.val[0]), saturate_cast(sc.val[1]), saturate_cast(sc.val[2]), saturate_cast(sc.val[3]) }; - nppSafeCall( func(src.ptr(), static_cast(src.step), pConstants, dst.ptr(), static_cast(dst.step), sz, 0) ); + nppSafeCall( func((const npp_t*)src.data, static_cast(src.step), pConstants, (npp_t*)dst.data, static_cast(dst.step), sz, 0) ); if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); @@ -282,7 +208,7 @@ namespace { typedef typename NppTypeTraits::npp_t npp_t; - static void call(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream) + static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream) { NppStreamHandler h(stream); @@ -290,7 +216,7 @@ namespace sz.width = src.cols; sz.height = src.rows; - nppSafeCall( func(src.ptr(), static_cast(src.step), saturate_cast(sc.val[0]), dst.ptr(), static_cast(dst.step), sz, 0) ); + nppSafeCall( func((const npp_t*)src.data, static_cast(src.step), saturate_cast(sc.val[0]), (npp_t*)dst.data, static_cast(dst.step), sz, 0) ); if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); @@ -301,7 +227,7 @@ namespace typedef typename NppTypeTraits::npp_t npp_t; typedef typename NppTypeTraits::npp_complex_type npp_complex_type; - static void call(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream) + static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream) { NppStreamHandler h(stream); @@ -313,8 +239,8 @@ namespace nConstant.re = saturate_cast(sc.val[0]); nConstant.im = saturate_cast(sc.val[1]); - nppSafeCall( func(src.ptr(), static_cast(src.step), nConstant, - dst.ptr(), static_cast(dst.step), sz, 0) ); + nppSafeCall( func((const npp_complex_type*)src.data, static_cast(src.step), nConstant, + (npp_complex_type*)dst.data, static_cast(dst.step), sz, 0) ); if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); @@ -322,7 +248,9 @@ namespace }; template::func_ptr func> struct NppArithmScalar { - static void call(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream) + typedef typename NppTypeTraits::npp_t npp_t; + + static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream) { NppStreamHandler h(stream); @@ -332,7 +260,7 @@ namespace const Npp32f pConstants[] = { saturate_cast(sc.val[0]), saturate_cast(sc.val[1]), saturate_cast(sc.val[2]), saturate_cast(sc.val[3]) }; - nppSafeCall( func(src.ptr(), static_cast(src.step), 
pConstants, dst.ptr(), static_cast(dst.step), sz) ); + nppSafeCall( func((const npp_t*)src.data, static_cast(src.step), pConstants, (npp_t*)dst.data, static_cast(dst.step), sz) ); if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); @@ -340,7 +268,9 @@ namespace }; template::func_ptr func> struct NppArithmScalar { - static void call(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream) + typedef typename NppTypeTraits::npp_t npp_t; + + static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream) { NppStreamHandler h(stream); @@ -348,7 +278,7 @@ namespace sz.width = src.cols; sz.height = src.rows; - nppSafeCall( func(src.ptr(), static_cast(src.step), saturate_cast(sc.val[0]), dst.ptr(), static_cast(dst.step), sz) ); + nppSafeCall( func((const npp_t*)src.data, static_cast(src.step), saturate_cast(sc.val[0]), (npp_t*)dst.data, static_cast(dst.step), sz) ); if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); @@ -356,7 +286,10 @@ namespace }; template::func_ptr func> struct NppArithmScalar { - static void call(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream) + typedef typename NppTypeTraits::npp_t npp_t; + typedef typename NppTypeTraits::npp_complex_type npp_complex_type; + + static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream) { NppStreamHandler h(stream); @@ -368,7 +301,7 @@ namespace nConstant.re = saturate_cast(sc.val[0]); nConstant.im = saturate_cast(sc.val[1]); - nppSafeCall( func(src.ptr(), static_cast(src.step), nConstant, dst.ptr(), static_cast(dst.step), sz) ); + nppSafeCall( func((const npp_complex_type*)src.data, static_cast(src.step), nConstant, (npp_complex_type*)dst.data, static_cast(dst.step), sz) ); if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); @@ -376,40 +309,117 @@ namespace }; } +//////////////////////////////////////////////////////////////////////// +// add + +namespace cv { namespace gpu { namespace device +{ + template + void add_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream); + + template + void add_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream); +}}} + +void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s) +{ + using namespace cv::gpu::device; + + typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream); + static const func_t funcs[7][7] = + { + {add_gpu , 0 /*add_gpu*/ , add_gpu , add_gpu , add_gpu , add_gpu , add_gpu }, + {0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/, 0 /*add_gpu*/ , 0 /*add_gpu*/, 0 /*add_gpu*/, 0 /*add_gpu*/}, + {0 /*add_gpu*/, 0 /*add_gpu*/, add_gpu , 0 /*add_gpu*/, add_gpu , add_gpu , add_gpu }, + {0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/ , add_gpu , add_gpu , add_gpu , add_gpu }, + {0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/ , add_gpu , add_gpu , add_gpu }, + {0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/ , add_gpu , add_gpu }, + {0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/ , add_gpu } + }; + + typedef void (*npp_func_t)(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream); + static const npp_func_t npp_funcs[] = + { + NppArithm::call, + 0, + NppArithm::call, + NppArithm::call, + NppArithm::call, + NppArithm::call + }; + + if (dtype < 0) + dtype 
= src1.depth(); + + CV_Assert(src1.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F); + CV_Assert(src1.type() == src2.type() && src1.size() == src2.size()); + CV_Assert(mask.empty() || (src1.channels() == 1 && mask.size() == src1.size() && mask.type() == CV_8U)); + + if (src1.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F) + { + if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE)) + CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); + } + + dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels())); + + cudaStream_t stream = StreamAccessor::getStream(s); + + if (mask.empty() && dst.type() == src1.type() && src1.depth() <= CV_32F) + { + npp_funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), stream); + return; + } + + const func_t func = funcs[src1.depth()][dst.depth()]; + + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); + + func(src1.reshape(1), src2.reshape(1), dst.reshape(1), mask, stream); +} + void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s) { - using namespace ::cv::gpu::device; + using namespace cv::gpu::device; typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream); - static const func_t funcs[7][7] = { - {add_gpu, 0/*add_gpu*/, add_gpu, add_gpu, add_gpu, add_gpu, add_gpu}, - {0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/}, - {0/*add_gpu*/, 0/*add_gpu*/, add_gpu, 0/*add_gpu*/, add_gpu, add_gpu, add_gpu}, - {0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, add_gpu, add_gpu, add_gpu, add_gpu}, - {0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, add_gpu, add_gpu, add_gpu}, - {0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, add_gpu, add_gpu}, - {0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, 0/*add_gpu*/, add_gpu} + {add_gpu , 0 /*add_gpu*/ , add_gpu , add_gpu , add_gpu , add_gpu , add_gpu }, + {0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/, 0 /*add_gpu*/ , 0 /*add_gpu*/, 0 /*add_gpu*/, 0 /*add_gpu*/}, + {0 /*add_gpu*/, 0 /*add_gpu*/, add_gpu , 0 /*add_gpu*/, add_gpu , add_gpu , add_gpu }, + {0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/ , add_gpu , add_gpu , add_gpu , add_gpu }, + {0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/ , add_gpu , add_gpu , add_gpu }, + {0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/ , add_gpu , add_gpu }, + {0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/ , 0 /*add_gpu*/ , add_gpu } }; - typedef void (*npp_func_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream); + typedef void (*npp_func_t)(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream); static const npp_func_t npp_funcs[7][4] = { - {NppArithmScalar::call, 0, NppArithmScalar::call, NppArithmScalar::call}, - {0,0,0,0}, - {NppArithmScalar::call, 0, NppArithmScalar::call, NppArithmScalar::call}, + {NppArithmScalar::call, 0 , NppArithmScalar::call, NppArithmScalar::call}, + {0 , 0 , 0 , 0 }, + {NppArithmScalar::call, 0 , NppArithmScalar::call, NppArithmScalar::call}, {NppArithmScalar::call, NppArithmScalar::call, NppArithmScalar::call, NppArithmScalar::call}, - {NppArithmScalar::call, NppArithmScalar::call, NppArithmScalar::call, 0}, - {NppArithmScalar::call, NppArithmScalar::call, NppArithmScalar::call, 
NppArithmScalar::call}, - {0,0,0,0} + {NppArithmScalar::call, NppArithmScalar::call, NppArithmScalar::call, 0 }, + {NppArithmScalar::call, NppArithmScalar::call, NppArithmScalar::call, NppArithmScalar::call}, + {0 , 0 , 0 , 0 } }; - CV_Assert(mask.empty() || (src.channels() == 1 && mask.size() == src.size() && mask.type() == CV_8U)); - if (dtype < 0) dtype = src.depth(); + CV_Assert(src.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F); + CV_Assert(src.channels() <= 4); + CV_Assert(mask.empty() || (src.channels() == 1 && mask.size() == src.size() && mask.type() == CV_8U)); + + if (src.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F) + { + if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE)) + CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); + } + dst.create(src.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels())); cudaStream_t stream = StreamAccessor::getStream(s); @@ -428,7 +438,9 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat CV_Assert(src.channels() == 1); const func_t func = funcs[src.depth()][dst.depth()]; - CV_Assert(func != 0); + + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); func(src, sc.val[0], dst, mask, stream); } @@ -447,37 +459,43 @@ namespace cv { namespace gpu { namespace device void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s) { - using namespace ::cv::gpu::device; + using namespace cv::gpu::device; typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream); - static const func_t funcs[7][7] = { - {subtract_gpu, 0/*subtract_gpu*/, subtract_gpu, subtract_gpu, subtract_gpu, subtract_gpu, subtract_gpu}, - {0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/}, - {0/*subtract_gpu*/, 0/*subtract_gpu*/, subtract_gpu, 0/*subtract_gpu*/, subtract_gpu, subtract_gpu, subtract_gpu}, - {0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, subtract_gpu, subtract_gpu, subtract_gpu, subtract_gpu}, - {0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, subtract_gpu, subtract_gpu, subtract_gpu}, - {0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, subtract_gpu, subtract_gpu}, - {0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, subtract_gpu} + {subtract_gpu , 0 /*subtract_gpu*/ , subtract_gpu , subtract_gpu , subtract_gpu , subtract_gpu , subtract_gpu }, + {0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/, 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/, 0 /*subtract_gpu*/, 0 /*subtract_gpu*/}, + {0 /*subtract_gpu*/, 0 /*subtract_gpu*/, subtract_gpu , 0 /*subtract_gpu*/, subtract_gpu , subtract_gpu , subtract_gpu }, + {0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , subtract_gpu , subtract_gpu , subtract_gpu , subtract_gpu }, + {0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , subtract_gpu , subtract_gpu , subtract_gpu }, + {0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , subtract_gpu , subtract_gpu }, + {0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , subtract_gpu } }; - static const func_t 
npp_funcs[6] = + typedef void (*npp_func_t)(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream); + static const npp_func_t npp_funcs[6] = { - NppArithm::call, + NppArithm::call, 0, NppArithm::call, NppArithm::call, NppArithm::call, - NppArithm::call + NppArithm::call }; - CV_Assert(src1.type() != CV_8S); + if (dtype < 0) + dtype = src1.depth(); + + CV_Assert(src1.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F); CV_Assert(src1.type() == src2.type() && src1.size() == src2.size()); CV_Assert(mask.empty() || (src1.channels() == 1 && mask.size() == src1.size() && mask.type() == CV_8U)); - if (dtype < 0) - dtype = src1.depth(); + if (src1.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F) + { + if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE)) + CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); + } dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels())); @@ -485,50 +503,59 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons if (mask.empty() && dst.type() == src1.type() && src1.depth() <= CV_32F) { - npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), PtrStepb(), stream); + npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), stream); return; } const func_t func = funcs[src1.depth()][dst.depth()]; - CV_Assert(func != 0); + + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); func(src1.reshape(1), src2.reshape(1), dst.reshape(1), mask, stream); } void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s) { - using namespace ::cv::gpu::device; + using namespace cv::gpu::device; typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream); - static const func_t funcs[7][7] = { - {subtract_gpu, 0/*subtract_gpu*/, subtract_gpu, subtract_gpu, subtract_gpu, subtract_gpu, subtract_gpu}, - {0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/}, - {0/*subtract_gpu*/, 0/*subtract_gpu*/, subtract_gpu, 0/*subtract_gpu*/, subtract_gpu, subtract_gpu, subtract_gpu}, - {0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, subtract_gpu, subtract_gpu, subtract_gpu, subtract_gpu}, - {0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, subtract_gpu, subtract_gpu, subtract_gpu}, - {0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, subtract_gpu, subtract_gpu}, - {0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, 0/*subtract_gpu*/, subtract_gpu} + {subtract_gpu , 0 /*subtract_gpu*/ , subtract_gpu , subtract_gpu , subtract_gpu , subtract_gpu , subtract_gpu }, + {0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/, 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/, 0 /*subtract_gpu*/, 0 /*subtract_gpu*/}, + {0 /*subtract_gpu*/, 0 /*subtract_gpu*/, subtract_gpu , 0 /*subtract_gpu*/, subtract_gpu , subtract_gpu , subtract_gpu }, + {0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , subtract_gpu , subtract_gpu , subtract_gpu , subtract_gpu }, + {0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , subtract_gpu , subtract_gpu , subtract_gpu }, + {0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 
/*subtract_gpu*/ , subtract_gpu , subtract_gpu }, + {0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , 0 /*subtract_gpu*/ , subtract_gpu } }; - typedef void (*npp_func_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream); + typedef void (*npp_func_t)(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream); static const npp_func_t npp_funcs[7][4] = { - {NppArithmScalar::call, 0, NppArithmScalar::call, NppArithmScalar::call}, - {0,0,0,0}, - {NppArithmScalar::call, 0, NppArithmScalar::call, NppArithmScalar::call}, + {NppArithmScalar::call, 0 , NppArithmScalar::call, NppArithmScalar::call}, + {0 , 0 , 0 , 0 }, + {NppArithmScalar::call, 0 , NppArithmScalar::call, NppArithmScalar::call}, {NppArithmScalar::call, NppArithmScalar::call, NppArithmScalar::call, NppArithmScalar::call}, - {NppArithmScalar::call, NppArithmScalar::call, NppArithmScalar::call, 0}, - {NppArithmScalar::call, NppArithmScalar::call, NppArithmScalar::call, NppArithmScalar::call}, - {0,0,0,0} + {NppArithmScalar::call, NppArithmScalar::call, NppArithmScalar::call, 0 }, + {NppArithmScalar::call, NppArithmScalar::call, NppArithmScalar::call, NppArithmScalar::call}, + {0 , 0 , 0 , 0 } }; - CV_Assert(mask.empty() || (src.channels() == 1 && mask.size() == src.size() && mask.type() == CV_8U)); - if (dtype < 0) dtype = src.depth(); + CV_Assert(src.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F); + CV_Assert(src.channels() <= 4); + CV_Assert(mask.empty() || (src.channels() == 1 && mask.size() == src.size() && mask.type() == CV_8U)); + + if (src.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F) + { + if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE)) + CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); + } + dst.create(src.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels())); cudaStream_t stream = StreamAccessor::getStream(s); @@ -547,7 +574,9 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G CV_Assert(src.channels() == 1); const func_t func = funcs[src.depth()][dst.depth()]; - CV_Assert(func != 0); + + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); func(src, sc.val[0], dst, mask, stream); } @@ -569,31 +598,7 @@ namespace cv { namespace gpu { namespace device void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s) { - using namespace ::cv::gpu::device; - - typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream); - - static const func_t funcs[7][7] = - { - {multiply_gpu, 0/*multiply_gpu*/, multiply_gpu, multiply_gpu, multiply_gpu, multiply_gpu, multiply_gpu}, - {0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/}, - {0/*multiply_gpu*/, 0/*multiply_gpu*/, multiply_gpu, 0/*multiply_gpu*/, multiply_gpu, multiply_gpu, multiply_gpu}, - {0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, multiply_gpu, multiply_gpu, multiply_gpu, multiply_gpu}, - {0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, multiply_gpu, multiply_gpu, multiply_gpu}, - {0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, multiply_gpu, multiply_gpu}, - {0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, 
0/*multiply_gpu*/, multiply_gpu} - }; - - static const func_t npp_funcs[7] = - { - NppArithm::call, - 0, - NppArithm::call, - NppArithm::call, - NppArithm::call, - NppArithm::call, - multiply_gpu - }; + using namespace cv::gpu::device; cudaStream_t stream = StreamAccessor::getStream(s); @@ -615,22 +620,53 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub } else { - CV_Assert(src1.type() != CV_8S); - CV_Assert(src1.type() == src2.type() && src1.size() == src2.size()); + typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream); + static const func_t funcs[7][7] = + { + {multiply_gpu , 0 /*multiply_gpu*/ , multiply_gpu , multiply_gpu , multiply_gpu , multiply_gpu , multiply_gpu }, + {0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/, 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/, 0 /*multiply_gpu*/, 0 /*multiply_gpu*/}, + {0 /*multiply_gpu*/, 0 /*multiply_gpu*/, multiply_gpu , 0 /*multiply_gpu*/, multiply_gpu , multiply_gpu , multiply_gpu }, + {0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , multiply_gpu , multiply_gpu , multiply_gpu , multiply_gpu }, + {0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , multiply_gpu , multiply_gpu , multiply_gpu }, + {0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , multiply_gpu , multiply_gpu }, + {0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , multiply_gpu } + }; + + typedef void (*npp_func_t)(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream); + static const npp_func_t npp_funcs[] = + { + NppArithm::call, + 0, + NppArithm::call, + NppArithm::call, + NppArithm::call, + NppArithm::call + }; if (dtype < 0) dtype = src1.depth(); + CV_Assert(src1.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F); + CV_Assert(src1.type() == src2.type() && src1.size() == src2.size()); + + if (src1.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F) + { + if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE)) + CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); + } + dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels())); - if (scale == 1 && dst.type() == src1.type()) + if (scale == 1 && dst.type() == src1.type() && src1.depth() <= CV_32F) { - npp_funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), 1, stream); + npp_funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), stream); return; } const func_t func = funcs[src1.depth()][dst.depth()]; - CV_Assert(func != 0); + + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); func(src1.reshape(1), src2.reshape(1), dst.reshape(1), scale, stream); } @@ -646,56 +682,67 @@ namespace void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double scale, int dtype, Stream& s) { - using namespace ::cv::gpu::device; + using namespace cv::gpu::device; typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream); - static const func_t funcs[7][7] = { - {multiply_gpu, 0/*multiply_gpu*/, multiply_gpu, multiply_gpu, multiply_gpu, multiply_gpu, multiply_gpu}, - {0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/}, - 
{0/*multiply_gpu*/, 0/*multiply_gpu*/, multiply_gpu, 0/*multiply_gpu*/, multiply_gpu, multiply_gpu, multiply_gpu}, - {0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, multiply_gpu, multiply_gpu, multiply_gpu, multiply_gpu}, - {0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, multiply_gpu, multiply_gpu, multiply_gpu}, - {0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, multiply_gpu, multiply_gpu}, - {0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, 0/*multiply_gpu*/, multiply_gpu} + {multiply_gpu , 0 /*multiply_gpu*/ , multiply_gpu , multiply_gpu , multiply_gpu , multiply_gpu , multiply_gpu }, + {0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/, 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/, 0 /*multiply_gpu*/, 0 /*multiply_gpu*/}, + {0 /*multiply_gpu*/, 0 /*multiply_gpu*/, multiply_gpu , 0 /*multiply_gpu*/, multiply_gpu , multiply_gpu , multiply_gpu }, + {0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , multiply_gpu , multiply_gpu , multiply_gpu , multiply_gpu }, + {0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , multiply_gpu , multiply_gpu , multiply_gpu }, + {0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , multiply_gpu , multiply_gpu }, + {0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , 0 /*multiply_gpu*/ , multiply_gpu } }; - typedef void (*npp_func_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream); + typedef void (*npp_func_t)(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream); static const npp_func_t npp_funcs[7][4] = { - {NppArithmScalar::call, 0, NppArithmScalar::call, NppArithmScalar::call}, - {0,0,0,0}, + {NppArithmScalar::call, 0, NppArithmScalar::call, NppArithmScalar::call}, + {0 , 0, 0 , 0 }, {NppArithmScalar::call, 0, NppArithmScalar::call, NppArithmScalar::call}, {NppArithmScalar::call, 0, NppArithmScalar::call, NppArithmScalar::call}, - {NppArithmScalar::call, 0, NppArithmScalar::call, 0}, - {NppArithmScalar::call, 0, NppArithmScalar::call, NppArithmScalar::call}, - {0,0,0,0} + {NppArithmScalar::call, 0, NppArithmScalar::call, 0 }, + {NppArithmScalar::call, 0, NppArithmScalar::call, NppArithmScalar::call}, + {0 , 0, 0 , 0 } }; if (dtype < 0) dtype = src.depth(); + CV_Assert(src.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F); + CV_Assert(src.channels() <= 4); + + if (src.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F) + { + if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE)) + CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); + } + dst.create(src.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels())); cudaStream_t stream = StreamAccessor::getStream(s); - if (dst.type() == src.type() && scale == 1) + if (dst.type() == src.type() && scale == 1 && (src.depth() == CV_32F || isIntScalar(sc))) { const npp_func_t npp_func = npp_funcs[src.depth()][src.channels() - 1]; - if (npp_func && (src.depth() == CV_32F || isIntScalar(sc))) + if (npp_func) { npp_func(src, sc, dst, stream); return; } } + CV_Assert(src.channels() == 1); + const func_t func = funcs[src.depth()][dst.depth()]; - CV_Assert(func != 0); + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); - func(src.reshape(1), sc.val[0], dst.reshape(1), scale, stream); + func(src, 
sc.val[0], dst, scale, stream); } //////////////////////////////////////////////////////////////////////// @@ -718,30 +765,7 @@ namespace cv { namespace gpu { namespace device void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s) { - using namespace ::cv::gpu::device; - - typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream); - - static const func_t funcs[7][7] = - { - {divide_gpu, 0/*divide_gpu*/, divide_gpu, divide_gpu, divide_gpu, divide_gpu, divide_gpu}, - {0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/}, - {0/*divide_gpu*/, 0/*divide_gpu*/, divide_gpu, 0/*divide_gpu*/, divide_gpu, divide_gpu, divide_gpu}, - {0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, divide_gpu, divide_gpu, divide_gpu, divide_gpu}, - {0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, divide_gpu, divide_gpu, divide_gpu}, - {0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, divide_gpu, divide_gpu}, - {0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, divide_gpu} - }; - - static const func_t npp_funcs[6] = - { - NppArithm::call, - 0, - NppArithm::call, - NppArithm::call, - NppArithm::call, - NppArithm::call - }; + using namespace cv::gpu::device; cudaStream_t stream = StreamAccessor::getStream(s); @@ -763,22 +787,53 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double } else { - CV_Assert(src1.type() != CV_8S); - CV_Assert(src1.type() == src2.type() && src1.size() == src2.size()); + typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream); + static const func_t funcs[7][7] = + { + {divide_gpu , 0 /*divide_gpu*/ , divide_gpu , divide_gpu , divide_gpu , divide_gpu , divide_gpu }, + {0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/, 0 /*divide_gpu*/ , 0 /*divide_gpu*/, 0 /*divide_gpu*/, 0 /*divide_gpu*/}, + {0 /*divide_gpu*/, 0 /*divide_gpu*/, divide_gpu , 0 /*divide_gpu*/, divide_gpu , divide_gpu , divide_gpu }, + {0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , divide_gpu , divide_gpu , divide_gpu , divide_gpu }, + {0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , divide_gpu , divide_gpu , divide_gpu }, + {0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , divide_gpu , divide_gpu }, + {0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , divide_gpu } + }; + + typedef void (*npp_func_t)(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream); + static const npp_func_t npp_funcs[6] = + { + NppArithm::call, + 0, + NppArithm::call, + NppArithm::call, + NppArithm::call, + NppArithm::call + }; if (dtype < 0) dtype = src1.depth(); + CV_Assert(src1.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F); + CV_Assert(src1.type() == src2.type() && src1.size() == src2.size()); + + if (src1.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F) + { + if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE)) + CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); + } + dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels())); if (scale == 1 && dst.type() == src1.type() && src1.depth() <= CV_32F) { - 
npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), 1, stream); + npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), stream); return; } const func_t func = funcs[src1.depth()][dst.depth()]; - CV_Assert(func != 0); + + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); func(src1.reshape(1), src2.reshape(1), dst.reshape(1), scale, stream); } @@ -786,86 +841,105 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double scale, int dtype, Stream& s) { - using namespace ::cv::gpu::device; + using namespace cv::gpu::device; typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream); - static const func_t funcs[7][7] = { - {divide_gpu, 0/*divide_gpu*/, divide_gpu, divide_gpu, divide_gpu, divide_gpu, divide_gpu}, - {0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/}, - {0/*divide_gpu*/, 0/*divide_gpu*/, divide_gpu, 0/*divide_gpu*/, divide_gpu, divide_gpu, divide_gpu}, - {0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, divide_gpu, divide_gpu, divide_gpu, divide_gpu}, - {0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, divide_gpu, divide_gpu, divide_gpu}, - {0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, divide_gpu, divide_gpu}, - {0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, divide_gpu} + {divide_gpu , 0 /*divide_gpu*/ , divide_gpu , divide_gpu , divide_gpu , divide_gpu , divide_gpu }, + {0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/, 0 /*divide_gpu*/ , 0 /*divide_gpu*/, 0 /*divide_gpu*/, 0 /*divide_gpu*/}, + {0 /*divide_gpu*/, 0 /*divide_gpu*/, divide_gpu , 0 /*divide_gpu*/, divide_gpu , divide_gpu , divide_gpu }, + {0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , divide_gpu , divide_gpu , divide_gpu , divide_gpu }, + {0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , divide_gpu , divide_gpu , divide_gpu }, + {0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , divide_gpu , divide_gpu }, + {0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , divide_gpu } }; - typedef void (*npp_func_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream); + typedef void (*npp_func_t)(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream); static const npp_func_t npp_funcs[7][4] = { - {NppArithmScalar::call, 0, NppArithmScalar::call, NppArithmScalar::call}, - {0,0,0,0}, + {NppArithmScalar::call, 0, NppArithmScalar::call, NppArithmScalar::call}, + {0 , 0, 0 , 0 }, {NppArithmScalar::call, 0, NppArithmScalar::call, NppArithmScalar::call}, {NppArithmScalar::call, 0, NppArithmScalar::call, NppArithmScalar::call}, - {NppArithmScalar::call, 0, NppArithmScalar::call, 0}, - {NppArithmScalar::call, 0, NppArithmScalar::call, NppArithmScalar::call}, - {0,0,0,0} + {NppArithmScalar::call, 0, NppArithmScalar::call, 0 }, + {NppArithmScalar::call, 0, NppArithmScalar::call, NppArithmScalar::call}, + {0 , 0, 0 , 0 } }; if (dtype < 0) dtype = src.depth(); + CV_Assert(src.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F); + CV_Assert(src.channels() <= 4); + + if (src.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F) + { 
+ if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE)) + CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); + } + dst.create(src.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels())); cudaStream_t stream = StreamAccessor::getStream(s); - if (dst.type() == src.type() && scale == 1) + if (dst.type() == src.type() && scale == 1 && (src.depth() == CV_32F || isIntScalar(sc))) { const npp_func_t npp_func = npp_funcs[src.depth()][src.channels() - 1]; - if (npp_func && (src.depth() == CV_32F || isIntScalar(sc))) + if (npp_func) { npp_func(src, sc, dst, stream); return; } } + CV_Assert(src.channels() == 1); + const func_t func = funcs[src.depth()][dst.depth()]; - CV_Assert(func != 0); + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types"); - func(src.reshape(1), sc.val[0], dst.reshape(1), scale, stream); + func(src, sc.val[0], dst, scale, stream); } void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, Stream& s) { - using namespace ::cv::gpu::device; + using namespace cv::gpu::device; typedef void (*func_t)(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream); - static const func_t funcs[7][7] = { - {divide_gpu, 0/*divide_gpu*/, divide_gpu, divide_gpu, divide_gpu, divide_gpu, divide_gpu}, - {0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/}, - {0/*divide_gpu*/, 0/*divide_gpu*/, divide_gpu, 0/*divide_gpu*/, divide_gpu, divide_gpu, divide_gpu}, - {0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, divide_gpu, divide_gpu, divide_gpu, divide_gpu}, - {0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, divide_gpu, divide_gpu, divide_gpu}, - {0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, divide_gpu, divide_gpu}, - {0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, 0/*divide_gpu*/, divide_gpu} + {divide_gpu , 0 /*divide_gpu*/ , divide_gpu , divide_gpu , divide_gpu , divide_gpu , divide_gpu }, + {0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/, 0 /*divide_gpu*/ , 0 /*divide_gpu*/, 0 /*divide_gpu*/, 0 /*divide_gpu*/}, + {0 /*divide_gpu*/, 0 /*divide_gpu*/, divide_gpu , 0 /*divide_gpu*/, divide_gpu , divide_gpu , divide_gpu }, + {0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , divide_gpu , divide_gpu , divide_gpu , divide_gpu }, + {0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , divide_gpu , divide_gpu , divide_gpu }, + {0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , divide_gpu , divide_gpu }, + {0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , 0 /*divide_gpu*/ , divide_gpu } }; - CV_Assert(src.channels() == 1); - if (dtype < 0) dtype = src.depth(); + CV_Assert(src.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F); + CV_Assert(src.channels() == 1); + + if (src.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F) + { + if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE)) + CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double"); + } + dst.create(src.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels())); cudaStream_t stream = StreamAccessor::getStream(s); const func_t func = funcs[src.depth()][dst.depth()]; - CV_Assert(func != 0); + + if (!func) + CV_Error(CV_StsUnsupportedFormat, "Unsupported combination 
of source and destination types"); func(scale, src, dst, stream); } diff --git a/modules/gpu/src/stereobm.cpp b/modules/gpu/src/stereobm.cpp index f1ad920..41bbb51 100644 --- a/modules/gpu/src/stereobm.cpp +++ b/modules/gpu/src/stereobm.cpp @@ -55,7 +55,7 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&, #else /* !defined (HAVE_CUDA) */ -namespace cv { namespace gpu { namespace device +namespace cv { namespace gpu { namespace device { namespace stereobm { @@ -65,10 +65,13 @@ namespace cv { namespace gpu { namespace device } }}} -const float defaultAvgTexThreshold = 3; +namespace +{ + const float defaultAvgTexThreshold = 3; +} cv::gpu::StereoBM_GPU::StereoBM_GPU() - : preset(BASIC_PRESET), ndisp(DEFAULT_NDISP), winSize(DEFAULT_WINSZ), avergeTexThreshold(defaultAvgTexThreshold) + : preset(BASIC_PRESET), ndisp(DEFAULT_NDISP), winSize(DEFAULT_WINSZ), avergeTexThreshold(defaultAvgTexThreshold) { } @@ -100,9 +103,9 @@ namespace { using namespace ::cv::gpu::device::stereobm; - CV_DbgAssert(left.rows == right.rows && left.cols == right.cols); - CV_DbgAssert(left.type() == CV_8UC1); - CV_DbgAssert(right.type() == CV_8UC1); + CV_Assert(left.rows == right.rows && left.cols == right.cols); + CV_Assert(left.type() == CV_8UC1); + CV_Assert(right.type() == CV_8UC1); disparity.create(left.size(), CV_8U); minSSD.create(left.size(), CV_32S); @@ -115,7 +118,7 @@ namespace leBuf.create( left.size(), left.type()); riBuf.create(right.size(), right.type()); - prefilter_xsobel( left, leBuf, 31, stream); + prefilter_xsobel( left, leBuf, 31, stream); prefilter_xsobel(right, riBuf, 31, stream); le_for_bm = leBuf; diff --git a/modules/gpu/test/test_core.cpp b/modules/gpu/test/test_core.cpp index 5a3b79c..ec5a515 100644 --- a/modules/gpu/test/test_core.cpp +++ b/modules/gpu/test/test_core.cpp @@ -50,7 +50,7 @@ PARAM_TEST_CASE(Add_Array, cv::gpu::DeviceInfo, cv::Size, std::pair depth; + std::pair depth; int channels; bool useRoi; @@ -78,14 +78,29 @@ TEST_P(Add_Array, Accuracy) cv::Mat mat2 = randomMat(size, stype); cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0); - cv::gpu::GpuMat dst = createMat(size, dtype, useRoi); - dst.setTo(cv::Scalar::all(0)); - cv::gpu::add(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, channels == 1 ? loadMat(mask, useRoi) : cv::gpu::GpuMat(), depth.second); + if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat dst; + cv::gpu::add(loadMat(mat1), loadMat(mat2), dst, cv::gpu::GpuMat(), depth.second); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat dst = createMat(size, dtype, useRoi); + dst.setTo(cv::Scalar::all(0)); + cv::gpu::add(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, channels == 1 ? loadMat(mask, useRoi) : cv::gpu::GpuMat(), depth.second); - cv::Mat dst_gold(size, dtype, cv::Scalar::all(0)); - cv::add(mat1, mat2, dst_gold, channels == 1 ? mask : cv::noArray(), depth.second); + cv::Mat dst_gold(size, dtype, cv::Scalar::all(0)); + cv::add(mat1, mat2, dst_gold, channels == 1 ? mask : cv::noArray(), depth.second); - EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 
1e-4 : 0.0); + } } INSTANTIATE_TEST_CASE_P(GPU_Core, Add_Array, testing::Combine( @@ -102,7 +117,7 @@ PARAM_TEST_CASE(Add_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair depth; + std::pair depth; bool useRoi; virtual void SetUp() @@ -116,20 +131,65 @@ PARAM_TEST_CASE(Add_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + } +} + +TEST_P(Add_Scalar, WithMask) { cv::Mat mat = randomMat(size, depth.first); cv::Scalar val = randomScalar(0, 255); cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0); - cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); - dst.setTo(cv::Scalar::all(0)); - cv::gpu::add(loadMat(mat, useRoi), val, dst, loadMat(mask, useRoi), depth.second); + if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat dst; + cv::gpu::add(loadMat(mat), val, dst, cv::gpu::GpuMat(), depth.second); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); + dst.setTo(cv::Scalar::all(0)); + cv::gpu::add(loadMat(mat, useRoi), val, dst, loadMat(mask, useRoi), depth.second); - cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0)); - cv::add(mat, val, dst_gold, mask, depth.second); + cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0)); + cv::add(mat, val, dst_gold, mask, depth.second); - EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + } } INSTANTIATE_TEST_CASE_P(GPU_Core, Add_Scalar, testing::Combine( @@ -145,7 +205,7 @@ PARAM_TEST_CASE(Subtract_Array, cv::gpu::DeviceInfo, cv::Size, std::pair depth; + std::pair depth; int channels; bool useRoi; @@ -173,14 +233,29 @@ TEST_P(Subtract_Array, Accuracy) cv::Mat mat2 = randomMat(size, stype); cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0); - cv::gpu::GpuMat dst = createMat(size, dtype, useRoi); - dst.setTo(cv::Scalar::all(0)); - cv::gpu::subtract(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, channels == 1 ? loadMat(mask, useRoi) : cv::gpu::GpuMat(), depth.second); + if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat dst; + cv::gpu::subtract(loadMat(mat1), loadMat(mat2), dst, cv::gpu::GpuMat(), depth.second); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat dst = createMat(size, dtype, useRoi); + dst.setTo(cv::Scalar::all(0)); + cv::gpu::subtract(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, channels == 1 ? loadMat(mask, useRoi) : cv::gpu::GpuMat(), depth.second); - cv::Mat dst_gold(size, dtype, cv::Scalar::all(0)); - cv::subtract(mat1, mat2, dst_gold, channels == 1 ? mask : cv::noArray(), depth.second); + cv::Mat dst_gold(size, dtype, cv::Scalar::all(0)); + cv::subtract(mat1, mat2, dst_gold, channels == 1 ? mask : cv::noArray(), depth.second); - EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 
1e-4 : 0.0); + } } INSTANTIATE_TEST_CASE_P(GPU_Core, Subtract_Array, testing::Combine( @@ -197,7 +272,7 @@ PARAM_TEST_CASE(Subtract_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair depth; + std::pair depth; bool useRoi; virtual void SetUp() @@ -211,20 +286,65 @@ PARAM_TEST_CASE(Subtract_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + } +} + +TEST_P(Subtract_Scalar, WithMask) { cv::Mat mat = randomMat(size, depth.first); cv::Scalar val = randomScalar(0, 255); cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0); - cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); - dst.setTo(cv::Scalar::all(0)); - cv::gpu::subtract(loadMat(mat, useRoi), val, dst, loadMat(mask, useRoi), depth.second); + if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat dst; + cv::gpu::subtract(loadMat(mat), val, dst, cv::gpu::GpuMat(), depth.second); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); + dst.setTo(cv::Scalar::all(0)); + cv::gpu::subtract(loadMat(mat, useRoi), val, dst, loadMat(mask, useRoi), depth.second); - cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0)); - cv::subtract(mat, val, dst_gold, mask, depth.second); + cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0)); + cv::subtract(mat, val, dst_gold, mask, depth.second); - EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + } } INSTANTIATE_TEST_CASE_P(GPU_Core, Subtract_Scalar, testing::Combine( @@ -240,7 +360,7 @@ PARAM_TEST_CASE(Multiply_Array, cv::gpu::DeviceInfo, cv::Size, std::pair depth; + std::pair depth; int channels; bool useRoi; @@ -262,19 +382,63 @@ PARAM_TEST_CASE(Multiply_Array, cv::gpu::DeviceInfo, cv::Size, std::pair= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + } +} + +TEST_P(Multiply_Array, WithScale) { cv::Mat mat1 = randomMat(size, stype); cv::Mat mat2 = randomMat(size, stype); double scale = randomDouble(0.0, 255.0); - cv::gpu::GpuMat dst = createMat(size, dtype, useRoi); - cv::gpu::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, scale, depth.second); + if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat dst; + cv::gpu::multiply(loadMat(mat1), loadMat(mat2), dst, scale, depth.second); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat dst = createMat(size, dtype, useRoi); + cv::gpu::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, scale, depth.second); - cv::Mat dst_gold; - cv::multiply(mat1, mat2, dst_gold, scale, depth.second); + cv::Mat dst_gold; + cv::multiply(mat1, mat2, dst_gold, scale, depth.second); - EXPECT_MAT_NEAR(dst_gold, dst, 1.0); + EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + } } INSTANTIATE_TEST_CASE_P(GPU_Core, Multiply_Array, testing::Combine( @@ -389,7 +553,7 @@ PARAM_TEST_CASE(Multiply_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair depth; + std::pair depth; bool useRoi; virtual void SetUp() @@ -403,19 +567,64 @@ PARAM_TEST_CASE(Multiply_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair= CV_32F || depth.second >= CV_32F ? 
1e-2 : 0.0); + } +} + + +TEST_P(Multiply_Scalar, WithScale) { cv::Mat mat = randomMat(size, depth.first); cv::Scalar val = randomScalar(0, 255); double scale = randomDouble(0.0, 255.0); - cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); - cv::gpu::multiply(loadMat(mat, useRoi), val, dst, scale, depth.second); + if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat dst; + cv::gpu::multiply(loadMat(mat), val, dst, scale, depth.second); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); + cv::gpu::multiply(loadMat(mat, useRoi), val, dst, scale, depth.second); - cv::Mat dst_gold; - cv::multiply(mat, val, dst_gold, scale, depth.second); + cv::Mat dst_gold; + cv::multiply(mat, val, dst_gold, scale, depth.second); - EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + } } INSTANTIATE_TEST_CASE_P(GPU_Core, Multiply_Scalar, testing::Combine( @@ -431,7 +640,7 @@ PARAM_TEST_CASE(Divide_Array, cv::gpu::DeviceInfo, cv::Size, std::pair depth; + std::pair depth; int channels; bool useRoi; @@ -453,19 +662,64 @@ PARAM_TEST_CASE(Divide_Array, cv::gpu::DeviceInfo, cv::Size, std::pair= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0); + } +} + + +TEST_P(Divide_Array, WithScale) { cv::Mat mat1 = randomMat(size, stype); cv::Mat mat2 = randomMat(size, stype, 1.0, 255.0); double scale = randomDouble(0.0, 255.0); - cv::gpu::GpuMat dst = createMat(size, dtype, useRoi); - cv::gpu::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, scale, depth.second); + if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat dst; + cv::gpu::divide(loadMat(mat1), loadMat(mat2), dst, scale, depth.second); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat dst = createMat(size, dtype, useRoi); + cv::gpu::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, scale, depth.second); - cv::Mat dst_gold; - cv::divide(mat1, mat2, dst_gold, scale, depth.second); + cv::Mat dst_gold; + cv::divide(mat1, mat2, dst_gold, scale, depth.second); - EXPECT_MAT_NEAR(dst_gold, dst, 1.0); + EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0); + } } INSTANTIATE_TEST_CASE_P(GPU_Core, Divide_Array, testing::Combine( @@ -580,7 +834,7 @@ PARAM_TEST_CASE(Divide_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair depth; + std::pair depth; bool useRoi; virtual void SetUp() @@ -594,19 +848,63 @@ PARAM_TEST_CASE(Divide_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair= CV_32F || depth.second >= CV_32F ? 
1e-4 : 0.0); + } +} + +TEST_P(Divide_Scalar, WithScale) { cv::Mat mat = randomMat(size, depth.first); cv::Scalar val = randomScalar(1.0, 255.0); double scale = randomDouble(0.0, 255.0); - cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); - cv::gpu::divide(loadMat(mat, useRoi), val, dst, scale, depth.second); + if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat dst; + cv::gpu::divide(loadMat(mat), val, dst, scale, depth.second); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); + cv::gpu::divide(loadMat(mat, useRoi), val, dst, scale, depth.second); - cv::Mat dst_gold; - cv::divide(mat, val, dst_gold, scale, depth.second); + cv::Mat dst_gold; + cv::divide(mat, val, dst_gold, scale, depth.second); - EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + } } INSTANTIATE_TEST_CASE_P(GPU_Core, Divide_Scalar, testing::Combine( @@ -622,7 +920,7 @@ PARAM_TEST_CASE(Divide_Scalar_Inv, cv::gpu::DeviceInfo, cv::Size, std::pair depth; + std::pair depth; bool useRoi; virtual void SetUp() @@ -641,13 +939,28 @@ TEST_P(Divide_Scalar_Inv, Accuracy) double scale = randomDouble(0.0, 255.0); cv::Mat mat = randomMat(size, depth.first, 1.0, 255.0); - cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); - cv::gpu::divide(scale, loadMat(mat, useRoi), dst, depth.second); + if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat dst; + cv::gpu::divide(scale, loadMat(mat), dst, depth.second); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(CV_StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); + cv::gpu::divide(scale, loadMat(mat, useRoi), dst, depth.second); - cv::Mat dst_gold; - cv::divide(scale, mat, dst_gold, depth.second); + cv::Mat dst_gold; + cv::divide(scale, mat, dst_gold, depth.second); - EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 
1e-4 : 0.0); + } } INSTANTIATE_TEST_CASE_P(GPU_Core, Divide_Scalar_Inv, testing::Combine( diff --git a/modules/highgui/src/window_gtk.cpp b/modules/highgui/src/window_gtk.cpp index e68a8cb..33cc72a 100644 --- a/modules/highgui/src/window_gtk.cpp +++ b/modules/highgui/src/window_gtk.cpp @@ -254,7 +254,7 @@ static void cvImageWidget_set_size(GtkWidget * widget, int max_width, int max_he } static void -cvImageWidget_size_allocate (GtkWidget *widget, +cvImageWidget_size_allocate (GtkWidget *widget, GtkAllocation *allocation) { CvImageWidget *image_widget; @@ -719,7 +719,7 @@ namespace void generateBitmapFont(const std::string& family, int height, int weight, bool italic, bool underline, int start, int count, int base) const; bool isGlContextInitialized() const; - + PFNGLGENBUFFERSPROC glGenBuffersExt; PFNGLDELETEBUFFERSPROC glDeleteBuffersExt; @@ -866,22 +866,22 @@ namespace CV_FUNCNAME( "GlFuncTab_GTK::generateBitmapFont" ); - __BEGIN__; - + __BEGIN__; + fontDecr = pango_font_description_new(); - + pango_font_description_set_size(fontDecr, height); - + pango_font_description_set_family_static(fontDecr, family.c_str()); - + pango_font_description_set_weight(fontDecr, static_cast(weight)); - + pango_font_description_set_style(fontDecr, italic ? PANGO_STYLE_ITALIC : PANGO_STYLE_NORMAL); - + pangoFont = gdk_gl_font_use_pango_font(fontDecr, start, count, base); - + pango_font_description_free(fontDecr); - + if (!pangoFont) CV_ERROR(CV_OpenGlApiCallError, "Can't create font"); @@ -960,13 +960,13 @@ namespace void releaseGlContext(CvWindow* window) { - CV_FUNCNAME( "releaseGlContext" ); + //CV_FUNCNAME( "releaseGlContext" ); - __BEGIN__; + //__BEGIN__; window->useGl = false; - __END__; + //__END__; } void drawGl(CvWindow* window)
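
The Multiply changes in element_operations.cu above appear to be there so that GPU rounding matches the CPU arithmetic path, which uses a float work type for small destination types and keeps double only where the destination (or the int case) needs it; the old GPU functor always computed in double, so the two sides could occasionally round differently. Below is a minimal host-side sketch of that per-destination dispatch, for illustration only: it is not the commit's CUDA code, and the saturate<> helper is a made-up stand-in for cv::saturate_cast.

// Simplified host-side sketch of the Multiply dispatch above -- illustration only,
// not the commit's CUDA code. saturate<> is a hypothetical stand-in for cv::saturate_cast.
#include <algorithm>
#include <cmath>
#include <cstdio>

template <typename D> D saturate(double v);                     // primary template: declared only
template <> unsigned char saturate<unsigned char>(double v)     // round to nearest, clamp to [0,255]
{
    double r = std::floor(v + 0.5);
    return static_cast<unsigned char>(std::min(255.0, std::max(0.0, r)));
}
template <> double saturate<double>(double v) { return v; }

// Generic case: compute in float so rounding follows the CPU path for small types.
template <typename T, typename D> struct Multiply
{
    explicit Multiply(float s) : scale(s) {}
    D operator ()(T a, T b) const { return saturate<D>(scale * a * b); }
    float scale;
};

// Double destination: keep full double precision, no saturation needed.
template <typename T> struct Multiply<T, double>
{
    explicit Multiply(double s) : scale(s) {}
    double operator ()(T a, T b) const { return scale * a * b; }
    double scale;
};

int main()
{
    Multiply<unsigned char, unsigned char> m8(0.5f);   // float work type, saturated result
    Multiply<unsigned char, double>        m64(0.5);   // double work type, exact result
    std::printf("%d %f\n", static_cast<int>(m8(10, 7)), m64(10, 7));   // prints "35 35.000000"
    return 0;
}

In the commit itself the same selection is done by the MultiplyCaller specializations, which also call cudaSetDoubleForDevice before launching the double-precision variants, and the NPP fast path in element_operations.cpp is now taken only when scale == 1 and the source depth is at most CV_32F.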