{\r
typedef typename NppArithmFunc<DEPTH>::npp_t npp_t;\r
\r
- static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream)\r
+ static void call(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream)\r
{\r
NppStreamHandler h(stream);\r
\r
NppiSize sz;
sz.width = src1.cols;
sz.height = src1.rows;
\r
nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step), (const npp_t*)src2.data, static_cast<int>(src2.step),\r
- (npp_t*)dst.data, static_cast<int>(dst.step), sz, 0) );\r
+ (npp_t*)dst.data, static_cast<int>(dst.step), sz, 0) );\r
\r
if (stream == 0)\r
cudaSafeCall( cudaDeviceSynchronize() );\r
}\r
- static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream)\r
- {\r
- call(src1, src2, dst, PtrStepb(), stream);\r
- }\r
};\r
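
For reference, a minimal sketch (not part of the patch) of how this wrapper is used: each entry in the dispatch tables below binds one NPP primitive per depth, and the trailing 0 passed to func is NPP's nScaleFactor, i.e. the result is not right-shifted:

// saturated 8-bit addition through the wrapper, assuming DevMem2Db views src1, src2, dst
NppArithm<CV_8U, nppiAdd_8u_C1RSfs>::call(src1, src2, dst, stream);
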
template <typename NppArithmFunc<CV_32F>::func_t func> struct NppArithm<CV_32F, func>\r
{\r
typedef typename NppArithmFunc<CV_32F>::npp_t npp_t;\r
\r
- static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream)\r
+ static void call(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream)\r
{\r
NppStreamHandler h(stream);\r
\r
NppiSize sz;
sz.width = src1.cols;
sz.height = src1.rows;
\r
nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step), (const npp_t*)src2.data, static_cast<int>(src2.step),\r
- (npp_t*)dst.data, static_cast<int>(dst.step), sz) );\r
+ (npp_t*)dst.data, static_cast<int>(dst.step), sz) );\r
\r
if (stream == 0)\r
cudaSafeCall( cudaDeviceSynchronize() );\r
}\r
- static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream)\r
- {\r
- call(src1, src2, dst, PtrStepb(), stream);\r
- }\r
};
-}\r
-\r
-////////////////////////////////////////////////////////////////////////\r
-// add\r
-\r
-namespace cv { namespace gpu { namespace device\r
-{\r
- template <typename T, typename D>\r
- void add_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);\r
-\r
- template <typename T, typename D>\r
- void add_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);\r
-}}}\r
-\r
-void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)\r
-{\r
- using namespace ::cv::gpu::device;\r
-\r
- typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);\r
-\r
- static const func_t funcs[7][7] =\r
- {\r
- {add_gpu<unsigned char, unsigned char>, 0/*add_gpu<unsigned char, signed char>*/, add_gpu<unsigned char, unsigned short>, add_gpu<unsigned char, short>, add_gpu<unsigned char, int>, add_gpu<unsigned char, float>, add_gpu<unsigned char, double>},\r
- {0/*add_gpu<signed char, unsigned char>*/, 0/*add_gpu<signed char, signed char>*/, 0/*add_gpu<signed char, unsigned short>*/, 0/*add_gpu<signed char, short>*/, 0/*add_gpu<signed char, int>*/, 0/*add_gpu<signed char, float>*/, 0/*add_gpu<signed char, double>*/},\r
- {0/*add_gpu<unsigned short, unsigned char>*/, 0/*add_gpu<unsigned short, signed char>*/, add_gpu<unsigned short, unsigned short>, 0/*add_gpu<unsigned short, short>*/, add_gpu<unsigned short, int>, add_gpu<unsigned short, float>, add_gpu<unsigned short, double>},\r
- {0/*add_gpu<short, unsigned char>*/, 0/*add_gpu<short, signed char>*/, 0/*add_gpu<short, unsigned short>*/, add_gpu<short, short>, add_gpu<short, int>, add_gpu<short, float>, add_gpu<short, double>},\r
- {0/*add_gpu<int, unsigned char>*/, 0/*add_gpu<int, signed char>*/, 0/*add_gpu<int, unsigned short>*/, 0/*add_gpu<int, short>*/, add_gpu<int, int>, add_gpu<int, float>, add_gpu<int, double>},\r
- {0/*add_gpu<float, unsigned char>*/, 0/*add_gpu<float, signed char>*/, 0/*add_gpu<float, unsigned short>*/, 0/*add_gpu<float, short>*/, 0/*add_gpu<float, int>*/, add_gpu<float, float>, add_gpu<float, double>},\r
- {0/*add_gpu<double, unsigned char>*/, 0/*add_gpu<double, signed char>*/, 0/*add_gpu<double, unsigned short>*/, 0/*add_gpu<double, short>*/, 0/*add_gpu<double, int>*/, 0/*add_gpu<double, float>*/, add_gpu<double, double>}\r
};\r
\r
- static const func_t npp_funcs[7] =\r
- {\r
- NppArithm<CV_8U, nppiAdd_8u_C1RSfs>::call,\r
- 0,\r
- NppArithm<CV_16U, nppiAdd_16u_C1RSfs>::call,\r
- NppArithm<CV_16S, nppiAdd_16s_C1RSfs>::call,\r
- NppArithm<CV_32S, nppiAdd_32s_C1RSfs>::call,\r
- NppArithm<CV_32F, nppiAdd_32f_C1R>::call,\r
- add_gpu<double, double>\r
- };\r
-\r
- CV_Assert(src1.type() != CV_8S);\r
- CV_Assert(src1.type() == src2.type() && src1.size() == src2.size());\r
- CV_Assert(mask.empty() || (src1.channels() == 1 && mask.size() == src1.size() && mask.type() == CV_8U));\r
-\r
- if (dtype < 0)\r
- dtype = src1.depth();\r
-\r
- dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()));\r
-\r
- cudaStream_t stream = StreamAccessor::getStream(s);\r
-\r
- if (mask.empty() && dst.type() == src1.type())\r
- {\r
- npp_funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), PtrStepb(), stream);\r
- return;\r
- }\r
-\r
- const func_t func = funcs[src1.depth()][dst.depth()];\r
- CV_Assert(func != 0);\r
-\r
- func(src1.reshape(1), src2.reshape(1), dst.reshape(1), mask, stream);\r
-}\r
-\r
-namespace\r
-{\r
template<int DEPTH, int cn> struct NppArithmScalarFunc
{
typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;

typedef NppStatus (*func_ptr)(const npp_t* pSrc1, int nSrc1Step, const npp_t* pConstants, npp_t* pDst, int nDstStep, NppiSize oSizeROI, int nScaleFactor);
};

template<int DEPTH, int cn, typename NppArithmScalarFunc<DEPTH, cn>::func_ptr func> struct NppArithmScalar
{
typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
\r
- static void call(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream)\r
+ static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream)\r
{\r
NppStreamHandler h(stream);

NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;

const npp_t pConstants[] = { saturate_cast<npp_t>(sc.val[0]), saturate_cast<npp_t>(sc.val[1]), saturate_cast<npp_t>(sc.val[2]), saturate_cast<npp_t>(sc.val[3]) };
\r
- nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), pConstants, dst.ptr<npp_t>(), static_cast<int>(dst.step), sz, 0) );\r
+ nppSafeCall( func((const npp_t*)src.data, static_cast<int>(src.step), pConstants, (npp_t*)dst.data, static_cast<int>(dst.step), sz, 0) );\r
\r
if (stream == 0)\r
cudaSafeCall( cudaDeviceSynchronize() );\r
}
};
template<int DEPTH, typename NppArithmScalarFunc<DEPTH, 1>::func_ptr func> struct NppArithmScalar<DEPTH, 1, func>
{
typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
\r
- static void call(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream)\r
+ static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream)\r
{\r
NppStreamHandler h(stream);\r
\r
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
\r
- nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), saturate_cast<npp_t>(sc.val[0]), dst.ptr<npp_t>(), static_cast<int>(dst.step), sz, 0) );\r
+ nppSafeCall( func((const npp_t*)src.data, static_cast<int>(src.step), saturate_cast<npp_t>(sc.val[0]), (npp_t*)dst.data, static_cast<int>(dst.step), sz, 0) );\r
\r
if (stream == 0)\r
cudaSafeCall( cudaDeviceSynchronize() );\r
}
};
template<int DEPTH, typename NppArithmScalarFunc<DEPTH, 2>::func_ptr func> struct NppArithmScalar<DEPTH, 2, func>
{
typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
typedef typename NppTypeTraits<DEPTH>::npp_complex_type npp_complex_type;
\r
- static void call(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream)\r
+ static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream)\r
{\r
NppStreamHandler h(stream);\r
\r
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;

npp_complex_type nConstant;
nConstant.re = saturate_cast<npp_t>(sc.val[0]);
nConstant.im = saturate_cast<npp_t>(sc.val[1]);
\r
- nppSafeCall( func(src.ptr<npp_complex_type>(), static_cast<int>(src.step), nConstant,\r
- dst.ptr<npp_complex_type>(), static_cast<int>(dst.step), sz, 0) );\r
+ nppSafeCall( func((const npp_complex_type*)src.data, static_cast<int>(src.step), nConstant,\r
+ (npp_complex_type*)dst.data, static_cast<int>(dst.step), sz, 0) );\r
\r
if (stream == 0)\r
cudaSafeCall( cudaDeviceSynchronize() );\r
}
};
template<int cn, typename NppArithmScalarFunc<CV_32F, cn>::func_ptr func> struct NppArithmScalar<CV_32F, cn, func>\r
{\r
- static void call(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream)\r
+ typedef typename NppTypeTraits<CV_32F>::npp_t npp_t;\r
+\r
+ static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream)\r
{\r
NppStreamHandler h(stream);

NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;

const Npp32f pConstants[] = { saturate_cast<Npp32f>(sc.val[0]), saturate_cast<Npp32f>(sc.val[1]), saturate_cast<Npp32f>(sc.val[2]), saturate_cast<Npp32f>(sc.val[3]) };
\r
- nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), pConstants, dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );\r
+ nppSafeCall( func((const npp_t*)src.data, static_cast<int>(src.step), pConstants, (npp_t*)dst.data, static_cast<int>(dst.step), sz) );\r
\r
if (stream == 0)\r
cudaSafeCall( cudaDeviceSynchronize() );\r
}
};
template<typename NppArithmScalarFunc<CV_32F, 1>::func_ptr func> struct NppArithmScalar<CV_32F, 1, func>\r
{\r
- static void call(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream)\r
+ typedef typename NppTypeTraits<CV_32F>::npp_t npp_t;\r
+\r
+ static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream)\r
{\r
NppStreamHandler h(stream);\r
\r
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
\r
- nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), saturate_cast<Npp32f>(sc.val[0]), dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );\r
+ nppSafeCall( func((const npp_t*)src.data, static_cast<int>(src.step), saturate_cast<Npp32f>(sc.val[0]), (npp_t*)dst.data, static_cast<int>(dst.step), sz) );\r
\r
if (stream == 0)\r
cudaSafeCall( cudaDeviceSynchronize() );\r
}
};
template<typename NppArithmScalarFunc<CV_32F, 2>::func_ptr func> struct NppArithmScalar<CV_32F, 2, func>\r
{\r
- static void call(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream)\r
+ typedef typename NppTypeTraits<CV_32F>::npp_t npp_t;\r
+ typedef typename NppTypeTraits<CV_32F>::npp_complex_type npp_complex_type;\r
+\r
+ static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream)\r
{\r
NppStreamHandler h(stream);\r
\r
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;

npp_complex_type nConstant;
nConstant.re = saturate_cast<Npp32f>(sc.val[0]);
nConstant.im = saturate_cast<Npp32f>(sc.val[1]);
\r
- nppSafeCall( func(src.ptr<Npp32fc>(), static_cast<int>(src.step), nConstant, dst.ptr<Npp32fc>(), static_cast<int>(dst.step), sz) );\r
+ nppSafeCall( func((const npp_complex_type*)src.data, static_cast<int>(src.step), nConstant, (npp_complex_type*)dst.data, static_cast<int>(dst.step), sz) );\r
\r
if (stream == 0)\r
cudaSafeCall( cudaDeviceSynchronize() );\r
}
};
}\r
\r
+////////////////////////////////////////////////////////////////////////\r
+// add\r
+\r
+namespace cv { namespace gpu { namespace device\r
+{\r
+ template <typename T, typename D>\r
+ void add_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);\r
+\r
+ template <typename T, typename D>\r
+ void add_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);\r
+}}}\r
+\r
+void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)\r
+{\r
+ using namespace cv::gpu::device;\r
+\r
+ typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);\r
+ static const func_t funcs[7][7] =\r
+ {\r
+ {add_gpu<unsigned char, unsigned char> , 0 /*add_gpu<unsigned char, signed char>*/ , add_gpu<unsigned char, unsigned short> , add_gpu<unsigned char, short> , add_gpu<unsigned char, int> , add_gpu<unsigned char, float> , add_gpu<unsigned char, double> },\r
+ {0 /*add_gpu<signed char, unsigned char>*/ , 0 /*add_gpu<signed char, signed char>*/ , 0 /*add_gpu<signed char, unsigned short>*/, 0 /*add_gpu<signed char, short>*/ , 0 /*add_gpu<signed char, int>*/, 0 /*add_gpu<signed char, float>*/, 0 /*add_gpu<signed char, double>*/},\r
+ {0 /*add_gpu<unsigned short, unsigned char>*/, 0 /*add_gpu<unsigned short, signed char>*/, add_gpu<unsigned short, unsigned short> , 0 /*add_gpu<unsigned short, short>*/, add_gpu<unsigned short, int> , add_gpu<unsigned short, float> , add_gpu<unsigned short, double> },\r
+ {0 /*add_gpu<short, unsigned char>*/ , 0 /*add_gpu<short, signed char>*/ , 0 /*add_gpu<short, unsigned short>*/ , add_gpu<short, short> , add_gpu<short, int> , add_gpu<short, float> , add_gpu<short, double> },\r
+ {0 /*add_gpu<int, unsigned char>*/ , 0 /*add_gpu<int, signed char>*/ , 0 /*add_gpu<int, unsigned short>*/ , 0 /*add_gpu<int, short>*/ , add_gpu<int, int> , add_gpu<int, float> , add_gpu<int, double> },\r
+ {0 /*add_gpu<float, unsigned char>*/ , 0 /*add_gpu<float, signed char>*/ , 0 /*add_gpu<float, unsigned short>*/ , 0 /*add_gpu<float, short>*/ , 0 /*add_gpu<float, int>*/ , add_gpu<float, float> , add_gpu<float, double> },\r
+ {0 /*add_gpu<double, unsigned char>*/ , 0 /*add_gpu<double, signed char>*/ , 0 /*add_gpu<double, unsigned short>*/ , 0 /*add_gpu<double, short>*/ , 0 /*add_gpu<double, int>*/ , 0 /*add_gpu<double, float>*/ , add_gpu<double, double> }\r
+ };\r
+\r
+ typedef void (*npp_func_t)(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);\r
+ static const npp_func_t npp_funcs[] =\r
+ {\r
+ NppArithm<CV_8U , nppiAdd_8u_C1RSfs >::call,\r
+ 0,\r
+ NppArithm<CV_16U, nppiAdd_16u_C1RSfs>::call,\r
+ NppArithm<CV_16S, nppiAdd_16s_C1RSfs>::call,\r
+ NppArithm<CV_32S, nppiAdd_32s_C1RSfs>::call,\r
+ NppArithm<CV_32F, nppiAdd_32f_C1R >::call\r
+ };\r
+\r
+ if (dtype < 0)\r
+ dtype = src1.depth();\r
+\r
+ CV_Assert(src1.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F);\r
+ CV_Assert(src1.type() == src2.type() && src1.size() == src2.size());\r
+ CV_Assert(mask.empty() || (src1.channels() == 1 && mask.size() == src1.size() && mask.type() == CV_8U));\r
+\r
+ if (src1.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F)\r
+ {\r
+ if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))\r
+ CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");\r
+ }\r
+\r
+ dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()));\r
+\r
+ cudaStream_t stream = StreamAccessor::getStream(s);\r
+\r
+ if (mask.empty() && dst.type() == src1.type() && src1.depth() <= CV_32F)\r
+ {\r
+ npp_funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);\r
+ return;\r
+ }\r
+\r
+ const func_t func = funcs[src1.depth()][dst.depth()];\r
+\r
+ if (!func)\r
+ CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");\r
+\r
+ func(src1.reshape(1), src2.reshape(1), dst.reshape(1), mask, stream);\r
+}\r
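
A minimal usage sketch (not part of the patch; hostA and hostB are assumed to be CV_8UC1 cv::Mat inputs) of the widening path above:

cv::gpu::GpuMat a, b, sum;
a.upload(hostA);
b.upload(hostB);
cv::gpu::add(a, b, sum, cv::gpu::GpuMat(), CV_16U); // widen to CV_16U so 8-bit sums don't saturate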
+\r
void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)\r
{\r
- using namespace ::cv::gpu::device;\r
+ using namespace cv::gpu::device;\r
\r
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);\r
-\r
static const func_t funcs[7][7] =\r
{\r
- {add_gpu<unsigned char, unsigned char>, 0/*add_gpu<unsigned char, signed char>*/, add_gpu<unsigned char, unsigned short>, add_gpu<unsigned char, short>, add_gpu<unsigned char, int>, add_gpu<unsigned char, float>, add_gpu<unsigned char, double>},\r
- {0/*add_gpu<signed char, unsigned char>*/, 0/*add_gpu<signed char, signed char>*/, 0/*add_gpu<signed char, unsigned short>*/, 0/*add_gpu<signed char, short>*/, 0/*add_gpu<signed char, int>*/, 0/*add_gpu<signed char, float>*/, 0/*add_gpu<signed char, double>*/},\r
- {0/*add_gpu<unsigned short, unsigned char>*/, 0/*add_gpu<unsigned short, signed char>*/, add_gpu<unsigned short, unsigned short>, 0/*add_gpu<unsigned short, short>*/, add_gpu<unsigned short, int>, add_gpu<unsigned short, float>, add_gpu<unsigned short, double>},\r
- {0/*add_gpu<short, unsigned char>*/, 0/*add_gpu<short, signed char>*/, 0/*add_gpu<short, unsigned short>*/, add_gpu<short, short>, add_gpu<short, int>, add_gpu<short, float>, add_gpu<short, double>},\r
- {0/*add_gpu<int, unsigned char>*/, 0/*add_gpu<int, signed char>*/, 0/*add_gpu<int, unsigned short>*/, 0/*add_gpu<int, short>*/, add_gpu<int, int>, add_gpu<int, float>, add_gpu<int, double>},\r
- {0/*add_gpu<float, unsigned char>*/, 0/*add_gpu<float, signed char>*/, 0/*add_gpu<float, unsigned short>*/, 0/*add_gpu<float, short>*/, 0/*add_gpu<float, int>*/, add_gpu<float, float>, add_gpu<float, double>},\r
- {0/*add_gpu<double, unsigned char>*/, 0/*add_gpu<double, signed char>*/, 0/*add_gpu<double, unsigned short>*/, 0/*add_gpu<double, short>*/, 0/*add_gpu<double, int>*/, 0/*add_gpu<double, float>*/, add_gpu<double, double>}\r
+ {add_gpu<unsigned char, unsigned char> , 0 /*add_gpu<unsigned char, signed char>*/ , add_gpu<unsigned char, unsigned short> , add_gpu<unsigned char, short> , add_gpu<unsigned char, int> , add_gpu<unsigned char, float> , add_gpu<unsigned char, double> },\r
+ {0 /*add_gpu<signed char, unsigned char>*/ , 0 /*add_gpu<signed char, signed char>*/ , 0 /*add_gpu<signed char, unsigned short>*/, 0 /*add_gpu<signed char, short>*/ , 0 /*add_gpu<signed char, int>*/, 0 /*add_gpu<signed char, float>*/, 0 /*add_gpu<signed char, double>*/},\r
+ {0 /*add_gpu<unsigned short, unsigned char>*/, 0 /*add_gpu<unsigned short, signed char>*/, add_gpu<unsigned short, unsigned short> , 0 /*add_gpu<unsigned short, short>*/, add_gpu<unsigned short, int> , add_gpu<unsigned short, float> , add_gpu<unsigned short, double> },\r
+ {0 /*add_gpu<short, unsigned char>*/ , 0 /*add_gpu<short, signed char>*/ , 0 /*add_gpu<short, unsigned short>*/ , add_gpu<short, short> , add_gpu<short, int> , add_gpu<short, float> , add_gpu<short, double> },\r
+ {0 /*add_gpu<int, unsigned char>*/ , 0 /*add_gpu<int, signed char>*/ , 0 /*add_gpu<int, unsigned short>*/ , 0 /*add_gpu<int, short>*/ , add_gpu<int, int> , add_gpu<int, float> , add_gpu<int, double> },\r
+ {0 /*add_gpu<float, unsigned char>*/ , 0 /*add_gpu<float, signed char>*/ , 0 /*add_gpu<float, unsigned short>*/ , 0 /*add_gpu<float, short>*/ , 0 /*add_gpu<float, int>*/ , add_gpu<float, float> , add_gpu<float, double> },\r
+ {0 /*add_gpu<double, unsigned char>*/ , 0 /*add_gpu<double, signed char>*/ , 0 /*add_gpu<double, unsigned short>*/ , 0 /*add_gpu<double, short>*/ , 0 /*add_gpu<double, int>*/ , 0 /*add_gpu<double, float>*/ , add_gpu<double, double> }\r
};\r
\r
- typedef void (*npp_func_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream);\r
+ typedef void (*npp_func_t)(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream);\r
static const npp_func_t npp_funcs[7][4] =\r
{\r
- {NppArithmScalar<CV_8U, 1, nppiAddC_8u_C1RSfs>::call, 0, NppArithmScalar<CV_8U, 3, nppiAddC_8u_C3RSfs>::call, NppArithmScalar<CV_8U, 4, nppiAddC_8u_C4RSfs>::call},\r
- {0,0,0,0},\r
- {NppArithmScalar<CV_16U, 1, nppiAddC_16u_C1RSfs>::call, 0, NppArithmScalar<CV_16U, 3, nppiAddC_16u_C3RSfs>::call, NppArithmScalar<CV_16U, 4, nppiAddC_16u_C4RSfs>::call},\r
+ {NppArithmScalar<CV_8U , 1, nppiAddC_8u_C1RSfs >::call, 0 , NppArithmScalar<CV_8U , 3, nppiAddC_8u_C3RSfs >::call, NppArithmScalar<CV_8U , 4, nppiAddC_8u_C4RSfs >::call},\r
+ {0 , 0 , 0 , 0 },\r
+ {NppArithmScalar<CV_16U, 1, nppiAddC_16u_C1RSfs>::call, 0 , NppArithmScalar<CV_16U, 3, nppiAddC_16u_C3RSfs>::call, NppArithmScalar<CV_16U, 4, nppiAddC_16u_C4RSfs>::call},\r
{NppArithmScalar<CV_16S, 1, nppiAddC_16s_C1RSfs>::call, NppArithmScalar<CV_16S, 2, nppiAddC_16sc_C1RSfs>::call, NppArithmScalar<CV_16S, 3, nppiAddC_16s_C3RSfs>::call, NppArithmScalar<CV_16S, 4, nppiAddC_16s_C4RSfs>::call},\r
- {NppArithmScalar<CV_32S, 1, nppiAddC_32s_C1RSfs>::call, NppArithmScalar<CV_32S, 2, nppiAddC_32sc_C1RSfs>::call, NppArithmScalar<CV_32S, 3, nppiAddC_32s_C3RSfs>::call, 0},\r
- {NppArithmScalar<CV_32F, 1, nppiAddC_32f_C1R>::call, NppArithmScalar<CV_32F, 2, nppiAddC_32fc_C1R>::call, NppArithmScalar<CV_32F, 3, nppiAddC_32f_C3R>::call, NppArithmScalar<CV_32F, 4, nppiAddC_32f_C4R>::call},\r
- {0,0,0,0}\r
+ {NppArithmScalar<CV_32S, 1, nppiAddC_32s_C1RSfs>::call, NppArithmScalar<CV_32S, 2, nppiAddC_32sc_C1RSfs>::call, NppArithmScalar<CV_32S, 3, nppiAddC_32s_C3RSfs>::call, 0 },\r
+ {NppArithmScalar<CV_32F, 1, nppiAddC_32f_C1R >::call, NppArithmScalar<CV_32F, 2, nppiAddC_32fc_C1R >::call, NppArithmScalar<CV_32F, 3, nppiAddC_32f_C3R >::call, NppArithmScalar<CV_32F, 4, nppiAddC_32f_C4R >::call},\r
+ {0 , 0 , 0 , 0 }\r
};\r
\r
- CV_Assert(mask.empty() || (src.channels() == 1 && mask.size() == src.size() && mask.type() == CV_8U));\r
-\r
if (dtype < 0)\r
dtype = src.depth();\r
\r
+ CV_Assert(src.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F);\r
+ CV_Assert(src.channels() <= 4);\r
+ CV_Assert(mask.empty() || (src.channels() == 1 && mask.size() == src.size() && mask.type() == CV_8U));\r
+\r
+ if (src.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F)\r
+ {\r
+ if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))\r
+ CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");\r
+ }\r
+\r
dst.create(src.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()));\r
\r
cudaStream_t stream = StreamAccessor::getStream(s);\r

if (mask.empty() && dst.type() == src.type() && (src.depth() == CV_32F || isIntScalar(sc)))
{
const npp_func_t npp_func = npp_funcs[src.depth()][src.channels() - 1];

if (npp_func)
{
npp_func(src, sc, dst, stream);
return;
}
}

CV_Assert(src.channels() == 1);
\r
const func_t func = funcs[src.depth()][dst.depth()];\r
- CV_Assert(func != 0);\r
+\r
+ if (!func)\r
+ CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");\r
\r
func(src, sc.val[0], dst, mask, stream);\r
}\r
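
A minimal sketch (not part of the patch) of the scalar overload, which dispatches through NPP AddC for 1-, 3-, and 4-channel images; img is assumed to be a CV_8UC3 GpuMat:

cv::gpu::GpuMat shifted;
cv::gpu::add(img, cv::Scalar(10, 10, 10), shifted); // resolves to NppArithmScalar<CV_8U, 3, nppiAddC_8u_C3RSfs>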
\r
void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)\r
{\r
- using namespace ::cv::gpu::device;\r
+ using namespace cv::gpu::device;\r
\r
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);\r
-\r
static const func_t funcs[7][7] =\r
{\r
- {subtract_gpu<unsigned char, unsigned char>, 0/*subtract_gpu<unsigned char, signed char>*/, subtract_gpu<unsigned char, unsigned short>, subtract_gpu<unsigned char, short>, subtract_gpu<unsigned char, int>, subtract_gpu<unsigned char, float>, subtract_gpu<unsigned char, double>},\r
- {0/*subtract_gpu<signed char, unsigned char>*/, 0/*subtract_gpu<signed char, signed char>*/, 0/*subtract_gpu<signed char, unsigned short>*/, 0/*subtract_gpu<signed char, short>*/, 0/*subtract_gpu<signed char, int>*/, 0/*subtract_gpu<signed char, float>*/, 0/*subtract_gpu<signed char, double>*/},\r
- {0/*subtract_gpu<unsigned short, unsigned char>*/, 0/*subtract_gpu<unsigned short, signed char>*/, subtract_gpu<unsigned short, unsigned short>, 0/*subtract_gpu<unsigned short, short>*/, subtract_gpu<unsigned short, int>, subtract_gpu<unsigned short, float>, subtract_gpu<unsigned short, double>},\r
- {0/*subtract_gpu<short, unsigned char>*/, 0/*subtract_gpu<short, signed char>*/, 0/*subtract_gpu<short, unsigned short>*/, subtract_gpu<short, short>, subtract_gpu<short, int>, subtract_gpu<short, float>, subtract_gpu<short, double>},\r
- {0/*subtract_gpu<int, unsigned char>*/, 0/*subtract_gpu<int, signed char>*/, 0/*subtract_gpu<int, unsigned short>*/, 0/*subtract_gpu<int, short>*/, subtract_gpu<int, int>, subtract_gpu<int, float>, subtract_gpu<int, double>},\r
- {0/*subtract_gpu<float, unsigned char>*/, 0/*subtract_gpu<float, signed char>*/, 0/*subtract_gpu<float, unsigned short>*/, 0/*subtract_gpu<float, short>*/, 0/*subtract_gpu<float, int>*/, subtract_gpu<float, float>, subtract_gpu<float, double>},\r
- {0/*subtract_gpu<double, unsigned char>*/, 0/*subtract_gpu<double, signed char>*/, 0/*subtract_gpu<double, unsigned short>*/, 0/*subtract_gpu<double, short>*/, 0/*subtract_gpu<double, int>*/, 0/*subtract_gpu<double, float>*/, subtract_gpu<double, double>}\r
+ {subtract_gpu<unsigned char, unsigned char> , 0 /*subtract_gpu<unsigned char, signed char>*/ , subtract_gpu<unsigned char, unsigned short> , subtract_gpu<unsigned char, short> , subtract_gpu<unsigned char, int> , subtract_gpu<unsigned char, float> , subtract_gpu<unsigned char, double> },\r
+ {0 /*subtract_gpu<signed char, unsigned char>*/ , 0 /*subtract_gpu<signed char, signed char>*/ , 0 /*subtract_gpu<signed char, unsigned short>*/, 0 /*subtract_gpu<signed char, short>*/ , 0 /*subtract_gpu<signed char, int>*/, 0 /*subtract_gpu<signed char, float>*/, 0 /*subtract_gpu<signed char, double>*/},\r
+ {0 /*subtract_gpu<unsigned short, unsigned char>*/, 0 /*subtract_gpu<unsigned short, signed char>*/, subtract_gpu<unsigned short, unsigned short> , 0 /*subtract_gpu<unsigned short, short>*/, subtract_gpu<unsigned short, int> , subtract_gpu<unsigned short, float> , subtract_gpu<unsigned short, double> },\r
+ {0 /*subtract_gpu<short, unsigned char>*/ , 0 /*subtract_gpu<short, signed char>*/ , 0 /*subtract_gpu<short, unsigned short>*/ , subtract_gpu<short, short> , subtract_gpu<short, int> , subtract_gpu<short, float> , subtract_gpu<short, double> },\r
+ {0 /*subtract_gpu<int, unsigned char>*/ , 0 /*subtract_gpu<int, signed char>*/ , 0 /*subtract_gpu<int, unsigned short>*/ , 0 /*subtract_gpu<int, short>*/ , subtract_gpu<int, int> , subtract_gpu<int, float> , subtract_gpu<int, double> },\r
+ {0 /*subtract_gpu<float, unsigned char>*/ , 0 /*subtract_gpu<float, signed char>*/ , 0 /*subtract_gpu<float, unsigned short>*/ , 0 /*subtract_gpu<float, short>*/ , 0 /*subtract_gpu<float, int>*/ , subtract_gpu<float, float> , subtract_gpu<float, double> },\r
+ {0 /*subtract_gpu<double, unsigned char>*/ , 0 /*subtract_gpu<double, signed char>*/ , 0 /*subtract_gpu<double, unsigned short>*/ , 0 /*subtract_gpu<double, short>*/ , 0 /*subtract_gpu<double, int>*/ , 0 /*subtract_gpu<double, float>*/ , subtract_gpu<double, double> }\r
};\r
\r
- static const func_t npp_funcs[6] =\r
+ typedef void (*npp_func_t)(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);\r
+ static const npp_func_t npp_funcs[6] =\r
{\r
- NppArithm<CV_8U, nppiSub_8u_C1RSfs>::call,\r
+ NppArithm<CV_8U , nppiSub_8u_C1RSfs>::call,\r
0,\r
NppArithm<CV_16U, nppiSub_16u_C1RSfs>::call,\r
NppArithm<CV_16S, nppiSub_16s_C1RSfs>::call,\r
NppArithm<CV_32S, nppiSub_32s_C1RSfs>::call,\r
- NppArithm<CV_32F, nppiSub_32f_C1R>::call\r
+ NppArithm<CV_32F, nppiSub_32f_C1R >::call\r
};\r
\r
- CV_Assert(src1.type() != CV_8S);\r
+ if (dtype < 0)\r
+ dtype = src1.depth();\r
+\r
+ CV_Assert(src1.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F);\r
CV_Assert(src1.type() == src2.type() && src1.size() == src2.size());\r
CV_Assert(mask.empty() || (src1.channels() == 1 && mask.size() == src1.size() && mask.type() == CV_8U));\r
\r
- if (dtype < 0)\r
- dtype = src1.depth();\r
+ if (src1.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F)\r
+ {\r
+ if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))\r
+ CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");\r
+ }\r
\r
dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()));

cudaStream_t stream = StreamAccessor::getStream(s);

if (mask.empty() && dst.type() == src1.type() && src1.depth() <= CV_32F)\r
{\r
- npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), PtrStepb(), stream);\r
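+ // note: nppiSub subtracts its first operand from its second (pSrc2 - pSrc1),
+ // so src2 is passed first to compute src1 - src2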
+ npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), stream);\r
return;\r
}\r
\r
const func_t func = funcs[src1.depth()][dst.depth()];\r
- CV_Assert(func != 0);\r
+\r
+ if (!func)\r
+ CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");\r
\r
func(src1.reshape(1), src2.reshape(1), dst.reshape(1), mask, stream);\r
}\r
\r
void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)\r
{\r
- using namespace ::cv::gpu::device;\r
+ using namespace cv::gpu::device;\r
\r
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);\r
-\r
static const func_t funcs[7][7] =\r
{\r
- {subtract_gpu<unsigned char, unsigned char>, 0/*subtract_gpu<unsigned char, signed char>*/, subtract_gpu<unsigned char, unsigned short>, subtract_gpu<unsigned char, short>, subtract_gpu<unsigned char, int>, subtract_gpu<unsigned char, float>, subtract_gpu<unsigned char, double>},\r
- {0/*subtract_gpu<signed char, unsigned char>*/, 0/*subtract_gpu<signed char, signed char>*/, 0/*subtract_gpu<signed char, unsigned short>*/, 0/*subtract_gpu<signed char, short>*/, 0/*subtract_gpu<signed char, int>*/, 0/*subtract_gpu<signed char, float>*/, 0/*subtract_gpu<signed char, double>*/},\r
- {0/*subtract_gpu<unsigned short, unsigned char>*/, 0/*subtract_gpu<unsigned short, signed char>*/, subtract_gpu<unsigned short, unsigned short>, 0/*subtract_gpu<unsigned short, short>*/, subtract_gpu<unsigned short, int>, subtract_gpu<unsigned short, float>, subtract_gpu<unsigned short, double>},\r
- {0/*subtract_gpu<short, unsigned char>*/, 0/*subtract_gpu<short, signed char>*/, 0/*subtract_gpu<short, unsigned short>*/, subtract_gpu<short, short>, subtract_gpu<short, int>, subtract_gpu<short, float>, subtract_gpu<short, double>},\r
- {0/*subtract_gpu<int, unsigned char>*/, 0/*subtract_gpu<int, signed char>*/, 0/*subtract_gpu<int, unsigned short>*/, 0/*subtract_gpu<int, short>*/, subtract_gpu<int, int>, subtract_gpu<int, float>, subtract_gpu<int, double>},\r
- {0/*subtract_gpu<float, unsigned char>*/, 0/*subtract_gpu<float, signed char>*/, 0/*subtract_gpu<float, unsigned short>*/, 0/*subtract_gpu<float, short>*/, 0/*subtract_gpu<float, int>*/, subtract_gpu<float, float>, subtract_gpu<float, double>},\r
- {0/*subtract_gpu<double, unsigned char>*/, 0/*subtract_gpu<double, signed char>*/, 0/*subtract_gpu<double, unsigned short>*/, 0/*subtract_gpu<double, short>*/, 0/*subtract_gpu<double, int>*/, 0/*subtract_gpu<double, float>*/, subtract_gpu<double, double>}\r
+ {subtract_gpu<unsigned char, unsigned char> , 0 /*subtract_gpu<unsigned char, signed char>*/ , subtract_gpu<unsigned char, unsigned short> , subtract_gpu<unsigned char, short> , subtract_gpu<unsigned char, int> , subtract_gpu<unsigned char, float> , subtract_gpu<unsigned char, double> },\r
+ {0 /*subtract_gpu<signed char, unsigned char>*/ , 0 /*subtract_gpu<signed char, signed char>*/ , 0 /*subtract_gpu<signed char, unsigned short>*/, 0 /*subtract_gpu<signed char, short>*/ , 0 /*subtract_gpu<signed char, int>*/, 0 /*subtract_gpu<signed char, float>*/, 0 /*subtract_gpu<signed char, double>*/},\r
+ {0 /*subtract_gpu<unsigned short, unsigned char>*/, 0 /*subtract_gpu<unsigned short, signed char>*/, subtract_gpu<unsigned short, unsigned short> , 0 /*subtract_gpu<unsigned short, short>*/, subtract_gpu<unsigned short, int> , subtract_gpu<unsigned short, float> , subtract_gpu<unsigned short, double> },\r
+ {0 /*subtract_gpu<short, unsigned char>*/ , 0 /*subtract_gpu<short, signed char>*/ , 0 /*subtract_gpu<short, unsigned short>*/ , subtract_gpu<short, short> , subtract_gpu<short, int> , subtract_gpu<short, float> , subtract_gpu<short, double> },\r
+ {0 /*subtract_gpu<int, unsigned char>*/ , 0 /*subtract_gpu<int, signed char>*/ , 0 /*subtract_gpu<int, unsigned short>*/ , 0 /*subtract_gpu<int, short>*/ , subtract_gpu<int, int> , subtract_gpu<int, float> , subtract_gpu<int, double> },\r
+ {0 /*subtract_gpu<float, unsigned char>*/ , 0 /*subtract_gpu<float, signed char>*/ , 0 /*subtract_gpu<float, unsigned short>*/ , 0 /*subtract_gpu<float, short>*/ , 0 /*subtract_gpu<float, int>*/ , subtract_gpu<float, float> , subtract_gpu<float, double> },\r
+ {0 /*subtract_gpu<double, unsigned char>*/ , 0 /*subtract_gpu<double, signed char>*/ , 0 /*subtract_gpu<double, unsigned short>*/ , 0 /*subtract_gpu<double, short>*/ , 0 /*subtract_gpu<double, int>*/ , 0 /*subtract_gpu<double, float>*/ , subtract_gpu<double, double> }\r
};\r
\r
- typedef void (*npp_func_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream);\r
+ typedef void (*npp_func_t)(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream);\r
static const npp_func_t npp_funcs[7][4] =\r
{\r
- {NppArithmScalar<CV_8U, 1, nppiSubC_8u_C1RSfs>::call, 0, NppArithmScalar<CV_8U, 3, nppiSubC_8u_C3RSfs>::call, NppArithmScalar<CV_8U, 4, nppiSubC_8u_C4RSfs>::call},\r
- {0,0,0,0},\r
- {NppArithmScalar<CV_16U, 1, nppiSubC_16u_C1RSfs>::call, 0, NppArithmScalar<CV_16U, 3, nppiSubC_16u_C3RSfs>::call, NppArithmScalar<CV_16U, 4, nppiSubC_16u_C4RSfs>::call},\r
+ {NppArithmScalar<CV_8U , 1, nppiSubC_8u_C1RSfs >::call, 0 , NppArithmScalar<CV_8U , 3, nppiSubC_8u_C3RSfs >::call, NppArithmScalar<CV_8U , 4, nppiSubC_8u_C4RSfs >::call},\r
+ {0 , 0 , 0 , 0 },\r
+ {NppArithmScalar<CV_16U, 1, nppiSubC_16u_C1RSfs>::call, 0 , NppArithmScalar<CV_16U, 3, nppiSubC_16u_C3RSfs>::call, NppArithmScalar<CV_16U, 4, nppiSubC_16u_C4RSfs>::call},\r
{NppArithmScalar<CV_16S, 1, nppiSubC_16s_C1RSfs>::call, NppArithmScalar<CV_16S, 2, nppiSubC_16sc_C1RSfs>::call, NppArithmScalar<CV_16S, 3, nppiSubC_16s_C3RSfs>::call, NppArithmScalar<CV_16S, 4, nppiSubC_16s_C4RSfs>::call},\r
- {NppArithmScalar<CV_32S, 1, nppiSubC_32s_C1RSfs>::call, NppArithmScalar<CV_32S, 2, nppiSubC_32sc_C1RSfs>::call, NppArithmScalar<CV_32S, 3, nppiSubC_32s_C3RSfs>::call, 0},\r
- {NppArithmScalar<CV_32F, 1, nppiSubC_32f_C1R>::call, NppArithmScalar<CV_32F, 2, nppiSubC_32fc_C1R>::call, NppArithmScalar<CV_32F, 3, nppiSubC_32f_C3R>::call, NppArithmScalar<CV_32F, 4, nppiSubC_32f_C4R>::call},\r
- {0,0,0,0}\r
+ {NppArithmScalar<CV_32S, 1, nppiSubC_32s_C1RSfs>::call, NppArithmScalar<CV_32S, 2, nppiSubC_32sc_C1RSfs>::call, NppArithmScalar<CV_32S, 3, nppiSubC_32s_C3RSfs>::call, 0 },\r
+ {NppArithmScalar<CV_32F, 1, nppiSubC_32f_C1R >::call, NppArithmScalar<CV_32F, 2, nppiSubC_32fc_C1R >::call, NppArithmScalar<CV_32F, 3, nppiSubC_32f_C3R >::call, NppArithmScalar<CV_32F, 4, nppiSubC_32f_C4R >::call},\r
+ {0 , 0 , 0 , 0 }\r
};\r
\r
- CV_Assert(mask.empty() || (src.channels() == 1 && mask.size() == src.size() && mask.type() == CV_8U));\r
-\r
if (dtype < 0)\r
dtype = src.depth();\r
\r
+ CV_Assert(src.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F);\r
+ CV_Assert(src.channels() <= 4);\r
+ CV_Assert(mask.empty() || (src.channels() == 1 && mask.size() == src.size() && mask.type() == CV_8U));\r
+\r
+ if (src.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F)\r
+ {\r
+ if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))\r
+ CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");\r
+ }\r
+\r
dst.create(src.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()));\r
\r
cudaStream_t stream = StreamAccessor::getStream(s);\r

if (mask.empty() && dst.type() == src.type() && (src.depth() == CV_32F || isIntScalar(sc)))
{
const npp_func_t npp_func = npp_funcs[src.depth()][src.channels() - 1];

if (npp_func)
{
npp_func(src, sc, dst, stream);
return;
}
}

CV_Assert(src.channels() == 1);
\r
const func_t func = funcs[src.depth()][dst.depth()];\r
- CV_Assert(func != 0);\r
+\r
+ if (!func)\r
+ CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");\r
\r
func(src, sc.val[0], dst, mask, stream);\r
}\r
\r
void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s)\r
{\r
- using namespace ::cv::gpu::device;\r
-\r
- typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);\r
-\r
- static const func_t funcs[7][7] =\r
- {\r
- {multiply_gpu<unsigned char, unsigned char>, 0/*multiply_gpu<unsigned char, signed char>*/, multiply_gpu<unsigned char, unsigned short>, multiply_gpu<unsigned char, short>, multiply_gpu<unsigned char, int>, multiply_gpu<unsigned char, float>, multiply_gpu<unsigned char, double>},\r
- {0/*multiply_gpu<signed char, unsigned char>*/, 0/*multiply_gpu<signed char, signed char>*/, 0/*multiply_gpu<signed char, unsigned short>*/, 0/*multiply_gpu<signed char, short>*/, 0/*multiply_gpu<signed char, int>*/, 0/*multiply_gpu<signed char, float>*/, 0/*multiply_gpu<signed char, double>*/},\r
- {0/*multiply_gpu<unsigned short, unsigned char>*/, 0/*multiply_gpu<unsigned short, signed char>*/, multiply_gpu<unsigned short, unsigned short>, 0/*multiply_gpu<unsigned short, short>*/, multiply_gpu<unsigned short, int>, multiply_gpu<unsigned short, float>, multiply_gpu<unsigned short, double>},\r
- {0/*multiply_gpu<short, unsigned char>*/, 0/*multiply_gpu<short, signed char>*/, 0/*multiply_gpu<short, unsigned short>*/, multiply_gpu<short, short>, multiply_gpu<short, int>, multiply_gpu<short, float>, multiply_gpu<short, double>},\r
- {0/*multiply_gpu<int, unsigned char>*/, 0/*multiply_gpu<int, signed char>*/, 0/*multiply_gpu<int, unsigned short>*/, 0/*multiply_gpu<int, short>*/, multiply_gpu<int, int>, multiply_gpu<int, float>, multiply_gpu<int, double>},\r
- {0/*multiply_gpu<float, unsigned char>*/, 0/*multiply_gpu<float, signed char>*/, 0/*multiply_gpu<float, unsigned short>*/, 0/*multiply_gpu<float, short>*/, 0/*multiply_gpu<float, int>*/, multiply_gpu<float, float>, multiply_gpu<float, double>},\r
- {0/*multiply_gpu<double, unsigned char>*/, 0/*multiply_gpu<double, signed char>*/, 0/*multiply_gpu<double, unsigned short>*/, 0/*multiply_gpu<double, short>*/, 0/*multiply_gpu<double, int>*/, 0/*multiply_gpu<double, float>*/, multiply_gpu<double, double>}\r
- };\r
-\r
- static const func_t npp_funcs[7] =\r
- {\r
- NppArithm<CV_8U, nppiMul_8u_C1RSfs>::call,\r
- 0,\r
- NppArithm<CV_16U, nppiMul_16u_C1RSfs>::call,\r
- NppArithm<CV_16S, nppiMul_16s_C1RSfs>::call,\r
- NppArithm<CV_32S, nppiMul_32s_C1RSfs>::call,\r
- NppArithm<CV_32F, nppiMul_32f_C1R>::call,\r
- multiply_gpu<double, double>\r
- };\r
+ using namespace cv::gpu::device;\r
\r
cudaStream_t stream = StreamAccessor::getStream(s);

if (src1.type() == CV_8UC4 && src2.type() == CV_32FC1)
{
CV_Assert(src1.size() == src2.size());

dst.create(src1.size(), src1.type());

multiply_gpu(static_cast<DevMem2D_<uchar4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<uchar4> >(dst), stream);
}
else if (src1.type() == CV_16SC4 && src2.type() == CV_32FC1)
{
CV_Assert(src1.size() == src2.size());

dst.create(src1.size(), src1.type());

multiply_gpu(static_cast<DevMem2D_<short4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<short4> >(dst), stream);
}\r
else\r
{\r
- CV_Assert(src1.type() != CV_8S);\r
- CV_Assert(src1.type() == src2.type() && src1.size() == src2.size());\r
+ typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);\r
+ static const func_t funcs[7][7] =\r
+ {\r
+ {multiply_gpu<unsigned char, unsigned char> , 0 /*multiply_gpu<unsigned char, signed char>*/ , multiply_gpu<unsigned char, unsigned short> , multiply_gpu<unsigned char, short> , multiply_gpu<unsigned char, int> , multiply_gpu<unsigned char, float> , multiply_gpu<unsigned char, double> },\r
+ {0 /*multiply_gpu<signed char, unsigned char>*/ , 0 /*multiply_gpu<signed char, signed char>*/ , 0 /*multiply_gpu<signed char, unsigned short>*/, 0 /*multiply_gpu<signed char, short>*/ , 0 /*multiply_gpu<signed char, int>*/, 0 /*multiply_gpu<signed char, float>*/, 0 /*multiply_gpu<signed char, double>*/},\r
+ {0 /*multiply_gpu<unsigned short, unsigned char>*/, 0 /*multiply_gpu<unsigned short, signed char>*/, multiply_gpu<unsigned short, unsigned short> , 0 /*multiply_gpu<unsigned short, short>*/, multiply_gpu<unsigned short, int> , multiply_gpu<unsigned short, float> , multiply_gpu<unsigned short, double> },\r
+ {0 /*multiply_gpu<short, unsigned char>*/ , 0 /*multiply_gpu<short, signed char>*/ , 0 /*multiply_gpu<short, unsigned short>*/ , multiply_gpu<short, short> , multiply_gpu<short, int> , multiply_gpu<short, float> , multiply_gpu<short, double> },\r
+ {0 /*multiply_gpu<int, unsigned char>*/ , 0 /*multiply_gpu<int, signed char>*/ , 0 /*multiply_gpu<int, unsigned short>*/ , 0 /*multiply_gpu<int, short>*/ , multiply_gpu<int, int> , multiply_gpu<int, float> , multiply_gpu<int, double> },\r
+ {0 /*multiply_gpu<float, unsigned char>*/ , 0 /*multiply_gpu<float, signed char>*/ , 0 /*multiply_gpu<float, unsigned short>*/ , 0 /*multiply_gpu<float, short>*/ , 0 /*multiply_gpu<float, int>*/ , multiply_gpu<float, float> , multiply_gpu<float, double> },\r
+ {0 /*multiply_gpu<double, unsigned char>*/ , 0 /*multiply_gpu<double, signed char>*/ , 0 /*multiply_gpu<double, unsigned short>*/ , 0 /*multiply_gpu<double, short>*/ , 0 /*multiply_gpu<double, int>*/ , 0 /*multiply_gpu<double, float>*/ , multiply_gpu<double, double> }\r
+ };\r
+\r
+ typedef void (*npp_func_t)(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);\r
+ static const npp_func_t npp_funcs[] =\r
+ {\r
+ NppArithm<CV_8U , nppiMul_8u_C1RSfs >::call,\r
+ 0,\r
+ NppArithm<CV_16U, nppiMul_16u_C1RSfs>::call,\r
+ NppArithm<CV_16S, nppiMul_16s_C1RSfs>::call,\r
+ NppArithm<CV_32S, nppiMul_32s_C1RSfs>::call,\r
+ NppArithm<CV_32F, nppiMul_32f_C1R >::call\r
+ };\r
\r
if (dtype < 0)\r
dtype = src1.depth();\r
\r
+ CV_Assert(src1.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F);\r
+ CV_Assert(src1.type() == src2.type() && src1.size() == src2.size());\r
+\r
+ if (src1.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F)\r
+ {\r
+ if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))\r
+ CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");\r
+ }\r
+\r
dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()));\r
\r
- if (scale == 1 && dst.type() == src1.type())\r
+ if (scale == 1 && dst.type() == src1.type() && src1.depth() <= CV_32F)\r
{\r
- npp_funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), 1, stream);\r
+ npp_funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);\r
return;\r
}\r
\r
const func_t func = funcs[src1.depth()][dst.depth()];\r
- CV_Assert(func != 0);\r
+\r
+ if (!func)\r
+ CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");\r
\r
func(src1.reshape(1), src2.reshape(1), dst.reshape(1), scale, stream);\r
}\r
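
A short sketch (not part of the patch) of the CV_8UC4-by-CV_32FC1 weighting fast path; frame is assumed CV_8UC4 and weights CV_32FC1 of the same size:

cv::gpu::GpuMat blended;
cv::gpu::multiply(frame, weights, blended); // per-pixel weighting, dst stays CV_8UC4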
\r
void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double scale, int dtype, Stream& s)\r
{\r
- using namespace ::cv::gpu::device;\r
+ using namespace cv::gpu::device;\r
\r
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);\r
-\r
static const func_t funcs[7][7] =\r
{\r
- {multiply_gpu<unsigned char, unsigned char>, 0/*multiply_gpu<unsigned char, signed char>*/, multiply_gpu<unsigned char, unsigned short>, multiply_gpu<unsigned char, short>, multiply_gpu<unsigned char, int>, multiply_gpu<unsigned char, float>, multiply_gpu<unsigned char, double>},\r
- {0/*multiply_gpu<signed char, unsigned char>*/, 0/*multiply_gpu<signed char, signed char>*/, 0/*multiply_gpu<signed char, unsigned short>*/, 0/*multiply_gpu<signed char, short>*/, 0/*multiply_gpu<signed char, int>*/, 0/*multiply_gpu<signed char, float>*/, 0/*multiply_gpu<signed char, double>*/},\r
- {0/*multiply_gpu<unsigned short, unsigned char>*/, 0/*multiply_gpu<unsigned short, signed char>*/, multiply_gpu<unsigned short, unsigned short>, 0/*multiply_gpu<unsigned short, short>*/, multiply_gpu<unsigned short, int>, multiply_gpu<unsigned short, float>, multiply_gpu<unsigned short, double>},\r
- {0/*multiply_gpu<short, unsigned char>*/, 0/*multiply_gpu<short, signed char>*/, 0/*multiply_gpu<short, unsigned short>*/, multiply_gpu<short, short>, multiply_gpu<short, int>, multiply_gpu<short, float>, multiply_gpu<short, double>},\r
- {0/*multiply_gpu<int, unsigned char>*/, 0/*multiply_gpu<int, signed char>*/, 0/*multiply_gpu<int, unsigned short>*/, 0/*multiply_gpu<int, short>*/, multiply_gpu<int, int>, multiply_gpu<int, float>, multiply_gpu<int, double>},\r
- {0/*multiply_gpu<float, unsigned char>*/, 0/*multiply_gpu<float, signed char>*/, 0/*multiply_gpu<float, unsigned short>*/, 0/*multiply_gpu<float, short>*/, 0/*multiply_gpu<float, int>*/, multiply_gpu<float, float>, multiply_gpu<float, double>},\r
- {0/*multiply_gpu<double, unsigned char>*/, 0/*multiply_gpu<double, signed char>*/, 0/*multiply_gpu<double, unsigned short>*/, 0/*multiply_gpu<double, short>*/, 0/*multiply_gpu<double, int>*/, 0/*multiply_gpu<double, float>*/, multiply_gpu<double, double>}\r
+ {multiply_gpu<unsigned char, unsigned char> , 0 /*multiply_gpu<unsigned char, signed char>*/ , multiply_gpu<unsigned char, unsigned short> , multiply_gpu<unsigned char, short> , multiply_gpu<unsigned char, int> , multiply_gpu<unsigned char, float> , multiply_gpu<unsigned char, double> },\r
+ {0 /*multiply_gpu<signed char, unsigned char>*/ , 0 /*multiply_gpu<signed char, signed char>*/ , 0 /*multiply_gpu<signed char, unsigned short>*/, 0 /*multiply_gpu<signed char, short>*/ , 0 /*multiply_gpu<signed char, int>*/, 0 /*multiply_gpu<signed char, float>*/, 0 /*multiply_gpu<signed char, double>*/},\r
+ {0 /*multiply_gpu<unsigned short, unsigned char>*/, 0 /*multiply_gpu<unsigned short, signed char>*/, multiply_gpu<unsigned short, unsigned short> , 0 /*multiply_gpu<unsigned short, short>*/, multiply_gpu<unsigned short, int> , multiply_gpu<unsigned short, float> , multiply_gpu<unsigned short, double> },\r
+ {0 /*multiply_gpu<short, unsigned char>*/ , 0 /*multiply_gpu<short, signed char>*/ , 0 /*multiply_gpu<short, unsigned short>*/ , multiply_gpu<short, short> , multiply_gpu<short, int> , multiply_gpu<short, float> , multiply_gpu<short, double> },\r
+ {0 /*multiply_gpu<int, unsigned char>*/ , 0 /*multiply_gpu<int, signed char>*/ , 0 /*multiply_gpu<int, unsigned short>*/ , 0 /*multiply_gpu<int, short>*/ , multiply_gpu<int, int> , multiply_gpu<int, float> , multiply_gpu<int, double> },\r
+ {0 /*multiply_gpu<float, unsigned char>*/ , 0 /*multiply_gpu<float, signed char>*/ , 0 /*multiply_gpu<float, unsigned short>*/ , 0 /*multiply_gpu<float, short>*/ , 0 /*multiply_gpu<float, int>*/ , multiply_gpu<float, float> , multiply_gpu<float, double> },\r
+ {0 /*multiply_gpu<double, unsigned char>*/ , 0 /*multiply_gpu<double, signed char>*/ , 0 /*multiply_gpu<double, unsigned short>*/ , 0 /*multiply_gpu<double, short>*/ , 0 /*multiply_gpu<double, int>*/ , 0 /*multiply_gpu<double, float>*/ , multiply_gpu<double, double> }\r
};\r
\r
- typedef void (*npp_func_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream);\r
+ typedef void (*npp_func_t)(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream);\r
static const npp_func_t npp_funcs[7][4] =\r
{\r
- {NppArithmScalar<CV_8U, 1, nppiMulC_8u_C1RSfs>::call, 0, NppArithmScalar<CV_8U, 3, nppiMulC_8u_C3RSfs>::call, NppArithmScalar<CV_8U, 4, nppiMulC_8u_C4RSfs>::call},\r
- {0,0,0,0},\r
+ {NppArithmScalar<CV_8U , 1, nppiMulC_8u_C1RSfs >::call, 0, NppArithmScalar<CV_8U , 3, nppiMulC_8u_C3RSfs >::call, NppArithmScalar<CV_8U , 4, nppiMulC_8u_C4RSfs >::call},\r
+ {0 , 0, 0 , 0 },\r
{NppArithmScalar<CV_16U, 1, nppiMulC_16u_C1RSfs>::call, 0, NppArithmScalar<CV_16U, 3, nppiMulC_16u_C3RSfs>::call, NppArithmScalar<CV_16U, 4, nppiMulC_16u_C4RSfs>::call},\r
{NppArithmScalar<CV_16S, 1, nppiMulC_16s_C1RSfs>::call, 0, NppArithmScalar<CV_16S, 3, nppiMulC_16s_C3RSfs>::call, NppArithmScalar<CV_16S, 4, nppiMulC_16s_C4RSfs>::call},\r
- {NppArithmScalar<CV_32S, 1, nppiMulC_32s_C1RSfs>::call, 0, NppArithmScalar<CV_32S, 3, nppiMulC_32s_C3RSfs>::call, 0},\r
- {NppArithmScalar<CV_32F, 1, nppiMulC_32f_C1R>::call, 0, NppArithmScalar<CV_32F, 3, nppiMulC_32f_C3R>::call, NppArithmScalar<CV_32F, 4, nppiMulC_32f_C4R>::call},\r
- {0,0,0,0}\r
+ {NppArithmScalar<CV_32S, 1, nppiMulC_32s_C1RSfs>::call, 0, NppArithmScalar<CV_32S, 3, nppiMulC_32s_C3RSfs>::call, 0 },\r
+ {NppArithmScalar<CV_32F, 1, nppiMulC_32f_C1R >::call, 0, NppArithmScalar<CV_32F, 3, nppiMulC_32f_C3R >::call, NppArithmScalar<CV_32F, 4, nppiMulC_32f_C4R >::call},\r
+ {0 , 0, 0 , 0 }\r
};\r
\r
if (dtype < 0)\r
dtype = src.depth();\r
\r
+ CV_Assert(src.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F);\r
+ CV_Assert(src.channels() <= 4);\r
+\r
+ if (src.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F)\r
+ {\r
+ if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))\r
+ CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");\r
+ }\r
+\r
dst.create(src.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()));\r
\r
cudaStream_t stream = StreamAccessor::getStream(s);\r
\r
- if (dst.type() == src.type() && scale == 1)\r
+ if (dst.type() == src.type() && scale == 1 && (src.depth() == CV_32F || isIntScalar(sc)))\r
{\r
const npp_func_t npp_func = npp_funcs[src.depth()][src.channels() - 1];\r
\r
- if (npp_func && (src.depth() == CV_32F || isIntScalar(sc)))\r
+ if (npp_func)\r
{\r
npp_func(src, sc, dst, stream);\r
return;\r
}\r
}\r
\r
+ CV_Assert(src.channels() == 1);\r
+\r
const func_t func = funcs[src.depth()][dst.depth()];\r
\r
- CV_Assert(func != 0);\r
+ if (!func)\r
+ CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");\r
\r
- func(src.reshape(1), sc.val[0], dst.reshape(1), scale, stream);\r
+ func(src, sc.val[0], dst, scale, stream);\r
}\r
\r
////////////////////////////////////////////////////////////////////////
// divide

void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s)\r
{\r
- using namespace ::cv::gpu::device;\r
-\r
- typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);\r
-\r
- static const func_t funcs[7][7] =\r
- {\r
- {divide_gpu<unsigned char, unsigned char>, 0/*divide_gpu<unsigned char, signed char>*/, divide_gpu<unsigned char, unsigned short>, divide_gpu<unsigned char, short>, divide_gpu<unsigned char, int>, divide_gpu<unsigned char, float>, divide_gpu<unsigned char, double>},\r
- {0/*divide_gpu<signed char, unsigned char>*/, 0/*divide_gpu<signed char, signed char>*/, 0/*divide_gpu<signed char, unsigned short>*/, 0/*divide_gpu<signed char, short>*/, 0/*divide_gpu<signed char, int>*/, 0/*divide_gpu<signed char, float>*/, 0/*divide_gpu<signed char, double>*/},\r
- {0/*divide_gpu<unsigned short, unsigned char>*/, 0/*divide_gpu<unsigned short, signed char>*/, divide_gpu<unsigned short, unsigned short>, 0/*divide_gpu<unsigned short, short>*/, divide_gpu<unsigned short, int>, divide_gpu<unsigned short, float>, divide_gpu<unsigned short, double>},\r
- {0/*divide_gpu<short, unsigned char>*/, 0/*divide_gpu<short, signed char>*/, 0/*divide_gpu<short, unsigned short>*/, divide_gpu<short, short>, divide_gpu<short, int>, divide_gpu<short, float>, divide_gpu<short, double>},\r
- {0/*divide_gpu<int, unsigned char>*/, 0/*divide_gpu<int, signed char>*/, 0/*divide_gpu<int, unsigned short>*/, 0/*divide_gpu<int, short>*/, divide_gpu<int, int>, divide_gpu<int, float>, divide_gpu<int, double>},\r
- {0/*divide_gpu<float, unsigned char>*/, 0/*divide_gpu<float, signed char>*/, 0/*divide_gpu<float, unsigned short>*/, 0/*divide_gpu<float, short>*/, 0/*divide_gpu<float, int>*/, divide_gpu<float, float>, divide_gpu<float, double>},\r
- {0/*divide_gpu<double, unsigned char>*/, 0/*divide_gpu<double, signed char>*/, 0/*divide_gpu<double, unsigned short>*/, 0/*divide_gpu<double, short>*/, 0/*divide_gpu<double, int>*/, 0/*divide_gpu<double, float>*/, divide_gpu<double, double>}\r
- };\r
-\r
- static const func_t npp_funcs[6] =\r
- {\r
- NppArithm<CV_8U, nppiDiv_8u_C1RSfs>::call,\r
- 0,\r
- NppArithm<CV_16U, nppiDiv_16u_C1RSfs>::call,\r
- NppArithm<CV_16S, nppiDiv_16s_C1RSfs>::call,\r
- NppArithm<CV_32S, nppiDiv_32s_C1RSfs>::call,\r
- NppArithm<CV_32F, nppiDiv_32f_C1R>::call\r
- };\r
+ using namespace cv::gpu::device;\r
\r
cudaStream_t stream = StreamAccessor::getStream(s);

if (src1.type() == CV_8UC4 && src2.type() == CV_32FC1)
{
CV_Assert(src1.size() == src2.size());

dst.create(src1.size(), src1.type());

divide_gpu(static_cast<DevMem2D_<uchar4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<uchar4> >(dst), stream);
}
else if (src1.type() == CV_16SC4 && src2.type() == CV_32FC1)
{
CV_Assert(src1.size() == src2.size());

dst.create(src1.size(), src1.type());

divide_gpu(static_cast<DevMem2D_<short4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<short4> >(dst), stream);
}\r
else\r
{\r
- CV_Assert(src1.type() != CV_8S);\r
- CV_Assert(src1.type() == src2.type() && src1.size() == src2.size());\r
+ typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);\r
+ static const func_t funcs[7][7] =\r
+ {\r
+ {divide_gpu<unsigned char, unsigned char> , 0 /*divide_gpu<unsigned char, signed char>*/ , divide_gpu<unsigned char, unsigned short> , divide_gpu<unsigned char, short> , divide_gpu<unsigned char, int> , divide_gpu<unsigned char, float> , divide_gpu<unsigned char, double> },\r
+ {0 /*divide_gpu<signed char, unsigned char>*/ , 0 /*divide_gpu<signed char, signed char>*/ , 0 /*divide_gpu<signed char, unsigned short>*/, 0 /*divide_gpu<signed char, short>*/ , 0 /*divide_gpu<signed char, int>*/, 0 /*divide_gpu<signed char, float>*/, 0 /*divide_gpu<signed char, double>*/},\r
+ {0 /*divide_gpu<unsigned short, unsigned char>*/, 0 /*divide_gpu<unsigned short, signed char>*/, divide_gpu<unsigned short, unsigned short> , 0 /*divide_gpu<unsigned short, short>*/, divide_gpu<unsigned short, int> , divide_gpu<unsigned short, float> , divide_gpu<unsigned short, double> },\r
+ {0 /*divide_gpu<short, unsigned char>*/ , 0 /*divide_gpu<short, signed char>*/ , 0 /*divide_gpu<short, unsigned short>*/ , divide_gpu<short, short> , divide_gpu<short, int> , divide_gpu<short, float> , divide_gpu<short, double> },\r
+ {0 /*divide_gpu<int, unsigned char>*/ , 0 /*divide_gpu<int, signed char>*/ , 0 /*divide_gpu<int, unsigned short>*/ , 0 /*divide_gpu<int, short>*/ , divide_gpu<int, int> , divide_gpu<int, float> , divide_gpu<int, double> },\r
+ {0 /*divide_gpu<float, unsigned char>*/ , 0 /*divide_gpu<float, signed char>*/ , 0 /*divide_gpu<float, unsigned short>*/ , 0 /*divide_gpu<float, short>*/ , 0 /*divide_gpu<float, int>*/ , divide_gpu<float, float> , divide_gpu<float, double> },\r
+ {0 /*divide_gpu<double, unsigned char>*/ , 0 /*divide_gpu<double, signed char>*/ , 0 /*divide_gpu<double, unsigned short>*/ , 0 /*divide_gpu<double, short>*/ , 0 /*divide_gpu<double, int>*/ , 0 /*divide_gpu<double, float>*/ , divide_gpu<double, double> }\r
+ };\r
+\r
+ typedef void (*npp_func_t)(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);\r
+ static const npp_func_t npp_funcs[6] =\r
+ {\r
+ NppArithm<CV_8U , nppiDiv_8u_C1RSfs >::call,\r
+ 0,\r
+ NppArithm<CV_16U, nppiDiv_16u_C1RSfs>::call,\r
+ NppArithm<CV_16S, nppiDiv_16s_C1RSfs>::call,\r
+ NppArithm<CV_32S, nppiDiv_32s_C1RSfs>::call,\r
+ NppArithm<CV_32F, nppiDiv_32f_C1R >::call\r
+ };\r
\r
if (dtype < 0)\r
dtype = src1.depth();\r
\r
+ CV_Assert(src1.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F);\r
+ CV_Assert(src1.type() == src2.type() && src1.size() == src2.size());\r
+\r
+ if (src1.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F)\r
+ {\r
+ if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))\r
+ CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");\r
+ }\r
+\r
dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()));\r
\r
if (scale == 1 && dst.type() == src1.type() && src1.depth() <= CV_32F)\r
{\r
- npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), 1, stream);\r
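+ // note: nppiDiv divides its second operand by its first (pSrc2 / pSrc1),
+ // so src2 is passed first to compute src1 / src2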
+ npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), stream);\r
return;\r
}\r
\r
const func_t func = funcs[src1.depth()][dst.depth()];\r
- CV_Assert(func != 0);\r
+\r
+ if (!func)\r
+ CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");\r
\r
func(src1.reshape(1), src2.reshape(1), dst.reshape(1), scale, stream);\r
}\r
\r
void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double scale, int dtype, Stream& s)\r
{\r
- using namespace ::cv::gpu::device;\r
+ using namespace cv::gpu::device;\r
\r
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);\r
-\r
static const func_t funcs[7][7] =\r
{\r
- {divide_gpu<unsigned char, unsigned char>, 0/*divide_gpu<unsigned char, signed char>*/, divide_gpu<unsigned char, unsigned short>, divide_gpu<unsigned char, short>, divide_gpu<unsigned char, int>, divide_gpu<unsigned char, float>, divide_gpu<unsigned char, double>},\r
- {0/*divide_gpu<signed char, unsigned char>*/, 0/*divide_gpu<signed char, signed char>*/, 0/*divide_gpu<signed char, unsigned short>*/, 0/*divide_gpu<signed char, short>*/, 0/*divide_gpu<signed char, int>*/, 0/*divide_gpu<signed char, float>*/, 0/*divide_gpu<signed char, double>*/},\r
- {0/*divide_gpu<unsigned short, unsigned char>*/, 0/*divide_gpu<unsigned short, signed char>*/, divide_gpu<unsigned short, unsigned short>, 0/*divide_gpu<unsigned short, short>*/, divide_gpu<unsigned short, int>, divide_gpu<unsigned short, float>, divide_gpu<unsigned short, double>},\r
- {0/*divide_gpu<short, unsigned char>*/, 0/*divide_gpu<short, signed char>*/, 0/*divide_gpu<short, unsigned short>*/, divide_gpu<short, short>, divide_gpu<short, int>, divide_gpu<short, float>, divide_gpu<short, double>},\r
- {0/*divide_gpu<int, unsigned char>*/, 0/*divide_gpu<int, signed char>*/, 0/*divide_gpu<int, unsigned short>*/, 0/*divide_gpu<int, short>*/, divide_gpu<int, int>, divide_gpu<int, float>, divide_gpu<int, double>},\r
- {0/*divide_gpu<float, unsigned char>*/, 0/*divide_gpu<float, signed char>*/, 0/*divide_gpu<float, unsigned short>*/, 0/*divide_gpu<float, short>*/, 0/*divide_gpu<float, int>*/, divide_gpu<float, float>, divide_gpu<float, double>},\r
- {0/*divide_gpu<double, unsigned char>*/, 0/*divide_gpu<double, signed char>*/, 0/*divide_gpu<double, unsigned short>*/, 0/*divide_gpu<double, short>*/, 0/*divide_gpu<double, int>*/, 0/*divide_gpu<double, float>*/, divide_gpu<double, double>}\r
+ {divide_gpu<unsigned char, unsigned char> , 0 /*divide_gpu<unsigned char, signed char>*/ , divide_gpu<unsigned char, unsigned short> , divide_gpu<unsigned char, short> , divide_gpu<unsigned char, int> , divide_gpu<unsigned char, float> , divide_gpu<unsigned char, double> },\r
+ {0 /*divide_gpu<signed char, unsigned char>*/ , 0 /*divide_gpu<signed char, signed char>*/ , 0 /*divide_gpu<signed char, unsigned short>*/, 0 /*divide_gpu<signed char, short>*/ , 0 /*divide_gpu<signed char, int>*/, 0 /*divide_gpu<signed char, float>*/, 0 /*divide_gpu<signed char, double>*/},\r
+ {0 /*divide_gpu<unsigned short, unsigned char>*/, 0 /*divide_gpu<unsigned short, signed char>*/, divide_gpu<unsigned short, unsigned short> , 0 /*divide_gpu<unsigned short, short>*/, divide_gpu<unsigned short, int> , divide_gpu<unsigned short, float> , divide_gpu<unsigned short, double> },\r
+ {0 /*divide_gpu<short, unsigned char>*/ , 0 /*divide_gpu<short, signed char>*/ , 0 /*divide_gpu<short, unsigned short>*/ , divide_gpu<short, short> , divide_gpu<short, int> , divide_gpu<short, float> , divide_gpu<short, double> },\r
+ {0 /*divide_gpu<int, unsigned char>*/ , 0 /*divide_gpu<int, signed char>*/ , 0 /*divide_gpu<int, unsigned short>*/ , 0 /*divide_gpu<int, short>*/ , divide_gpu<int, int> , divide_gpu<int, float> , divide_gpu<int, double> },\r
+ {0 /*divide_gpu<float, unsigned char>*/ , 0 /*divide_gpu<float, signed char>*/ , 0 /*divide_gpu<float, unsigned short>*/ , 0 /*divide_gpu<float, short>*/ , 0 /*divide_gpu<float, int>*/ , divide_gpu<float, float> , divide_gpu<float, double> },\r
+ {0 /*divide_gpu<double, unsigned char>*/ , 0 /*divide_gpu<double, signed char>*/ , 0 /*divide_gpu<double, unsigned short>*/ , 0 /*divide_gpu<double, short>*/ , 0 /*divide_gpu<double, int>*/ , 0 /*divide_gpu<double, float>*/ , divide_gpu<double, double> }\r
};\r
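+\r
+// funcs is indexed as funcs[src.depth()][dst.depth()] over the seven\r
+// primitive depths CV_8U..CV_64F; a 0 entry marks a source/destination\r
+// pairing for which no kernel is instantiated. For example:\r
+//     funcs[CV_8U][CV_32F] -> divide_gpu<unsigned char, float>\r
+//     funcs[CV_32F][CV_8U] -> 0, rejected below as CV_StsUnsupportedFormat\r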
\r
- typedef void (*npp_func_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream);\r
+ typedef void (*npp_func_t)(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream);\r
static const npp_func_t npp_funcs[7][4] =\r
{\r
- {NppArithmScalar<CV_8U, 1, nppiDivC_8u_C1RSfs>::call, 0, NppArithmScalar<CV_8U, 3, nppiDivC_8u_C3RSfs>::call, NppArithmScalar<CV_8U, 4, nppiDivC_8u_C4RSfs>::call},\r
- {0,0,0,0},\r
+ {NppArithmScalar<CV_8U , 1, nppiDivC_8u_C1RSfs >::call, 0, NppArithmScalar<CV_8U , 3, nppiDivC_8u_C3RSfs >::call, NppArithmScalar<CV_8U , 4, nppiDivC_8u_C4RSfs >::call},\r
+ {0 , 0, 0 , 0 },\r
{NppArithmScalar<CV_16U, 1, nppiDivC_16u_C1RSfs>::call, 0, NppArithmScalar<CV_16U, 3, nppiDivC_16u_C3RSfs>::call, NppArithmScalar<CV_16U, 4, nppiDivC_16u_C4RSfs>::call},\r
{NppArithmScalar<CV_16S, 1, nppiDivC_16s_C1RSfs>::call, 0, NppArithmScalar<CV_16S, 3, nppiDivC_16s_C3RSfs>::call, NppArithmScalar<CV_16S, 4, nppiDivC_16s_C4RSfs>::call},\r
- {NppArithmScalar<CV_32S, 1, nppiDivC_32s_C1RSfs>::call, 0, NppArithmScalar<CV_32S, 3, nppiDivC_32s_C3RSfs>::call, 0},\r
- {NppArithmScalar<CV_32F, 1, nppiDivC_32f_C1R>::call, 0, NppArithmScalar<CV_32F, 3, nppiDivC_32f_C3R>::call, NppArithmScalar<CV_32F, 4, nppiDivC_32f_C4R>::call},\r
- {0,0,0,0}\r
+ {NppArithmScalar<CV_32S, 1, nppiDivC_32s_C1RSfs>::call, 0, NppArithmScalar<CV_32S, 3, nppiDivC_32s_C3RSfs>::call, 0 },\r
+ {NppArithmScalar<CV_32F, 1, nppiDivC_32f_C1R >::call, 0, NppArithmScalar<CV_32F, 3, nppiDivC_32f_C3R >::call, NppArithmScalar<CV_32F, 4, nppiDivC_32f_C4R >::call},\r
+ {0 , 0, 0 , 0 }\r
};\r
\r
if (dtype < 0)\r
dtype = src.depth();\r
\r
+ CV_Assert(src.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F);\r
+ CV_Assert(src.channels() <= 4);\r
+\r
+ if (src.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F)\r
+ {\r
+ if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))\r
+ CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");\r
+ }\r
+\r
dst.create(src.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()));\r
\r
cudaStream_t stream = StreamAccessor::getStream(s);\r
\r
- if (dst.type() == src.type() && scale == 1)\r
+ if (dst.type() == src.type() && scale == 1 && (src.depth() == CV_32F || isIntScalar(sc)))\r
{\r
const npp_func_t npp_func = npp_funcs[src.depth()][src.channels() - 1];\r
\r
- if (npp_func && (src.depth() == CV_32F || isIntScalar(sc)))\r
+ if (npp_func)\r
{\r
npp_func(src, sc, dst, stream);\r
return;\r
}\r
}\r
\r
+ CV_Assert(src.channels() == 1);\r
+\r
const func_t func = funcs[src.depth()][dst.depth()];\r
\r
- CV_Assert(func != 0);\r
+ if (!func)\r
+ CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");\r
\r
- func(src.reshape(1), sc.val[0], dst.reshape(1), scale, stream);\r
+ func(src, sc.val[0], dst, scale, stream);\r
}\r
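+\r
+// The NPP shortcut in the function above is taken only when no result\r
+// scaling is requested and the scalar is usable by the integer DivC\r
+// variants (an integral value, or any value for a CV_32F source); every\r
+// other case falls through to the templated divide_gpu kernel, which\r
+// handles single-channel matrices only. A sketch, assuming a CV_8UC1 src:\r
+//\r
+//     cv::gpu::divide(src, Scalar(2),   dst); // NPP path: integral scalar\r
+//     cv::gpu::divide(src, Scalar(2.5), dst); // kernel path: fractional scalar\r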
\r
void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, Stream& s)\r
{\r
- using namespace ::cv::gpu::device;\r
+ using namespace cv::gpu::device;\r
\r
typedef void (*func_t)(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);\r
-\r
static const func_t funcs[7][7] =\r
{\r
- {divide_gpu<unsigned char, unsigned char>, 0/*divide_gpu<unsigned char, signed char>*/, divide_gpu<unsigned char, unsigned short>, divide_gpu<unsigned char, short>, divide_gpu<unsigned char, int>, divide_gpu<unsigned char, float>, divide_gpu<unsigned char, double>},\r
- {0/*divide_gpu<signed char, unsigned char>*/, 0/*divide_gpu<signed char, signed char>*/, 0/*divide_gpu<signed char, unsigned short>*/, 0/*divide_gpu<signed char, short>*/, 0/*divide_gpu<signed char, int>*/, 0/*divide_gpu<signed char, float>*/, 0/*divide_gpu<signed char, double>*/},\r
- {0/*divide_gpu<unsigned short, unsigned char>*/, 0/*divide_gpu<unsigned short, signed char>*/, divide_gpu<unsigned short, unsigned short>, 0/*divide_gpu<unsigned short, short>*/, divide_gpu<unsigned short, int>, divide_gpu<unsigned short, float>, divide_gpu<unsigned short, double>},\r
- {0/*divide_gpu<short, unsigned char>*/, 0/*divide_gpu<short, signed char>*/, 0/*divide_gpu<short, unsigned short>*/, divide_gpu<short, short>, divide_gpu<short, int>, divide_gpu<short, float>, divide_gpu<short, double>},\r
- {0/*divide_gpu<int, unsigned char>*/, 0/*divide_gpu<int, signed char>*/, 0/*divide_gpu<int, unsigned short>*/, 0/*divide_gpu<int, short>*/, divide_gpu<int, int>, divide_gpu<int, float>, divide_gpu<int, double>},\r
- {0/*divide_gpu<float, unsigned char>*/, 0/*divide_gpu<float, signed char>*/, 0/*divide_gpu<float, unsigned short>*/, 0/*divide_gpu<float, short>*/, 0/*divide_gpu<float, int>*/, divide_gpu<float, float>, divide_gpu<float, double>},\r
- {0/*divide_gpu<double, unsigned char>*/, 0/*divide_gpu<double, signed char>*/, 0/*divide_gpu<double, unsigned short>*/, 0/*divide_gpu<double, short>*/, 0/*divide_gpu<double, int>*/, 0/*divide_gpu<double, float>*/, divide_gpu<double, double>}\r
+ {divide_gpu<unsigned char, unsigned char> , 0 /*divide_gpu<unsigned char, signed char>*/ , divide_gpu<unsigned char, unsigned short> , divide_gpu<unsigned char, short> , divide_gpu<unsigned char, int> , divide_gpu<unsigned char, float> , divide_gpu<unsigned char, double> },\r
+ {0 /*divide_gpu<signed char, unsigned char>*/ , 0 /*divide_gpu<signed char, signed char>*/ , 0 /*divide_gpu<signed char, unsigned short>*/, 0 /*divide_gpu<signed char, short>*/ , 0 /*divide_gpu<signed char, int>*/, 0 /*divide_gpu<signed char, float>*/, 0 /*divide_gpu<signed char, double>*/},\r
+ {0 /*divide_gpu<unsigned short, unsigned char>*/, 0 /*divide_gpu<unsigned short, signed char>*/, divide_gpu<unsigned short, unsigned short> , 0 /*divide_gpu<unsigned short, short>*/, divide_gpu<unsigned short, int> , divide_gpu<unsigned short, float> , divide_gpu<unsigned short, double> },\r
+ {0 /*divide_gpu<short, unsigned char>*/ , 0 /*divide_gpu<short, signed char>*/ , 0 /*divide_gpu<short, unsigned short>*/ , divide_gpu<short, short> , divide_gpu<short, int> , divide_gpu<short, float> , divide_gpu<short, double> },\r
+ {0 /*divide_gpu<int, unsigned char>*/ , 0 /*divide_gpu<int, signed char>*/ , 0 /*divide_gpu<int, unsigned short>*/ , 0 /*divide_gpu<int, short>*/ , divide_gpu<int, int> , divide_gpu<int, float> , divide_gpu<int, double> },\r
+ {0 /*divide_gpu<float, unsigned char>*/ , 0 /*divide_gpu<float, signed char>*/ , 0 /*divide_gpu<float, unsigned short>*/ , 0 /*divide_gpu<float, short>*/ , 0 /*divide_gpu<float, int>*/ , divide_gpu<float, float> , divide_gpu<float, double> },\r
+ {0 /*divide_gpu<double, unsigned char>*/ , 0 /*divide_gpu<double, signed char>*/ , 0 /*divide_gpu<double, unsigned short>*/ , 0 /*divide_gpu<double, short>*/ , 0 /*divide_gpu<double, int>*/ , 0 /*divide_gpu<double, float>*/ , divide_gpu<double, double> }\r
};\r
\r
- CV_Assert(src.channels() == 1);\r
-\r
if (dtype < 0)\r
dtype = src.depth();\r
\r
+ CV_Assert(src.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F);\r
+ CV_Assert(src.channels() == 1);\r
+\r
+ if (src.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F)\r
+ {\r
+ if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))\r
+ CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");\r
+ }\r
+\r
dst.create(src.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()));\r
\r
cudaStream_t stream = StreamAccessor::getStream(s);\r
\r
const func_t func = funcs[src.depth()][dst.depth()];\r
- CV_Assert(func != 0);\r
+\r
+ if (!func)\r
+ CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");\r
\r
func(scale, src, dst, stream);\r
}\r
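+\r
+// This overload computes the reciprocal form dst(x, y) = scale / src(x, y);\r
+// unlike the matrix/matrix variant it has no NPP fast path and accepts\r
+// single-channel sources only. A minimal usage sketch:\r
+//\r
+//     cv::gpu::GpuMat src(32, 32, CV_32FC1), dst;\r
+//     cv::gpu::divide(2.0, src, dst); // dst = 2.0 / src, element-wise\r
+\r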
{\r
cv::gpu::DeviceInfo devInfo;\r
cv::Size size;\r
- std::pair<MatType, MatType> depth;\r
+ std::pair<MatDepth, MatDepth> depth;\r
int channels;\r
bool useRoi;\r
\r
cv::Mat mat2 = randomMat(size, stype);\r
cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);\r
\r
- cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);\r
- dst.setTo(cv::Scalar::all(0));\r
- cv::gpu::add(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, channels == 1 ? loadMat(mask, useRoi) : cv::gpu::GpuMat(), depth.second);\r
+ if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))\r
+ {\r
+ try\r
+ {\r
+ cv::gpu::GpuMat dst;\r
+ cv::gpu::add(loadMat(mat1), loadMat(mat2), dst, cv::gpu::GpuMat(), depth.second);\r
+ }\r
+ catch (const cv::Exception& e)\r
+ {\r
+ ASSERT_EQ(CV_StsUnsupportedFormat, e.code);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);\r
+ dst.setTo(cv::Scalar::all(0));\r
+ cv::gpu::add(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, channels == 1 ? loadMat(mask, useRoi) : cv::gpu::GpuMat(), depth.second);\r
\r
- cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));\r
- cv::add(mat1, mat2, dst_gold, channels == 1 ? mask : cv::noArray(), depth.second);\r
+ cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));\r
+ cv::add(mat1, mat2, dst_gold, channels == 1 ? mask : cv::noArray(), depth.second);\r
\r
- EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);\r
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);\r
+ }\r
}\r
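+\r
+// The NATIVE_DOUBLE guard above recurs verbatim in the tests that follow.\r
+// A possible follow-up (not part of this patch) is a small helper that also\r
+// fails when no exception is raised, since the try/catch as written passes\r
+// silently if the call unexpectedly succeeds:\r
+//\r
+//     template <typename Op> void expectUnsupportedFormat(Op op)\r
+//     {\r
+//         try { op(); FAIL() << "no cv::Exception was thrown"; }\r
+//         catch (const cv::Exception& e) { ASSERT_EQ(CV_StsUnsupportedFormat, e.code); }\r
+//     }\r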
\r
INSTANTIATE_TEST_CASE_P(GPU_Core, Add_Array, testing::Combine(\r
{\r
cv::gpu::DeviceInfo devInfo;\r
cv::Size size;\r
- std::pair<MatType, MatType> depth;\r
+ std::pair<MatDepth, MatDepth> depth;\r
bool useRoi;\r
\r
virtual void SetUp()\r
}\r
};\r
\r
-TEST_P(Add_Scalar, Accuracy)\r
+TEST_P(Add_Scalar, WithOutMask)\r
+{\r
+ cv::Mat mat = randomMat(size, depth.first);\r
+ cv::Scalar val = randomScalar(0, 255);\r
+\r
+ if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))\r
+ {\r
+ try\r
+ {\r
+ cv::gpu::GpuMat dst;\r
+ cv::gpu::add(loadMat(mat), val, dst, cv::gpu::GpuMat(), depth.second);\r
+ }\r
+ catch (const cv::Exception& e)\r
+ {\r
+ ASSERT_EQ(CV_StsUnsupportedFormat, e.code);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);\r
+ dst.setTo(cv::Scalar::all(0));\r
+ cv::gpu::add(loadMat(mat, useRoi), val, dst, cv::gpu::GpuMat(), depth.second);\r
+\r
+ cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));\r
+ cv::add(mat, val, dst_gold, cv::noArray(), depth.second);\r
+\r
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);\r
+ }\r
+}\r
+\r
+TEST_P(Add_Scalar, WithMask)\r
{\r
cv::Mat mat = randomMat(size, depth.first);\r
cv::Scalar val = randomScalar(0, 255);\r
cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);\r
\r
- cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);\r
- dst.setTo(cv::Scalar::all(0));\r
- cv::gpu::add(loadMat(mat, useRoi), val, dst, loadMat(mask, useRoi), depth.second);\r
+ if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))\r
+ {\r
+ try\r
+ {\r
+ cv::gpu::GpuMat dst;\r
+ cv::gpu::add(loadMat(mat), val, dst, loadMat(mask), depth.second);\r
+ }\r
+ catch (const cv::Exception& e)\r
+ {\r
+ ASSERT_EQ(CV_StsUnsupportedFormat, e.code);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);\r
+ dst.setTo(cv::Scalar::all(0));\r
+ cv::gpu::add(loadMat(mat, useRoi), val, dst, loadMat(mask, useRoi), depth.second);\r
\r
- cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));\r
- cv::add(mat, val, dst_gold, mask, depth.second);\r
+ cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));\r
+ cv::add(mat, val, dst_gold, mask, depth.second);\r
\r
- EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);\r
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);\r
+ }\r
}\r
\r
INSTANTIATE_TEST_CASE_P(GPU_Core, Add_Scalar, testing::Combine(\r
{\r
cv::gpu::DeviceInfo devInfo;\r
cv::Size size;\r
- std::pair<MatType, MatType> depth;\r
+ std::pair<MatDepth, MatDepth> depth;\r
int channels;\r
bool useRoi;\r
\r
cv::Mat mat2 = randomMat(size, stype);\r
cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);\r
\r
- cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);\r
- dst.setTo(cv::Scalar::all(0));\r
- cv::gpu::subtract(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, channels == 1 ? loadMat(mask, useRoi) : cv::gpu::GpuMat(), depth.second);\r
+ if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))\r
+ {\r
+ try\r
+ {\r
+ cv::gpu::GpuMat dst;\r
+ cv::gpu::subtract(loadMat(mat1), loadMat(mat2), dst, cv::gpu::GpuMat(), depth.second);\r
+ }\r
+ catch (const cv::Exception& e)\r
+ {\r
+ ASSERT_EQ(CV_StsUnsupportedFormat, e.code);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);\r
+ dst.setTo(cv::Scalar::all(0));\r
+ cv::gpu::subtract(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, channels == 1 ? loadMat(mask, useRoi) : cv::gpu::GpuMat(), depth.second);\r
\r
- cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));\r
- cv::subtract(mat1, mat2, dst_gold, channels == 1 ? mask : cv::noArray(), depth.second);\r
+ cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));\r
+ cv::subtract(mat1, mat2, dst_gold, channels == 1 ? mask : cv::noArray(), depth.second);\r
\r
- EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);\r
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);\r
+ }\r
}\r
\r
INSTANTIATE_TEST_CASE_P(GPU_Core, Subtract_Array, testing::Combine(\r
{\r
cv::gpu::DeviceInfo devInfo;\r
cv::Size size;\r
- std::pair<MatType, MatType> depth;\r
+ std::pair<MatDepth, MatDepth> depth;\r
bool useRoi;\r
\r
virtual void SetUp()\r
}\r
};\r
\r
-TEST_P(Subtract_Scalar, Accuracy)\r
+TEST_P(Subtract_Scalar, WithOutMask)\r
+{\r
+ cv::Mat mat = randomMat(size, depth.first);\r
+ cv::Scalar val = randomScalar(0, 255);\r
+\r
+ if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))\r
+ {\r
+ try\r
+ {\r
+ cv::gpu::GpuMat dst;\r
+ cv::gpu::subtract(loadMat(mat), val, dst, cv::gpu::GpuMat(), depth.second);\r
+ }\r
+ catch (const cv::Exception& e)\r
+ {\r
+ ASSERT_EQ(CV_StsUnsupportedFormat, e.code);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);\r
+ dst.setTo(cv::Scalar::all(0));\r
+ cv::gpu::subtract(loadMat(mat, useRoi), val, dst, cv::gpu::GpuMat(), depth.second);\r
+\r
+ cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));\r
+ cv::subtract(mat, val, dst_gold, cv::noArray(), depth.second);\r
+\r
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);\r
+ }\r
+}\r
+\r
+TEST_P(Subtract_Scalar, WithMask)\r
{\r
cv::Mat mat = randomMat(size, depth.first);\r
cv::Scalar val = randomScalar(0, 255);\r
cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);\r
\r
- cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);\r
- dst.setTo(cv::Scalar::all(0));\r
- cv::gpu::subtract(loadMat(mat, useRoi), val, dst, loadMat(mask, useRoi), depth.second);\r
+ if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))\r
+ {\r
+ try\r
+ {\r
+ cv::gpu::GpuMat dst;\r
+ cv::gpu::subtract(loadMat(mat), val, dst, loadMat(mask), depth.second);\r
+ }\r
+ catch (const cv::Exception& e)\r
+ {\r
+ ASSERT_EQ(CV_StsUnsupportedFormat, e.code);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);\r
+ dst.setTo(cv::Scalar::all(0));\r
+ cv::gpu::subtract(loadMat(mat, useRoi), val, dst, loadMat(mask, useRoi), depth.second);\r
\r
- cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));\r
- cv::subtract(mat, val, dst_gold, mask, depth.second);\r
+ cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));\r
+ cv::subtract(mat, val, dst_gold, mask, depth.second);\r
\r
- EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);\r
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);\r
+ }\r
}\r
\r
INSTANTIATE_TEST_CASE_P(GPU_Core, Subtract_Scalar, testing::Combine(\r
{\r
cv::gpu::DeviceInfo devInfo;\r
cv::Size size;\r
- std::pair<MatType, MatType> depth;\r
+ std::pair<MatDepth, MatDepth> depth;\r
int channels;\r
bool useRoi;\r
\r
}\r
};\r
\r
-TEST_P(Multiply_Array, Accuracy)\r
+TEST_P(Multiply_Array, WithOutScale)\r
+{\r
+ cv::Mat mat1 = randomMat(size, stype);\r
+ cv::Mat mat2 = randomMat(size, stype);\r
+\r
+ if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))\r
+ {\r
+ try\r
+ {\r
+ cv::gpu::GpuMat dst;\r
+ cv::gpu::multiply(loadMat(mat1), loadMat(mat2), dst, 1, depth.second);\r
+ }\r
+ catch (const cv::Exception& e)\r
+ {\r
+ ASSERT_EQ(CV_StsUnsupportedFormat, e.code);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);\r
+ cv::gpu::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, 1, depth.second);\r
+\r
+ cv::Mat dst_gold;\r
+ cv::multiply(mat1, mat2, dst_gold, 1, depth.second);\r
+\r
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);\r
+ }\r
+}\r
+\r
+TEST_P(Multiply_Array, WithScale)\r
{\r
cv::Mat mat1 = randomMat(size, stype);\r
cv::Mat mat2 = randomMat(size, stype);\r
double scale = randomDouble(0.0, 255.0);\r
\r
- cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);\r
- cv::gpu::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, scale, depth.second);\r
+ if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))\r
+ {\r
+ try\r
+ {\r
+ cv::gpu::GpuMat dst;\r
+ cv::gpu::multiply(loadMat(mat1), loadMat(mat2), dst, scale, depth.second);\r
+ }\r
+ catch (const cv::Exception& e)\r
+ {\r
+ ASSERT_EQ(CV_StsUnsupportedFormat, e.code);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);\r
+ cv::gpu::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, scale, depth.second);\r
\r
- cv::Mat dst_gold;\r
- cv::multiply(mat1, mat2, dst_gold, scale, depth.second);\r
+ cv::Mat dst_gold;\r
+ cv::multiply(mat1, mat2, dst_gold, scale, depth.second);\r
\r
- EXPECT_MAT_NEAR(dst_gold, dst, 1.0);\r
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);\r
+ }\r
}\r
\r
INSTANTIATE_TEST_CASE_P(GPU_Core, Multiply_Array, testing::Combine(\r
{\r
cv::gpu::DeviceInfo devInfo;\r
cv::Size size;\r
- std::pair<MatType, MatType> depth;\r
+ std::pair<MatDepth, MatDepth> depth;\r
bool useRoi;\r
\r
virtual void SetUp()\r
}\r
};\r
\r
-TEST_P(Multiply_Scalar, Accuracy)\r
+TEST_P(Multiply_Scalar, WithOutScale)\r
+{\r
+ cv::Mat mat = randomMat(size, depth.first);\r
+ cv::Scalar val = randomScalar(0, 255);\r
+\r
+ if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))\r
+ {\r
+ try\r
+ {\r
+ cv::gpu::GpuMat dst;\r
+ cv::gpu::multiply(loadMat(mat), val, dst, 1, depth.second);\r
+ }\r
+ catch (const cv::Exception& e)\r
+ {\r
+ ASSERT_EQ(CV_StsUnsupportedFormat, e.code);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);\r
+ cv::gpu::multiply(loadMat(mat, useRoi), val, dst, 1, depth.second);\r
+\r
+ cv::Mat dst_gold;\r
+ cv::multiply(mat, val, dst_gold, 1, depth.second);\r
+\r
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-2 : 0.0);\r
+ }\r
+}\r
+\r
+TEST_P(Multiply_Scalar, WithScale)\r
{\r
cv::Mat mat = randomMat(size, depth.first);\r
cv::Scalar val = randomScalar(0, 255);\r
double scale = randomDouble(0.0, 255.0);\r
\r
- cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);\r
- cv::gpu::multiply(loadMat(mat, useRoi), val, dst, scale, depth.second);\r
+ if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))\r
+ {\r
+ try\r
+ {\r
+ cv::gpu::GpuMat dst;\r
+ cv::gpu::multiply(loadMat(mat), val, dst, scale, depth.second);\r
+ }\r
+ catch (const cv::Exception& e)\r
+ {\r
+ ASSERT_EQ(CV_StsUnsupportedFormat, e.code);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);\r
+ cv::gpu::multiply(loadMat(mat, useRoi), val, dst, scale, depth.second);\r
\r
- cv::Mat dst_gold;\r
- cv::multiply(mat, val, dst_gold, scale, depth.second);\r
+ cv::Mat dst_gold;\r
+ cv::multiply(mat, val, dst_gold, scale, depth.second);\r
\r
- EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);\r
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);\r
+ }\r
}\r
\r
INSTANTIATE_TEST_CASE_P(GPU_Core, Multiply_Scalar, testing::Combine(\r
{\r
cv::gpu::DeviceInfo devInfo;\r
cv::Size size;\r
- std::pair<MatType, MatType> depth;\r
+ std::pair<MatDepth, MatDepth> depth;\r
int channels;\r
bool useRoi;\r
\r
}\r
};\r
\r
-TEST_P(Divide_Array, Accuracy)\r
+TEST_P(Divide_Array, WithOutScale)\r
+{\r
+ cv::Mat mat1 = randomMat(size, stype);\r
+ cv::Mat mat2 = randomMat(size, stype, 1.0, 255.0);\r
+\r
+ if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))\r
+ {\r
+ try\r
+ {\r
+ cv::gpu::GpuMat dst;\r
+ cv::gpu::divide(loadMat(mat1), loadMat(mat2), dst, 1, depth.second);\r
+ }\r
+ catch (const cv::Exception& e)\r
+ {\r
+ ASSERT_EQ(CV_StsUnsupportedFormat, e.code);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);\r
+ cv::gpu::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, 1, depth.second);\r
+\r
+ cv::Mat dst_gold;\r
+ cv::divide(mat1, mat2, dst_gold, 1, depth.second);\r
+\r
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);\r
+ }\r
+}\r
+\r
+TEST_P(Divide_Array, WithScale)\r
{\r
cv::Mat mat1 = randomMat(size, stype);\r
cv::Mat mat2 = randomMat(size, stype, 1.0, 255.0);\r
double scale = randomDouble(0.0, 255.0);\r
\r
- cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);\r
- cv::gpu::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, scale, depth.second);\r
+ if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))\r
+ {\r
+ try\r
+ {\r
+ cv::gpu::GpuMat dst;\r
+ cv::gpu::divide(loadMat(mat1), loadMat(mat2), dst, scale, depth.second);\r
+ }\r
+ catch (const cv::Exception& e)\r
+ {\r
+ ASSERT_EQ(CV_StsUnsupportedFormat, e.code);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);\r
+ cv::gpu::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, scale, depth.second);\r
\r
- cv::Mat dst_gold;\r
- cv::divide(mat1, mat2, dst_gold, scale, depth.second);\r
+ cv::Mat dst_gold;\r
+ cv::divide(mat1, mat2, dst_gold, scale, depth.second);\r
\r
- EXPECT_MAT_NEAR(dst_gold, dst, 1.0);\r
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);\r
+ }\r
}\r
\r
INSTANTIATE_TEST_CASE_P(GPU_Core, Divide_Array, testing::Combine(\r
{\r
cv::gpu::DeviceInfo devInfo;\r
cv::Size size;\r
- std::pair<MatType, MatType> depth;\r
+ std::pair<MatDepth, MatDepth> depth;\r
bool useRoi;\r
\r
virtual void SetUp()\r
}\r
};\r
\r
-TEST_P(Divide_Scalar, Accuracy)\r
+TEST_P(Divide_Scalar, WithOutScale)\r
+{\r
+ cv::Mat mat = randomMat(size, depth.first);\r
+ cv::Scalar val = randomScalar(1.0, 255.0);\r
+\r
+ if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))\r
+ {\r
+ try\r
+ {\r
+ cv::gpu::GpuMat dst;\r
+ cv::gpu::divide(loadMat(mat), val, dst, 1, depth.second);\r
+ }\r
+ catch (const cv::Exception& e)\r
+ {\r
+ ASSERT_EQ(CV_StsUnsupportedFormat, e.code);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);\r
+ cv::gpu::divide(loadMat(mat, useRoi), val, dst, 1, depth.second);\r
+\r
+ cv::Mat dst_gold;\r
+ cv::divide(mat, val, dst_gold, 1, depth.second);\r
+\r
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);\r
+ }\r
+}\r
+\r
+TEST_P(Divide_Scalar, WithScale)\r
{\r
cv::Mat mat = randomMat(size, depth.first);\r
cv::Scalar val = randomScalar(1.0, 255.0);\r
double scale = randomDouble(0.0, 255.0);\r
\r
- cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);\r
- cv::gpu::divide(loadMat(mat, useRoi), val, dst, scale, depth.second);\r
+ if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))\r
+ {\r
+ try\r
+ {\r
+ cv::gpu::GpuMat dst;\r
+ cv::gpu::divide(loadMat(mat), val, dst, scale, depth.second);\r
+ }\r
+ catch (const cv::Exception& e)\r
+ {\r
+ ASSERT_EQ(CV_StsUnsupportedFormat, e.code);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);\r
+ cv::gpu::divide(loadMat(mat, useRoi), val, dst, scale, depth.second);\r
\r
- cv::Mat dst_gold;\r
- cv::divide(mat, val, dst_gold, scale, depth.second);\r
+ cv::Mat dst_gold;\r
+ cv::divide(mat, val, dst_gold, scale, depth.second);\r
\r
- EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);\r
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);\r
+ }\r
}\r
\r
INSTANTIATE_TEST_CASE_P(GPU_Core, Divide_Scalar, testing::Combine(\r
{\r
cv::gpu::DeviceInfo devInfo;\r
cv::Size size;\r
- std::pair<MatType, MatType> depth;\r
+ std::pair<MatDepth, MatDepth> depth;\r
bool useRoi;\r
\r
virtual void SetUp()\r
double scale = randomDouble(0.0, 255.0);\r
cv::Mat mat = randomMat(size, depth.first, 1.0, 255.0);\r
\r
- cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);\r
- cv::gpu::divide(scale, loadMat(mat, useRoi), dst, depth.second);\r
+ if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))\r
+ {\r
+ try\r
+ {\r
+ cv::gpu::GpuMat dst;\r
+ cv::gpu::divide(scale, loadMat(mat), dst, depth.second);\r
+ }\r
+ catch (const cv::Exception& e)\r
+ {\r
+ ASSERT_EQ(CV_StsUnsupportedFormat, e.code);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);\r
+ cv::gpu::divide(scale, loadMat(mat, useRoi), dst, depth.second);\r
\r
- cv::Mat dst_gold;\r
- cv::divide(scale, mat, dst_gold, depth.second);\r
+ cv::Mat dst_gold;\r
+ cv::divide(scale, mat, dst_gold, depth.second);\r
\r
- EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);\r
+ EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);\r
+ }\r
}\r
\r
INSTANTIATE_TEST_CASE_P(GPU_Core, Divide_Scalar_Inv, testing::Combine(\r