From 6ee0b6eb56148d5bf240d74a51e2ffe665040306 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 6 Jan 2014 00:12:13 +0400 Subject: [PATCH] added mask support to cv::norm, cv::meanStdDev --- modules/core/include/opencv2/core/mat.inl.hpp | 2 +- modules/core/src/opencl/reduce.cl | 15 +++- modules/core/src/stat.cpp | 61 +++++++++----- modules/core/test/ocl/test_arithm.cpp | 112 ++++++++++++++++++++++++++ 4 files changed, 168 insertions(+), 22 deletions(-) diff --git a/modules/core/include/opencv2/core/mat.inl.hpp b/modules/core/include/opencv2/core/mat.inl.hpp index f02bf9d..3079548 100644 --- a/modules/core/include/opencv2/core/mat.inl.hpp +++ b/modules/core/include/opencv2/core/mat.inl.hpp @@ -60,7 +60,7 @@ inline void _InputArray::init(int _flags, const void* _obj, Size _sz) inline void* _InputArray::getObj() const { return obj; } -inline _InputArray::_InputArray() { init(0, 0); } +inline _InputArray::_InputArray() { init(NONE, 0); } inline _InputArray::_InputArray(int _flags, void* _obj) { init(_flags, _obj); } inline _InputArray::_InputArray(const Mat& m) { init(MAT+ACCESS_READ, &m); } inline _InputArray::_InputArray(const std::vector& vec) { init(STD_VECTOR_MAT+ACCESS_READ, &vec); } diff --git a/modules/core/src/opencl/reduce.cl b/modules/core/src/opencl/reduce.cl index 0f148f3..7a35314 100644 --- a/modules/core/src/opencl/reduce.cl +++ b/modules/core/src/opencl/reduce.cl @@ -51,7 +51,12 @@ #endif #define noconvert + +#ifdef HAVE_MASK +#define EXTRA_PARAMS , __global const uchar * mask, int mask_step, int mask_offset +#else #define EXTRA_PARAMS +#endif #if defined OP_SUM || defined OP_SUM_ABS || defined OP_SUM_SQR #if OP_SUM @@ -65,11 +70,19 @@ __local dstT localmem[WGS2_ALIGNED] #define DEFINE_ACCUMULATOR \ dstT accumulator = (dstT)(0) +#ifdef HAVE_MASK +#define REDUCE_GLOBAL \ + dstT temp = convertToDT(src[0]); \ + int mask_index = mad24(id / cols, mask_step, mask_offset + (id % cols)); \ + if (mask[mask_index]) \ + FUNC(accumulator, temp) +#else #define 
REDUCE_GLOBAL \ dstT temp = convertToDT(src[0]); \ FUNC(accumulator, temp) +#endif #define SET_LOCAL_1 \ - localmem[lid] = accumulator + localmem[lid] = accumulator #define REDUCE_LOCAL_1 \ localmem[lid - WGS2_ALIGNED] += accumulator #define REDUCE_LOCAL_2 \ diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index 932cad6..c3204d0 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -466,7 +466,7 @@ template Scalar ocl_part_sum(Mat m) enum { OCL_OP_SUM = 0, OCL_OP_SUM_ABS = 1, OCL_OP_SUM_SQR = 2 }; -static bool ocl_sum( InputArray _src, Scalar & res, int sum_op ) +static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask = noArray() ) { CV_Assert(sum_op == OCL_OP_SUM || sum_op == OCL_OP_SUM_ABS || sum_op == OCL_OP_SUM_SQR); @@ -481,6 +481,8 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op ) int ddepth = std::max(sum_op == OCL_OP_SUM_SQR ? CV_32F : CV_32S, depth), dtype = CV_MAKE_TYPE(ddepth, cn); + bool haveMask = _mask.kind() != _InputArray::NONE; + CV_Assert(!haveMask || _mask.type() == CV_8UC1); int wgs2_aligned = 1; while (wgs2_aligned < (int)wgs) @@ -490,16 +492,24 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op ) static const char * const opMap[3] = { "OP_SUM", "OP_SUM_ABS", "OP_SUM_SQR" }; char cvt[40]; ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, - format("-D srcT=%s -D dstT=%s -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s", + format("-D srcT=%s -D dstT=%s -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s", ocl::typeToStr(type), ocl::typeToStr(dtype), ocl::convertTypeStr(depth, ddepth, cn, cvt), opMap[sum_op], (int)wgs, wgs2_aligned, - doubleSupport ? " -D DOUBLE_SUPPORT" : "")); + doubleSupport ? " -D DOUBLE_SUPPORT" : "", + haveMask ? 
" -D HAVE_MASK" : "")); if (k.empty()) return false; - UMat src = _src.getUMat(), db(1, dbsize, dtype); - k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), - dbsize, ocl::KernelArg::PtrWriteOnly(db)); + UMat src = _src.getUMat(), db(1, dbsize, dtype), mask = _mask.getUMat(); + + ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), + dbarg = ocl::KernelArg::PtrWriteOnly(db), + maskarg = ocl::KernelArg::ReadOnlyNoSize(mask); + + if (haveMask) + k.args(srcarg, src.cols, (int)src.total(), dbsize, dbarg, maskarg); + else + k.args(srcarg, src.cols, (int)src.total(), dbsize, dbarg); size_t globalsize = dbsize * wgs; if (k.run(1, &globalsize, &wgs, false)) @@ -807,15 +817,17 @@ cv::Scalar cv::mean( InputArray _src, InputArray _mask ) namespace cv { -static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv ) +static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask ) { + bool haveMask = _mask.kind() != _InputArray::NONE; + Scalar mean, stddev; - if (!ocl_sum(_src, mean, OCL_OP_SUM)) + if (!ocl_sum(_src, mean, OCL_OP_SUM, _mask)) return false; - if (!ocl_sum(_src, stddev, OCL_OP_SUM_SQR)) + if (!ocl_sum(_src, stddev, OCL_OP_SUM_SQR, _mask)) return false; - double total = 1.0 / _src.total(); + double total = 1.0 / std::max(haveMask ? 
(size_t)countNonZero(_mask) : _src.total(), (size_t)1); /* clamp the element count to 1: with an all-zero mask or an empty source nothing is accumulated, both sums stay 0, and mean/stddev come out as 0 instead of 0 * inf = NaN */ int k, j, cn = _src.channels(); for (int i = 0; i < cn; ++i) { @@ -850,7 +862,7 @@ static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask ) { - if (ocl::useOpenCL() && _src.isUMat() && _mask.empty() && ocl_meanStdDev(_src, _mean, _sdv)) + if (ocl::useOpenCL() && _src.isUMat() && ocl_meanStdDev(_src, _mean, _sdv, _mask)) return; Mat src = _src.getMat(), mask = _mask.getMat(); @@ -1883,13 +1895,14 @@ static NormDiffFunc getNormDiffFunc(int normType, int depth) namespace cv { -static bool ocl_norm( InputArray _src, int normType, double & result ) +static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & result ) { int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0, + haveMask = _mask.kind() != _InputArray::NONE; if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2) || - (!doubleSupport && depth == CV_64F)) + (!doubleSupport && depth == CV_64F) || (normType == NORM_INF && haveMask && cn != 1)) return false; UMat src = _src.getUMat(); @@ -1921,17 +1934,25 @@ static bool ocl_norm( InputArray _src, int normType, double & result ) else abssrc = src; - cv::minMaxIdx(abssrc.reshape(1), NULL, &result); + cv::minMaxIdx(haveMask ? abssrc : abssrc.reshape(1), NULL, &result, NULL, NULL, _mask); } else if (normType == NORM_L1 || normType == NORM_L2) { - Scalar s; + Scalar sc; bool unstype = depth == CV_8U || depth == CV_16U; - if ( !ocl_sum(src.reshape(1), s, normType == NORM_L2 ? - OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS)) ) + if ( !ocl_sum(haveMask ? src : src.reshape(1), sc, normType == NORM_L2 ? + OCL_OP_SUM_SQR : (unstype ? 
OCL_OP_SUM : OCL_OP_SUM_ABS), _mask) ) return false; - result = normType == NORM_L1 ? s[0] : std::sqrt(s[0]); + + if (!haveMask) + cn = 1; + + double s = 0.0; + for (int i = 0; i < cn; ++i) + s += sc[i]; + + result = normType == NORM_L1 ? s : std::sqrt(s); } return true; @@ -1947,7 +1968,7 @@ double cv::norm( InputArray _src, int normType, InputArray _mask ) ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && _src.type() == CV_8U) ); double _result = 0; - if (ocl::useOpenCL() && _mask.empty() && _src.isUMat() && _src.dims() <= 2 && ocl_norm(_src, normType, _result)) + if (ocl::useOpenCL() && _src.isUMat() && _src.dims() <= 2 && ocl_norm(_src, normType, _mask, _result)) return _result; Mat src = _src.getMat(), mask = _mask.getMat(); diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index 607e906..6082b6d 100644 --- a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -924,6 +924,25 @@ OCL_TEST_P(MeanStdDev, Mat) } } +OCL_TEST_P(MeanStdDev, Mat_Mask) +{ + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + Scalar cpu_mean, cpu_stddev; + Scalar gpu_mean, gpu_stddev; + + OCL_OFF(cv::meanStdDev(src1_roi, cpu_mean, cpu_stddev, mask_roi)); + OCL_ON(cv::meanStdDev(usrc1_roi, gpu_mean, gpu_stddev, umask_roi)); + + for (int i = 0; i < cn; ++i) + { + EXPECT_NEAR(cpu_mean[i], gpu_mean[i], 0.1); + EXPECT_NEAR(cpu_stddev[i], gpu_stddev[i], 0.1); + } + } +} //////////////////////////////////////// Log ///////////////////////////////////////// @@ -1124,6 +1143,19 @@ OCL_TEST_P(Norm, NORM_INF_1arg) } } +OCL_TEST_P(Norm, NORM_INF_1arg_mask) +{ + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + OCL_OFF(const double cpuRes = cv::norm(src1_roi, NORM_INF, mask_roi)); + OCL_ON(const double gpuRes = cv::norm(usrc1_roi, NORM_INF, umask_roi)); + + EXPECT_NEAR(cpuRes, gpuRes, 0.1); + } +} + OCL_TEST_P(Norm, NORM_L1_1arg) { for (int j = 0; j < test_loop_times; j++) @@ 
-1137,6 +1169,19 @@ OCL_TEST_P(Norm, NORM_L1_1arg) } } +OCL_TEST_P(Norm, NORM_L1_1arg_mask) +{ + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + OCL_OFF(const double cpuRes = cv::norm(src1_roi, NORM_L1, mask_roi)); + OCL_ON(const double gpuRes = cv::norm(usrc1_roi, NORM_L1, umask_roi)); + + EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6); + } +} + OCL_TEST_P(Norm, NORM_L2_1arg) { for (int j = 0; j < test_loop_times; j++) @@ -1150,6 +1195,19 @@ OCL_TEST_P(Norm, NORM_L2_1arg) } } +OCL_TEST_P(Norm, NORM_L2_1arg_mask) +{ + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + OCL_OFF(const double cpuRes = cv::norm(src1_roi, NORM_L2, mask_roi)); + OCL_ON(const double gpuRes = cv::norm(usrc1_roi, NORM_L2, umask_roi)); + + EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6); + } +} + OCL_TEST_P(Norm, NORM_INF_2args) { for (int relative = 0; relative < 2; ++relative) @@ -1168,6 +1226,24 @@ OCL_TEST_P(Norm, NORM_INF_2args) } } +OCL_TEST_P(Norm, NORM_INF_2args_mask) +{ + for (int relative = 0; relative < 2; ++relative) + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + int type = NORM_INF; + if (relative == 1) + type |= NORM_RELATIVE; + + OCL_OFF(const double cpuRes = cv::norm(src1_roi, src2_roi, type, mask_roi)); + OCL_ON(const double gpuRes = cv::norm(usrc1_roi, usrc2_roi, type, umask_roi)); + + EXPECT_NEAR(cpuRes, gpuRes, 0.1); + } +} + OCL_TEST_P(Norm, NORM_L1_2args) { for (int relative = 0; relative < 2; ++relative) @@ -1186,6 +1262,24 @@ OCL_TEST_P(Norm, NORM_L1_2args) } } +OCL_TEST_P(Norm, NORM_L1_2args_mask) +{ + for (int relative = 0; relative < 2; ++relative) + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + int type = NORM_L1; + if (relative == 1) + type |= NORM_RELATIVE; + + OCL_OFF(const double cpuRes = cv::norm(src1_roi, src2_roi, type, mask_roi)); + OCL_ON(const double gpuRes = cv::norm(usrc1_roi, usrc2_roi, type, umask_roi)); + + EXPECT_PRED3(relativeError, cpuRes, 
gpuRes, 1e-6); + } +} + OCL_TEST_P(Norm, NORM_L2_2args) { for (int relative = 0; relative < 2; ++relative) @@ -1204,6 +1298,24 @@ OCL_TEST_P(Norm, NORM_L2_2args) } } +OCL_TEST_P(Norm, NORM_L2_2args_mask) +{ + for (int relative = 0; relative < 2; ++relative) + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + int type = NORM_L2; + if (relative == 1) + type |= NORM_RELATIVE; + + OCL_OFF(const double cpuRes = cv::norm(src1_roi, src2_roi, type, mask_roi)); + OCL_ON(const double gpuRes = cv::norm(usrc1_roi, usrc2_roi, type, umask_roi)); + + EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6); + } +} + //////////////////////////////// Sqrt //////////////////////////////////////////////// typedef ArithmTestBase Sqrt; -- 2.7.4