enum { OCL_OP_SUM = 0, OCL_OP_SUM_ABS = 1, OCL_OP_SUM_SQR = 2 };
-static bool ocl_sum( InputArray _src, Scalar & res, int sum_op )
+static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask = noArray() )
{
CV_Assert(sum_op == OCL_OP_SUM || sum_op == OCL_OP_SUM_ABS || sum_op == OCL_OP_SUM_SQR);
int ddepth = std::max(sum_op == OCL_OP_SUM_SQR ? CV_32F : CV_32S, depth),
dtype = CV_MAKE_TYPE(ddepth, cn);
+ bool haveMask = _mask.kind() != _InputArray::NONE;
+ CV_Assert(!haveMask || _mask.type() == CV_8UC1);
int wgs2_aligned = 1;
while (wgs2_aligned < (int)wgs)
static const char * const opMap[3] = { "OP_SUM", "OP_SUM_ABS", "OP_SUM_SQR" };
char cvt[40];
ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
- format("-D srcT=%s -D dstT=%s -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s",
+ format("-D srcT=%s -D dstT=%s -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s",
ocl::typeToStr(type), ocl::typeToStr(dtype), ocl::convertTypeStr(depth, ddepth, cn, cvt),
opMap[sum_op], (int)wgs, wgs2_aligned,
- doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
+ doubleSupport ? " -D DOUBLE_SUPPORT" : "",
+ haveMask ? " -D HAVE_MASK" : ""));
if (k.empty())
return false;
- UMat src = _src.getUMat(), db(1, dbsize, dtype);
- k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
- dbsize, ocl::KernelArg::PtrWriteOnly(db));
+ UMat src = _src.getUMat(), db(1, dbsize, dtype), mask = _mask.getUMat();
+
+ ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
+ dbarg = ocl::KernelArg::PtrWriteOnly(db),
+ maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);
+
+ if (haveMask)
+ k.args(srcarg, src.cols, (int)src.total(), dbsize, dbarg, maskarg);
+ else
+ k.args(srcarg, src.cols, (int)src.total(), dbsize, dbarg);
size_t globalsize = dbsize * wgs;
if (k.run(1, &globalsize, &wgs, false))
namespace cv {
-static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv )
+// OpenCL branch of meanStdDev. Obtains the per-channel sum (OCL_OP_SUM) and sum of
+// squares (OCL_OP_SUM_SQR) of _src through ocl_sum(), forwarding _mask to both calls,
+// and returns false as soon as either call fails. `total` is the reciprocal of the
+// number of contributing elements: the non-zero mask entries when a mask is passed,
+// otherwise every element of _src.
+static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
{
+ bool haveMask = _mask.kind() != _InputArray::NONE;
+
Scalar mean, stddev;
- if (!ocl_sum(_src, mean, OCL_OP_SUM))
+ if (!ocl_sum(_src, mean, OCL_OP_SUM, _mask))
return false;
- if (!ocl_sum(_src, stddev, OCL_OP_SUM_SQR))
+ if (!ocl_sum(_src, stddev, OCL_OP_SUM_SQR, _mask))
return false;
- double total = 1.0 / _src.total();
+ // Count of elements that take part in the sums.
+ double total = haveMask ? (double)countNonZero(_mask) : (double)_src.total();
+ // A mask without any non-zero entry (or an empty source) gives a count of zero;
+ // dividing by it would produce an infinite scale factor, so use a zero scale instead.
+ total = total > 0 ? 1.0 / total : 0.0;
int k, j, cn = _src.channels();
for (int i = 0; i < cn; ++i)
{
void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
{
- if (ocl::useOpenCL() && _src.isUMat() && _mask.empty() && ocl_meanStdDev(_src, _mean, _sdv))
+ if (ocl::useOpenCL() && _src.isUMat() && ocl_meanStdDev(_src, _mean, _sdv, _mask))
return;
Mat src = _src.getMat(), mask = _mask.getMat();
namespace cv {
-static bool ocl_norm( InputArray _src, int normType, double & result )
+static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & result )
{
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
- bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
+ bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0,
+ haveMask = _mask.kind() != _InputArray::NONE;
if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2) ||
- (!doubleSupport && depth == CV_64F))
+ (!doubleSupport && depth == CV_64F) || (normType == NORM_INF && haveMask && cn != 1))
return false;
UMat src = _src.getUMat();
else
abssrc = src;
- cv::minMaxIdx(abssrc.reshape(1), NULL, &result);
+ cv::minMaxIdx(haveMask ? abssrc : abssrc.reshape(1), NULL, &result, NULL, NULL, _mask);
}
else if (normType == NORM_L1 || normType == NORM_L2)
{
- Scalar s;
+ Scalar sc;
bool unstype = depth == CV_8U || depth == CV_16U;
- if ( !ocl_sum(src.reshape(1), s, normType == NORM_L2 ?
- OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS)) )
+ if ( !ocl_sum(haveMask ? src : src.reshape(1), sc, normType == NORM_L2 ?
+ OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS), _mask) )
return false;
- result = normType == NORM_L1 ? s[0] : std::sqrt(s[0]);
+
+ if (!haveMask)
+ cn = 1;
+
+ double s = 0.0;
+ for (int i = 0; i < cn; ++i)
+ s += sc[i];
+
+ result = normType == NORM_L1 ? s : std::sqrt(s);
}
return true;
((normType == NORM_HAMMING || normType == NORM_HAMMING2) && _src.type() == CV_8U) );
double _result = 0;
- if (ocl::useOpenCL() && _mask.empty() && _src.isUMat() && _src.dims() <= 2 && ocl_norm(_src, normType, _result))
+ if (ocl::useOpenCL() && _src.isUMat() && _src.dims() <= 2 && ocl_norm(_src, normType, _mask, _result))
return _result;
Mat src = _src.getMat(), mask = _mask.getMat();
}
}
+// Masked meanStdDev check: for every generated data set, cv::meanStdDev is run on the
+// Mat ROI inside OCL_OFF and on the UMat ROI inside OCL_ON, and each of the cn
+// per-channel mean / stddev values must agree within 0.1.
+OCL_TEST_P(MeanStdDev, Mat_Mask)
+{
+    for (int remaining = test_loop_times; remaining > 0; --remaining)
+    {
+        generateTestData();
+
+        Scalar cpu_mean, cpu_stddev, gpu_mean, gpu_stddev;
+        OCL_OFF(cv::meanStdDev(src1_roi, cpu_mean, cpu_stddev, mask_roi));
+        OCL_ON(cv::meanStdDev(usrc1_roi, gpu_mean, gpu_stddev, umask_roi));
+
+        int i = 0;
+        while (i < cn)
+        {
+            EXPECT_NEAR(cpu_mean[i], gpu_mean[i], 0.1);
+            EXPECT_NEAR(cpu_stddev[i], gpu_stddev[i], 0.1);
+            ++i;
+        }
+    }
+}
//////////////////////////////////////// Log /////////////////////////////////////////
}
}
+// Single-source NORM_INF with a mask: the value computed inside OCL_ON on the UMat
+// ROI must be within 0.1 of the value computed inside OCL_OFF on the Mat ROI.
+OCL_TEST_P(Norm, NORM_INF_1arg_mask)
+{
+    for (int remaining = test_loop_times; remaining > 0; --remaining)
+    {
+        // Fresh inputs for every repetition.
+        generateTestData();
+
+        OCL_OFF(const double cpuRes = cv::norm(src1_roi, NORM_INF, mask_roi));
+        OCL_ON(const double gpuRes = cv::norm(usrc1_roi, NORM_INF, umask_roi));
+
+        EXPECT_NEAR(cpuRes, gpuRes, 0.1);
+    }
+}
+
OCL_TEST_P(Norm, NORM_L1_1arg)
{
for (int j = 0; j < test_loop_times; j++)
}
}
+// Single-source NORM_L1 with a mask: the OCL_OFF (Mat ROI) and OCL_ON (UMat ROI)
+// results are checked with the relativeError predicate and a 1e-6 threshold.
+OCL_TEST_P(Norm, NORM_L1_1arg_mask)
+{
+    for (int remaining = test_loop_times; remaining > 0; --remaining)
+    {
+        // Fresh inputs for every repetition.
+        generateTestData();
+
+        OCL_OFF(const double cpuRes = cv::norm(src1_roi, NORM_L1, mask_roi));
+        OCL_ON(const double gpuRes = cv::norm(usrc1_roi, NORM_L1, umask_roi));
+
+        EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6);
+    }
+}
+
OCL_TEST_P(Norm, NORM_L2_1arg)
{
for (int j = 0; j < test_loop_times; j++)
}
}
+// Single-source NORM_L2 with a mask: the OCL_OFF (Mat ROI) and OCL_ON (UMat ROI)
+// results are checked with the relativeError predicate and a 1e-6 threshold.
+OCL_TEST_P(Norm, NORM_L2_1arg_mask)
+{
+    for (int remaining = test_loop_times; remaining > 0; --remaining)
+    {
+        // Fresh inputs for every repetition.
+        generateTestData();
+
+        OCL_OFF(const double cpuRes = cv::norm(src1_roi, NORM_L2, mask_roi));
+        OCL_ON(const double gpuRes = cv::norm(usrc1_roi, NORM_L2, umask_roi));
+
+        EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6);
+    }
+}
+
OCL_TEST_P(Norm, NORM_INF_2args)
{
for (int relative = 0; relative < 2; ++relative)
}
}
+// Two-source NORM_INF with a mask, exercised both plain (relative == 0) and with
+// NORM_RELATIVE OR-ed in (relative == 1). The OCL_ON result on the UMat ROIs must
+// be within 0.1 of the OCL_OFF result on the Mat ROIs.
+OCL_TEST_P(Norm, NORM_INF_2args_mask)
+{
+    for (int relative = 0; relative < 2; ++relative)
+    {
+        // The norm flags depend only on the outer pass, so build them once per pass.
+        const int type = relative == 1 ? (NORM_INF | NORM_RELATIVE) : NORM_INF;
+
+        for (int remaining = test_loop_times; remaining > 0; --remaining)
+        {
+            generateTestData();
+
+            OCL_OFF(const double cpuRes = cv::norm(src1_roi, src2_roi, type, mask_roi));
+            OCL_ON(const double gpuRes = cv::norm(usrc1_roi, usrc2_roi, type, umask_roi));
+
+            EXPECT_NEAR(cpuRes, gpuRes, 0.1);
+        }
+    }
+}
+
OCL_TEST_P(Norm, NORM_L1_2args)
{
for (int relative = 0; relative < 2; ++relative)
}
}
+// Two-source NORM_L1 with a mask, exercised both plain (relative == 0) and with
+// NORM_RELATIVE OR-ed in (relative == 1). The OCL_OFF and OCL_ON results are
+// checked with the relativeError predicate and a 1e-6 threshold.
+OCL_TEST_P(Norm, NORM_L1_2args_mask)
+{
+    for (int relative = 0; relative < 2; ++relative)
+    {
+        // The norm flags depend only on the outer pass, so build them once per pass.
+        const int type = relative == 1 ? (NORM_L1 | NORM_RELATIVE) : NORM_L1;
+
+        for (int remaining = test_loop_times; remaining > 0; --remaining)
+        {
+            generateTestData();
+
+            OCL_OFF(const double cpuRes = cv::norm(src1_roi, src2_roi, type, mask_roi));
+            OCL_ON(const double gpuRes = cv::norm(usrc1_roi, usrc2_roi, type, umask_roi));
+
+            EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6);
+        }
+    }
+}
+
OCL_TEST_P(Norm, NORM_L2_2args)
{
for (int relative = 0; relative < 2; ++relative)
}
}
+// Two-source NORM_L2 with a mask, exercised both plain (relative == 0) and with
+// NORM_RELATIVE OR-ed in (relative == 1). The OCL_OFF and OCL_ON results are
+// checked with the relativeError predicate and a 1e-6 threshold.
+OCL_TEST_P(Norm, NORM_L2_2args_mask)
+{
+    for (int relative = 0; relative < 2; ++relative)
+    {
+        // The norm flags depend only on the outer pass, so build them once per pass.
+        const int type = relative == 1 ? (NORM_L2 | NORM_RELATIVE) : NORM_L2;
+
+        for (int remaining = test_loop_times; remaining > 0; --remaining)
+        {
+            generateTestData();
+
+            OCL_OFF(const double cpuRes = cv::norm(src1_roi, src2_roi, type, mask_roi));
+            OCL_ON(const double gpuRes = cv::norm(usrc1_roi, usrc2_roi, type, umask_roi));
+
+            EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6);
+        }
+    }
+}
+
//////////////////////////////// Sqrt ////////////////////////////////////////////////
typedef ArithmTestBase Sqrt;