From 5403bdd2286cd8a4fb2b2b688fd02f6d86361a4b Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Sat, 7 Jun 2014 20:53:20 +0400 Subject: [PATCH] optimized cv::norm with NORM_RELATIVE --- modules/core/src/opencl/minmaxloc.cl | 6 +++-- modules/core/src/opencl/reduce.cl | 3 +++ modules/core/src/stat.cpp | 44 ++++++++++++++++++++++------------- modules/core/test/ocl/test_arithm.cpp | 12 ++++++++++ 4 files changed, 47 insertions(+), 18 deletions(-) diff --git a/modules/core/src/opencl/minmaxloc.cl b/modules/core/src/opencl/minmaxloc.cl index e3d87b0..11b6da9 100644 --- a/modules/core/src/opencl/minmaxloc.cl +++ b/modules/core/src/opencl/minmaxloc.cl @@ -76,7 +76,7 @@ #ifdef OP_CALC2 #define CALC_MAX2(p) \ if (maxval2 < temp.p) \ - maxval2 = temp.p + maxval2 = temp.p; #else #define CALC_MAX2(p) #endif @@ -171,6 +171,9 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off #endif temp2 = convertToDT(*(__global const srcT *)(src2ptr + src2_index)); temp = temp > temp2 ? temp - temp2 : (temp2 - temp); +#ifdef OP_CALC2 + temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; +#endif #endif #if kercn == 1 @@ -192,7 +195,6 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off #endif } #ifdef OP_CALC2 - temp2 = temp2 >= (dstT)(0) ? temp2 : -temp2; if (maxval2 < temp2) maxval2 = temp2; #endif diff --git a/modules/core/src/opencl/reduce.cl b/modules/core/src/opencl/reduce.cl index d535079..92818e3 100644 --- a/modules/core/src/opencl/reduce.cl +++ b/modules/core/src/opencl/reduce.cl @@ -580,6 +580,9 @@ __kernel void reduce(__global const uchar * srcptr, int src_step, int src_offset int id = get_global_id(0) * kercn; srcptr += src_offset; +#ifdef HAVE_SRC2 + src2ptr += src2_offset; +#endif DECLARE_LOCAL_MEM; DEFINE_ACCUMULATOR; diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index b405d6f..34c487a 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -1334,17 +1334,17 @@ static void ofs2idx(const Mat& a, size_t ofs, int* idx) #ifdef HAVE_OPENCL template -void getMinMaxRes(const Mat & db, double* minVal, double* maxVal, +void getMinMaxRes(const Mat & db, double * minVal, double * maxVal, int* minLoc, int* maxLoc, - int groupnum, int cols) + int groupnum, int cols, double * maxVal2) { uint index_max = std::numeric_limits::max(); T minval = std::numeric_limits::max(); - T maxval = std::numeric_limits::min() > 0 ? -std::numeric_limits::max() : std::numeric_limits::min(); + T maxval = std::numeric_limits::min() > 0 ? -std::numeric_limits::max() : std::numeric_limits::min(), maxval2 = maxval; uint minloc = index_max, maxloc = index_max; int index = 0; - const T * minptr = NULL, * maxptr = NULL; + const T * minptr = NULL, * maxptr = NULL, * maxptr2 = NULL; const uint * minlocptr = NULL, * maxlocptr = NULL; if (minVal || minLoc) { @@ -1362,7 +1362,12 @@ void getMinMaxRes(const Mat & db, double* minVal, double* maxVal, index += sizeof(uint) * groupnum; } if (maxLoc) + { maxlocptr = (uint *)(db.data + index); + index += sizeof(uint) * groupnum; + } + if (maxVal2) + maxptr2 = (const T *)(db.data + index); for (int i = 0; i < groupnum; i++) { @@ -1394,6 +1399,8 @@ void getMinMaxRes(const Mat & db, double* minVal, double* maxVal, maxval = maxptr[i]; } } + if (maxptr2 && maxptr2[i] > maxval2) + maxval2 = maxptr2[i]; } bool zero_mask = (minLoc && minloc == index_max) || (maxLoc && maxloc == index_max); @@ -1402,6 +1409,8 @@ void getMinMaxRes(const Mat & db, double* minVal, double* maxVal, *minVal = zero_mask ? 0 : (double)minval; if (maxVal) *maxVal = zero_mask ? 0 : (double)maxval; + if (maxVal2) + *maxVal2 = zero_mask ? 0 : (double)maxval2; if (minLoc) { @@ -1415,20 +1424,21 @@ void getMinMaxRes(const Mat & db, double* minVal, double* maxVal, } } -typedef void (*getMinMaxResFunc)(const Mat & db, double *minVal, double *maxVal, - int *minLoc, int *maxLoc, int gropunum, int cols); +typedef void (*getMinMaxResFunc)(const Mat & db, double * minVal, double * maxVal, + int * minLoc, int *maxLoc, int gropunum, int cols, double * maxVal2); static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc, int* maxLoc, InputArray _mask, - int ddepth = -1, bool absValues = false, InputArray _src2 = noArray(), bool calc2 = false) + int ddepth = -1, bool absValues = false, InputArray _src2 = noArray(), double * maxVal2 = NULL) { - CV_Assert( (_src.channels() == 1 && (_mask.empty() || _mask.type() == CV_8U)) || - (_src.channels() >= 1 && _mask.empty() && !minLoc && !maxLoc) ); - const ocl::Device & dev = ocl::Device::getDefault(); bool doubleSupport = dev.doubleFPConfig() > 0, haveMask = !_mask.empty(), haveSrc2 = _src2.kind() != _InputArray::NONE; int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), kercn = haveMask ? 1 : std::min(4, ocl::predictOptimalVectorWidth(_src)); + + CV_Assert( (cn == 1 && (_mask.empty() || _mask.type() == CV_8U)) || + (cn >= 1 && _mask.empty() && !minLoc && !maxLoc) ); + if (ddepth < 0) ddepth = depth; @@ -1471,7 +1481,7 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* needMinLoc ? " -D NEED_MINLOC" : "", needMaxLoc ? " -D NEED_MAXLOC" : "", ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)), ocl::convertTypeStr(depth, ddepth, kercn, cvt), absValues ? " -D OP_ABS" : "", - haveSrc2 ? " -D HAVE_SRC2" : "", calc2 ? " -D OP_CALC2" : "", + haveSrc2 ? " -D HAVE_SRC2" : "", maxVal2 ? " -D OP_CALC2" : "", haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : ""); ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts); @@ -1481,7 +1491,7 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* int esz = CV_ELEM_SIZE(ddepth), esz32s = CV_ELEM_SIZE1(CV_32S), dbsize = groupnum * ((needMinVal ? esz : 0) + (needMaxVal ? esz : 0) + (needMinLoc ? esz32s : 0) + (needMaxLoc ? esz32s : 0) + - (calc2 ? esz : 0)); + (maxVal2 ? esz : 0)); UMat src = _src.getUMat(), src2 = _src2.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat(); if (cn > 1) @@ -1525,12 +1535,13 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* getMinMaxRes }; - getMinMaxResFunc func = functab[depth]; + getMinMaxResFunc func = functab[ddepth]; int locTemp[2]; func(db.getMat(ACCESS_READ), minVal, maxVal, needMinLoc ? minLoc ? minLoc : locTemp : minLoc, - needMaxLoc ? maxLoc ? maxLoc : locTemp : maxLoc, groupnum, src.cols); + needMaxLoc ? maxLoc ? maxLoc : locTemp : maxLoc, + groupnum, src.cols, maxVal2); return true; } @@ -2560,9 +2571,10 @@ static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArr } else { - if (!ocl_minMaxIdx(_src1, NULL, &result, NULL, NULL, _mask, std::max(CV_32S, depth), - false, _src2, relative)) + if (!ocl_minMaxIdx(_src1, NULL, &sc1[0], NULL, NULL, _mask, std::max(CV_32S, depth), + false, _src2, relative ? &sc2[0] : NULL)) return false; + cn = 1; } double s2 = 0; diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index d396975..a7a09ca 100644 --- a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -1293,6 +1293,8 @@ OCL_TEST_P(Norm, NORM_INF_2args) { generateTestData(); + SCOPED_TRACE(relative ? "NORM_RELATIVE" : ""); + int type = NORM_INF; if (relative == 1) type |= NORM_RELATIVE; @@ -1311,6 +1313,8 @@ OCL_TEST_P(Norm, NORM_INF_2args_mask) { generateTestData(); + SCOPED_TRACE(relative ? "NORM_RELATIVE" : ""); + int type = NORM_INF; if (relative == 1) type |= NORM_RELATIVE; @@ -1329,6 +1333,8 @@ OCL_TEST_P(Norm, NORM_L1_2args) { generateTestData(); + SCOPED_TRACE(relative ? "NORM_RELATIVE" : ""); + int type = NORM_L1; if (relative == 1) type |= NORM_RELATIVE; @@ -1347,6 +1353,8 @@ OCL_TEST_P(Norm, NORM_L1_2args_mask) { generateTestData(); + SCOPED_TRACE(relative ? "NORM_RELATIVE" : ""); + int type = NORM_L1; if (relative == 1) type |= NORM_RELATIVE; @@ -1365,6 +1373,8 @@ OCL_TEST_P(Norm, NORM_L2_2args) { generateTestData(); + SCOPED_TRACE(relative ? "NORM_RELATIVE" : ""); + int type = NORM_L2; if (relative == 1) type |= NORM_RELATIVE; @@ -1383,6 +1393,8 @@ OCL_TEST_P(Norm, NORM_L2_2args_mask) { generateTestData(); + SCOPED_TRACE(relative ? "NORM_RELATIVE" : ""); + int type = NORM_L2; if (relative == 1) type |= NORM_RELATIVE; -- 2.7.4