From: Ilya Lavrenov Date: Fri, 6 Jun 2014 13:15:19 +0000 (+0400) Subject: optimized cv::norm with NORM_INF X-Git-Tag: accepted/tizen/6.0/unified/20201030.111113~3186^2~4 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1a7a262f7457171e2084f3b8c1af93c21e89c64b;p=platform%2Fupstream%2Fopencv.git optimized cv::norm with NORM_INF --- diff --git a/modules/core/src/opencl/minmaxloc.cl b/modules/core/src/opencl/minmaxloc.cl index 558679e..2e48387 100644 --- a/modules/core/src/opencl/minmaxloc.cl +++ b/modules/core/src/opencl/minmaxloc.cl @@ -36,6 +36,7 @@ #define MAX_VAL DBL_MAX #endif +#define noconvert #define INDEX_MAX UINT_MAX #ifdef NEED_MINLOC @@ -93,20 +94,20 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off #endif #ifdef NEED_MINVAL - __local srcT1 localmem_min[WGS2_ALIGNED]; + __local dstT1 localmem_min[WGS2_ALIGNED]; #ifdef NEED_MINLOC __local uint localmem_minloc[WGS2_ALIGNED]; #endif #endif #ifdef NEED_MAXVAL - __local srcT1 localmem_max[WGS2_ALIGNED]; + __local dstT1 localmem_max[WGS2_ALIGNED]; #ifdef NEED_MAXLOC __local uint localmem_maxloc[WGS2_ALIGNED]; #endif #endif - srcT1 minval = MAX_VAL, maxval = MIN_VAL; - srcT temp; + dstT1 minval = MAX_VAL, maxval = MIN_VAL; + dstT temp; uint minloc = INDEX_MAX, maxloc = INDEX_MAX; int src_index; #ifdef HAVE_MASK @@ -130,7 +131,7 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off if (mask[mask_index]) #endif { - temp = *(__global const srcT *)(srcptr + src_index); + temp = convertToDT(*(__global const srcT *)(srcptr + src_index)); #if kercn == 1 #ifdef NEED_MINVAL if (minval > temp) @@ -262,12 +263,12 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off { int pos = 0; #ifdef NEED_MINVAL - *(__global srcT1 *)(dstptr + mad24(gid, (int)sizeof(srcT1), pos)) = localmem_min[0]; - pos = mad24(groupnum, (int)sizeof(srcT1), pos); + *(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_min[0]; 
+ pos = mad24(groupnum, (int)sizeof(dstT1), pos); #endif #ifdef NEED_MAXVAL - *(__global srcT1 *)(dstptr + mad24(gid, (int)sizeof(srcT1), pos)) = localmem_max[0]; - pos = mad24(groupnum, (int)sizeof(srcT1), pos); + *(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_max[0]; + pos = mad24(groupnum, (int)sizeof(dstT1), pos); #endif #ifdef NEED_MINLOC *(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_minloc[0]; diff --git a/modules/core/src/opencl/reduce.cl b/modules/core/src/opencl/reduce.cl index 038f132..21a5518 100644 --- a/modules/core/src/opencl/reduce.cl +++ b/modules/core/src/opencl/reduce.cl @@ -50,7 +50,7 @@ #endif #endif -#if defined OP_NORM_INF_MASK || defined OP_MIN_MAX_LOC || defined OP_MIN_MAX_LOC_MASK +#if defined OP_NORM_INF_MASK #ifdef DEPTH_0 #define MIN_VAL 0 @@ -75,8 +75,6 @@ #define MAX_VAL DBL_MAX #endif -#define INDEX_MAX UINT_MAX - #define dstT srcT #define dstT1 srcT1 diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index 9d78c0f..8996c48 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -1313,7 +1313,7 @@ static void ofs2idx(const Mat& a, size_t ofs, int* idx) template <typename T> void getMinMaxRes(const Mat & db, double* minVal, double* maxVal, int* minLoc, int* maxLoc, - int groupnum, int cn, int cols) + int groupnum, int cols) { uint index_max = std::numeric_limits<uint>::max(); T minval = std::numeric_limits<T>::max(); @@ -1393,10 +1393,10 @@ void getMinMaxRes(const Mat & db, double* minVal, double* maxVal, } typedef void (*getMinMaxResFunc)(const Mat & db, double *minVal, double *maxVal, - int *minLoc, int *maxLoc, - int gropunum, int cn, int cols); + int *minLoc, int *maxLoc, int gropunum, int cols); -static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc, int* maxLoc, InputArray _mask) +static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc, int* maxLoc, InputArray _mask, + int ddepth = -1, bool 
absValues = false) { CV_Assert( (_src.channels() == 1 && (_mask.empty() || _mask.type() == CV_8U)) || (_src.channels() >= 1 && _mask.empty() && !minLoc && !maxLoc) ); @@ -1405,8 +1405,10 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* bool doubleSupport = dev.doubleFPConfig() > 0, haveMask = !_mask.empty(); int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), kercn = haveMask ? 1 : std::min(4, ocl::predictOptimalVectorWidth(_src)); + if (ddepth < 0) + ddepth = depth; - if (depth == CV_64F && !doubleSupport) + if ((depth == CV_64F || ddepth == CV_64F) && !doubleSupport) return false; int groupnum = dev.maxComputeUnits(); @@ -1423,26 +1425,32 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* // in case of mask we must know whether mask is filled with zeros or not // so let's calculate min or max location, if it's undefined, so mask is zeros if (!(needMaxLoc || needMinLoc) && haveMask) + { if (needMinVal) needMinLoc = true; else needMaxVal = true; + } + char cvt[40]; String opts = format("-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s" - " -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s", + " -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s" + " -D dstT1=%s -D dstT=%s -D convertToDT=%s%s", depth, ocl::typeToStr(depth), haveMask ? " -D HAVE_MASK" : "", (int)wgs, ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "", _src.isContinuous() ? " -D HAVE_SRC_CONT" : "", _mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn, needMinVal ? " -D NEED_MINVAL" : "", needMaxVal ? " -D NEED_MAXVAL" : "", - needMinLoc ? " -D NEED_MINLOC" : "", needMaxLoc ? " -D NEED_MAXLOC" : ""); + needMinLoc ? " -D NEED_MINLOC" : "", needMaxLoc ? " -D NEED_MAXLOC" : "", + ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)), + ocl::convertTypeStr(depth, ddepth, kercn, cvt), absValues ? 
" -D OP_ABS" : ""); ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts); if (k.empty()) return false; - int esz = CV_ELEM_SIZE(depth), esz32s = CV_ELEM_SIZE1(CV_32S), + int esz = CV_ELEM_SIZE(ddepth), esz32s = CV_ELEM_SIZE1(CV_32S), dbsize = groupnum * ((needMinVal ? esz : 0) + (needMaxVal ? esz : 0) + (needMinLoc ? esz32s : 0) + (needMaxLoc ? esz32s : 0)); UMat src = _src.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat(); @@ -1477,7 +1485,7 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* int locTemp[2]; func(db.getMat(ACCESS_READ), minVal, maxVal, needMinLoc ? minLoc ? minLoc : locTemp : minLoc, - needMaxLoc ? maxLoc ? maxLoc : locTemp : maxLoc, groupnum, cn, src.cols); + needMaxLoc ? maxLoc ? maxLoc : locTemp : maxLoc, groupnum, src.cols); return true; } @@ -2116,35 +2124,8 @@ static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & if (normType == NORM_INF) { if (cn == 1 || !haveMask) - { - UMat abssrc; - - if (depth != CV_8U && depth != CV_16U) - { - int wdepth = std::max(CV_32S, depth), rowsPerWI = d.isIntel() ? 4 : 1; - char cvt[50]; - - ocl::Kernel kabs("KF", ocl::core::arithm_oclsrc, - format("-D UNARY_OP -D OP_ABS_NOSAT -D dstT=%s -D srcT1=%s" - " -D convertToDT=%s -D rowsPerWI=%d%s", - ocl::typeToStr(wdepth), ocl::typeToStr(depth), - ocl::convertTypeStr(depth, wdepth, 1, cvt), rowsPerWI, - doubleSupport ? " -D DOUBLE_SUPPORT" : "")); - if (kabs.empty()) - return false; - - abssrc.create(src.size(), CV_MAKE_TYPE(wdepth, cn)); - kabs.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(abssrc, cn)); - - size_t globalsize[2] = { src.cols * cn, (src.rows + rowsPerWI - 1) / rowsPerWI }; - if (!kabs.run(2, globalsize, NULL, false)) - return false; - } - else - abssrc = src; - - cv::minMaxIdx(haveMask ? 
abssrc : abssrc.reshape(1), NULL, &result, NULL, NULL, _mask); - } + ocl_minMaxIdx(_src, NULL, &result, NULL, NULL, _mask, + std::max(depth, CV_32S), depth != CV_8U && depth != CV_16U); else { int dbsize = d.maxComputeUnits();