From: Ilya Lavrenov Date: Wed, 4 Jun 2014 14:22:55 +0000 (+0400) Subject: minmaxloc X-Git-Tag: submit/tizen_ivi/20141117.190038~2^2~348^2~5 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=fd5a8b3e9769c1b097e6246ac46f726a1204f1bd;p=profile%2Fivi%2Fopencv.git minmaxloc --- diff --git a/modules/core/src/opencl/minmaxloc.cl b/modules/core/src/opencl/minmaxloc.cl new file mode 100644 index 0000000..558679e --- /dev/null +++ b/modules/core/src/opencl/minmaxloc.cl @@ -0,0 +1,280 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Copyright (C) 2014, Itseez, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. + +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#endif +#endif + +#ifdef DEPTH_0 +#define MIN_VAL 0 +#define MAX_VAL 255 +#elif defined DEPTH_1 +#define MIN_VAL -128 +#define MAX_VAL 127 +#elif defined DEPTH_2 +#define MIN_VAL 0 +#define MAX_VAL 65535 +#elif defined DEPTH_3 +#define MIN_VAL -32768 +#define MAX_VAL 32767 +#elif defined DEPTH_4 +#define MIN_VAL INT_MIN +#define MAX_VAL INT_MAX +#elif defined DEPTH_5 +#define MIN_VAL (-FLT_MAX) +#define MAX_VAL FLT_MAX +#elif defined DEPTH_6 +#define MIN_VAL (-DBL_MAX) +#define MAX_VAL DBL_MAX +#endif + +#define INDEX_MAX UINT_MAX + +#ifdef NEED_MINLOC +#define CALC_MINLOC(inc) minloc = id + inc +#else +#define CALC_MINLOC(inc) +#endif + +#ifdef NEED_MAXLOC +#define CALC_MAXLOC(inc) maxloc = id + inc +#else +#define CALC_MAXLOC(inc) +#endif + +#ifdef NEED_MINVAL +#define CALC_MIN(p, inc) \ + if (minval > temp.p) \ + { \ + minval = temp.p; \ + CALC_MINLOC(inc); \ + } +#else +#define CALC_MIN(p, inc) +#endif + +#ifdef NEED_MAXVAL +#define CALC_MAX(p, inc) \ + if (maxval < temp.p) \ + { \ + maxval = temp.p; \ + CALC_MAXLOC(inc); \ + } +#else +#define CALC_MAX(p, inc) +#endif + +#define CALC_P(p, inc) \ + CALC_MIN(p, inc) \ + CALC_MAX(p, inc) + +__kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_offset, int cols, + int total, int groupnum, __global uchar * dstptr +#ifdef HAVE_MASK + , __global const uchar * mask, int mask_step, int mask_offset +#endif + ) +{ + int lid = get_local_id(0); + int gid = get_group_id(0); + int id = get_global_id(0) * kercn; + + srcptr += src_offset; +#ifdef HAVE_MASK + mask += mask_offset; +#endif + +#ifdef NEED_MINVAL + __local srcT1 localmem_min[WGS2_ALIGNED]; +#ifdef NEED_MINLOC + __local uint localmem_minloc[WGS2_ALIGNED]; +#endif +#endif +#ifdef NEED_MAXVAL + __local srcT1 localmem_max[WGS2_ALIGNED]; +#ifdef NEED_MAXLOC + __local uint localmem_maxloc[WGS2_ALIGNED]; +#endif +#endif + + srcT1 minval = MAX_VAL, maxval = MIN_VAL; + srcT temp; + uint minloc = INDEX_MAX, maxloc = INDEX_MAX; + int src_index; +#ifdef HAVE_MASK + int mask_index; +#endif + + for (int grain = groupnum * WGS * kercn; id < total; id += grain) + { +#ifdef HAVE_SRC_CONT + src_index = mul24(id, (int)sizeof(srcT1)); +#else + src_index = mad24(id / cols, src_step, mul24(id % cols, (int)sizeof(srcT1))); +#endif + +#ifdef HAVE_MASK +#ifdef HAVE_MASK_CONT + mask_index = id; +#else + mask_index = mad24(id / cols, mask_step, id % cols); +#endif + if (mask[mask_index]) +#endif + { + temp = *(__global const srcT *)(srcptr + src_index); +#if kercn == 1 +#ifdef NEED_MINVAL + if (minval > temp) + { + minval = temp; +#ifdef NEED_MINLOC + minloc = id; +#endif + } +#endif +#ifdef NEED_MAXVAL + if (maxval < temp) + { + maxval = temp; +#ifdef NEED_MAXLOC + maxloc = id; +#endif + } +#endif +#elif kercn >= 2 + CALC_P(s0, 0) + CALC_P(s1, 1) +#if kercn >= 4 + CALC_P(s2, 2) + CALC_P(s3, 3) +#if kercn >= 8 + CALC_P(s4, 4) + CALC_P(s5, 5) + CALC_P(s6, 6) + CALC_P(s7, 7) +#if kercn == 16 + CALC_P(s8, 8) + CALC_P(s9, 9) + CALC_P(sA, 10) + CALC_P(sB, 11) + CALC_P(sC, 12) + CALC_P(sD, 13) + CALC_P(sE, 14) + CALC_P(sF, 15) +#endif +#endif +#endif +#endif + } + } + + if (lid < WGS2_ALIGNED) + { +#ifdef NEED_MINVAL + localmem_min[lid] = minval; +#endif +#ifdef NEED_MAXVAL + localmem_max[lid] = maxval; +#endif +#ifdef NEED_MINLOC + localmem_minloc[lid] = minloc; +#endif +#ifdef NEED_MAXLOC + localmem_maxloc[lid] = maxloc; +#endif + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (lid >= WGS2_ALIGNED && total >= WGS2_ALIGNED) + { + int lid3 = lid - WGS2_ALIGNED; +#ifdef NEED_MINVAL + if (localmem_min[lid3] >= minval) + { +#ifdef NEED_MINLOC + if (localmem_min[lid3] == minval) + localmem_minloc[lid3] = min(localmem_minloc[lid3], minloc); + else + localmem_minloc[lid3] = minloc, +#endif + localmem_min[lid3] = minval; + } +#endif +#ifdef NEED_MAXVAL + if (localmem_max[lid3] <= maxval) + { +#ifdef NEED_MAXLOC + if (localmem_max[lid3] == maxval) + localmem_maxloc[lid3] = min(localmem_maxloc[lid3], maxloc); + else + localmem_maxloc[lid3] = maxloc, +#endif + localmem_max[lid3] = maxval; + } +#endif + } + barrier(CLK_LOCAL_MEM_FENCE); + + for (int lsize = WGS2_ALIGNED >> 1; lsize > 0; lsize >>= 1) + { + if (lid < lsize) + { + int lid2 = lsize + lid; + +#ifdef NEED_MINVAL + if (localmem_min[lid] >= localmem_min[lid2]) + { +#ifdef NEED_MINLOC + if (localmem_min[lid] == localmem_min[lid2]) + localmem_minloc[lid] = min(localmem_minloc[lid2], localmem_minloc[lid]); + else + localmem_minloc[lid] = localmem_minloc[lid2], +#endif + localmem_min[lid] = localmem_min[lid2]; + } +#endif +#ifdef NEED_MAXVAL + if (localmem_max[lid] <= localmem_max[lid2]) + { +#ifdef NEED_MAXLOC + if (localmem_max[lid] == localmem_max[lid2]) + localmem_maxloc[lid] = min(localmem_maxloc[lid2], localmem_maxloc[lid]); + else + localmem_maxloc[lid] = localmem_maxloc[lid2], +#endif + localmem_max[lid] = localmem_max[lid2]; + } +#endif + } + barrier(CLK_LOCAL_MEM_FENCE); + } + + if (lid == 0) + { + int pos = 0; +#ifdef NEED_MINVAL + *(__global srcT1 *)(dstptr + mad24(gid, (int)sizeof(srcT1), pos)) = localmem_min[0]; + pos = mad24(groupnum, (int)sizeof(srcT1), pos); +#endif +#ifdef NEED_MAXVAL + *(__global srcT1 *)(dstptr + mad24(gid, (int)sizeof(srcT1), pos)) = localmem_max[0]; + pos = mad24(groupnum, (int)sizeof(srcT1), pos); +#endif +#ifdef NEED_MINLOC + *(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_minloc[0]; + pos = mad24(groupnum, (int)sizeof(uint), pos); +#endif +#ifdef NEED_MAXLOC + *(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_maxloc[0]; +#endif + } +} diff --git a/modules/core/src/opencl/reduce.cl b/modules/core/src/opencl/reduce.cl index 851d36e..038f132 100644 --- a/modules/core/src/opencl/reduce.cl +++ b/modules/core/src/opencl/reduce.cl @@ -75,6 +75,8 @@ #define MAX_VAL DBL_MAX #endif +#define INDEX_MAX UINT_MAX + #define dstT srcT #define dstT1 srcT1 @@ -357,102 +359,11 @@ #define CALC_RESULT \ storepix(localmem_max[0], dstptr + dstTSIZE * gid) -// minMaxLoc stuff -#elif defined OP_MIN_MAX_LOC || defined OP_MIN_MAX_LOC_MASK - -#define DECLARE_LOCAL_MEM \ - __local srcT localmem_min[WGS2_ALIGNED]; \ - __local srcT localmem_max[WGS2_ALIGNED]; \ - __local int localmem_minloc[WGS2_ALIGNED]; \ - __local int localmem_maxloc[WGS2_ALIGNED] -#define DEFINE_ACCUMULATOR \ - srcT minval = MAX_VAL; \ - srcT maxval = MIN_VAL; \ - int negative = -1; \ - int minloc = negative; \ - int maxloc = negative; \ - srcT temp; \ - int temploc -#define REDUCE_GLOBAL \ - temp = loadpix(srcptr + src_index); \ - temploc = id; \ - srcT temp_minval = minval, temp_maxval = maxval; \ - minval = min(minval, temp); \ - maxval = max(maxval, temp); \ - minloc = (minval == temp_minval) ? (temp_minval == MAX_VAL) ? temploc : minloc : temploc; \ - maxloc = (maxval == temp_maxval) ? (temp_maxval == MIN_VAL) ? temploc : maxloc : temploc -#define SET_LOCAL_1 \ - localmem_min[lid] = minval; \ - localmem_max[lid] = maxval; \ - localmem_minloc[lid] = minloc; \ - localmem_maxloc[lid] = maxloc -#define REDUCE_LOCAL_1 \ - srcT oldmin = localmem_min[lid-WGS2_ALIGNED]; \ - srcT oldmax = localmem_max[lid-WGS2_ALIGNED]; \ - localmem_min[lid - WGS2_ALIGNED] = min(minval, localmem_min[lid-WGS2_ALIGNED]); \ - localmem_max[lid - WGS2_ALIGNED] = max(maxval, localmem_max[lid-WGS2_ALIGNED]); \ - srcT minv = localmem_min[lid - WGS2_ALIGNED], maxv = localmem_max[lid - WGS2_ALIGNED]; \ - localmem_minloc[lid - WGS2_ALIGNED] = (minv == minval) ? (minv == oldmin) ? \ - min(minloc, localmem_minloc[lid-WGS2_ALIGNED]) : minloc : localmem_minloc[lid-WGS2_ALIGNED]; \ - localmem_maxloc[lid - WGS2_ALIGNED] = (maxv == maxval) ? (maxv == oldmax) ? \ - min(maxloc, localmem_maxloc[lid-WGS2_ALIGNED]) : maxloc : localmem_maxloc[lid-WGS2_ALIGNED] -#define REDUCE_LOCAL_2 \ - srcT oldmin = localmem_min[lid]; \ - srcT oldmax = localmem_max[lid]; \ - localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]); \ - localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]); \ - srcT min1 = localmem_min[lid], min2 = localmem_min[lid2]; \ - localmem_minloc[lid] = (localmem_minloc[lid] == negative) ? localmem_minloc[lid2] : (localmem_minloc[lid2] == negative) ? \ - localmem_minloc[lid] : (min1 == min2) ? (min1 == oldmin) ? min(localmem_minloc[lid2],localmem_minloc[lid]) : \ - localmem_minloc[lid2] : localmem_minloc[lid]; \ - srcT max1 = localmem_max[lid], max2 = localmem_max[lid2]; \ - localmem_maxloc[lid] = (localmem_maxloc[lid] == negative) ? localmem_maxloc[lid2] : (localmem_maxloc[lid2] == negative) ? \ - localmem_maxloc[lid] : (max1 == max2) ? (max1 == oldmax) ? min(localmem_maxloc[lid2],localmem_maxloc[lid]) : \ - localmem_maxloc[lid2] : localmem_maxloc[lid] -#define CALC_RESULT \ - storepix(localmem_min[0], dstptr + dstTSIZE * gid); \ - storepix(localmem_max[0], dstptr2 + dstTSIZE * gid); \ - dstlocptr[gid] = localmem_minloc[0]; \ - dstlocptr2[gid] = localmem_maxloc[0] - -#if defined OP_MIN_MAX_LOC_MASK -#undef DEFINE_ACCUMULATOR -#define DEFINE_ACCUMULATOR \ - srcT minval = MAX_VAL; \ - srcT maxval = MIN_VAL; \ - int negative = -1; \ - int minloc = negative; \ - int maxloc = negative; \ - srcT temp, temp_mask, zeroVal = (srcT)(0); \ - int temploc -#undef REDUCE_GLOBAL -#define REDUCE_GLOBAL \ - temp = loadpix(srcptr + src_index); \ - temploc = id; \ - MASK_INDEX; \ - __global const uchar * mask = (__global const uchar *)(maskptr + mask_index); \ - temp_mask = mask[0]; \ - srcT temp_minval = minval, temp_maxval = maxval; \ - minval = (temp_mask == zeroVal) ? minval : min(minval, temp); \ - maxval = (temp_mask == zeroVal) ? maxval : max(maxval, temp); \ - minloc = (temp_mask == zeroVal) ? minloc : (minval == temp_minval) ? (temp_minval == MAX_VAL) ? temploc : minloc : temploc; \ - maxloc = (temp_mask == zeroVal) ? maxloc : (maxval == temp_maxval) ? (temp_maxval == MIN_VAL) ? temploc : maxloc : temploc -#endif - #else #error "No operation" -#endif // end of minMaxLoc stuff - -#ifdef OP_MIN_MAX_LOC -#undef EXTRA_PARAMS -#define EXTRA_PARAMS , __global uchar * dstptr2, __global int * dstlocptr, __global int * dstlocptr2 - -#elif defined OP_MIN_MAX_LOC_MASK -#undef EXTRA_PARAMS -#define EXTRA_PARAMS , __global uchar * dstptr2, __global int * dstlocptr, __global int * dstlocptr2, \ - __global const uchar * maskptr, int mask_step, int mask_offset +#endif // end of norm (NORM_INF) with cn > 1 and mask -#elif defined OP_DOT +#ifdef OP_DOT #undef EXTRA_PARAMS #define EXTRA_PARAMS , __global uchar * src2ptr, int src2_step, int src2_offset #endif diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index 0a16c06..9d78c0f 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -1311,104 +1311,157 @@ static void ofs2idx(const Mat& a, size_t ofs, int* idx) #ifdef HAVE_OPENCL template -void getMinMaxRes(const Mat &minv, const Mat &maxv, const Mat &minl, const Mat &maxl, double* minVal, - double* maxVal, int* minLoc, int* maxLoc, const int groupnum, const int cn, const int cols) +void getMinMaxRes(const Mat & db, double* minVal, double* maxVal, + int* minLoc, int* maxLoc, + int groupnum, int cn, int cols) { - T min = std::numeric_limits::max(); - T max = std::numeric_limits::min() > 0 ? -std::numeric_limits::max() : std::numeric_limits::min(); - int minloc = INT_MAX, maxloc = INT_MAX; + uint index_max = std::numeric_limits::max(); + T minval = std::numeric_limits::max(); + T maxval = std::numeric_limits::min() > 0 ? -std::numeric_limits::max() : std::numeric_limits::min(); + uint minloc = index_max, maxloc = index_max; + + int index = 0; + const T * minptr = NULL, * maxptr = NULL; + const uint * minlocptr = NULL, * maxlocptr = NULL; + if (minVal || minLoc) + { + minptr = (const T *)db.data; + index += sizeof(T) * groupnum; + } + if (maxVal || maxLoc) + { + maxptr = (const T *)(db.data + index); + index += sizeof(T) * groupnum; + } + if (minLoc) + { + minlocptr = (uint *)(db.data + index); + index += sizeof(uint) * groupnum; + } + if (maxLoc) + maxlocptr = (uint *)(db.data + index); + for (int i = 0; i < groupnum; i++) { - T current_min = minv.at(0,i); - T current_max = maxv.at(0,i); - T oldmin = min, oldmax = max; - min = std::min(min, current_min); - max = std::max(max, current_max); - if (cn == 1) + if (minptr && minptr[i] <= minval) { - int current_minloc = minl.at(0,i); - int current_maxloc = maxl.at(0,i); - if(current_minloc < 0 || current_maxloc < 0) continue; - minloc = (oldmin == current_min) ? std::min(minloc, current_minloc) : (oldmin < current_min) ? minloc : current_minloc; - maxloc = (oldmax == current_max) ? std::min(maxloc, current_maxloc) : (oldmax > current_max) ? maxloc : current_maxloc; + if (minptr[i] == minval) + { + if (minlocptr) + minloc = std::min(minlocptr[i], minloc); + } + else + { + if (minlocptr) + minloc = minlocptr[i]; + minval = minptr[i]; + } + } + if (maxptr && maxptr[i] >= maxval) + { + if (maxptr[i] == maxval) + { + if (maxlocptr) + maxloc = std::min(maxlocptr[i], maxloc); + } + else + { + if (maxlocptr) + maxloc = maxlocptr[i]; + maxval = maxptr[i]; + } } } - bool zero_mask = (maxloc == INT_MAX) || (minloc == INT_MAX); + bool zero_mask = (minLoc && minloc == index_max) || + (maxLoc && maxloc == index_max); + if (minVal) - *minVal = zero_mask ? 0 : (double)min; + *minVal = zero_mask ? 0 : (double)minval; if (maxVal) - *maxVal = zero_mask ? 0 : (double)max; + *maxVal = zero_mask ? 0 : (double)maxval; + if (minLoc) { - minLoc[0] = zero_mask ? -1 : minloc/cols; - minLoc[1] = zero_mask ? -1 : minloc%cols; + minLoc[0] = zero_mask ? -1 : minloc / cols; + minLoc[1] = zero_mask ? -1 : minloc % cols; } if (maxLoc) { - maxLoc[0] = zero_mask ? -1 : maxloc/cols; - maxLoc[1] = zero_mask ? -1 : maxloc%cols; + maxLoc[0] = zero_mask ? -1 : maxloc / cols; + maxLoc[1] = zero_mask ? -1 : maxloc % cols; } } -typedef void (*getMinMaxResFunc)(const Mat &minv, const Mat &maxv, const Mat &minl, const Mat &maxl, double *minVal, - double *maxVal, int *minLoc, int *maxLoc, const int gropunum, const int cn, const int cols); +typedef void (*getMinMaxResFunc)(const Mat & db, double *minVal, double *maxVal, + int *minLoc, int *maxLoc, + int gropunum, int cn, int cols); static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc, int* maxLoc, InputArray _mask) { CV_Assert( (_src.channels() == 1 && (_mask.empty() || _mask.type() == CV_8U)) || (_src.channels() >= 1 && _mask.empty() && !minLoc && !maxLoc) ); - int type = _src.type(), depth = CV_MAT_DEPTH(type), kercn = 1; - bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + const ocl::Device & dev = ocl::Device::getDefault(); + bool doubleSupport = dev.doubleFPConfig() > 0, haveMask = !_mask.empty(); + int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), + kercn = haveMask ? 1 : std::min(4, ocl::predictOptimalVectorWidth(_src)); if (depth == CV_64F && !doubleSupport) return false; - int groupnum = ocl::Device::getDefault().maxComputeUnits(); - size_t wgs = ocl::Device::getDefault().maxWorkGroupSize(); + int groupnum = dev.maxComputeUnits(); + size_t wgs = dev.maxWorkGroupSize(); int wgs2_aligned = 1; while (wgs2_aligned < (int)wgs) wgs2_aligned <<= 1; wgs2_aligned >>= 1; - String opts = format("-D DEPTH_%d -D srcT=%s -D OP_MIN_MAX_LOC%s -D WGS=%d" - " -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d", - depth, ocl::typeToStr(depth), _mask.empty() ? "" : "_MASK", (int)wgs, - wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "", + bool needMinVal = minVal || minLoc, needMinLoc = minLoc != NULL, + needMaxVal = maxVal || maxLoc, needMaxLoc = maxLoc != NULL; + + // in case of mask we must know whether mask is filled with zeros or not + // so let's calculate min or max location, if it's undefined, so mask is zeros + if (!(needMaxLoc || needMinLoc) && haveMask) + if (needMinVal) + needMinLoc = true; + else + needMaxVal = true; + + String opts = format("-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s" + " -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s", + depth, ocl::typeToStr(depth), haveMask ? " -D HAVE_MASK" : "", (int)wgs, + ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), wgs2_aligned, + doubleSupport ? " -D DOUBLE_SUPPORT" : "", _src.isContinuous() ? " -D HAVE_SRC_CONT" : "", - _mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn); + _mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn, + needMinVal ? " -D NEED_MINVAL" : "", needMaxVal ? " -D NEED_MAXVAL" : "", + needMinLoc ? " -D NEED_MINLOC" : "", needMaxLoc ? " -D NEED_MAXLOC" : ""); - ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, opts); + ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts); if (k.empty()) return false; - UMat src = _src.getUMat(), minval(1, groupnum, src.type()), - maxval(1, groupnum, src.type()), minloc( 1, groupnum, CV_32SC1), - maxloc( 1, groupnum, CV_32SC1), mask; - if (!_mask.empty()) - mask = _mask.getUMat(); + int esz = CV_ELEM_SIZE(depth), esz32s = CV_ELEM_SIZE1(CV_32S), + dbsize = groupnum * ((needMinVal ? esz : 0) + (needMaxVal ? esz : 0) + + (needMinLoc ? esz32s : 0) + (needMaxLoc ? esz32s : 0)); + UMat src = _src.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat(); - if (src.channels() > 1) + if (cn > 1) src = src.reshape(1); - if (mask.empty()) + if (!haveMask) k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), - groupnum, ocl::KernelArg::PtrWriteOnly(minval), ocl::KernelArg::PtrWriteOnly(maxval), - ocl::KernelArg::PtrWriteOnly(minloc), ocl::KernelArg::PtrWriteOnly(maxloc)); + groupnum, ocl::KernelArg::PtrWriteOnly(db)); else - k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), groupnum, - ocl::KernelArg::PtrWriteOnly(minval), ocl::KernelArg::PtrWriteOnly(maxval), - ocl::KernelArg::PtrWriteOnly(minloc), ocl::KernelArg::PtrWriteOnly(maxloc), ocl::KernelArg::ReadOnlyNoSize(mask)); + k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), + groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(mask)); size_t globalsize = groupnum * wgs; if (!k.run(1, &globalsize, &wgs, false)) return false; - Mat minv = minval.getMat(ACCESS_READ), maxv = maxval.getMat(ACCESS_READ), - minl = minloc.getMat(ACCESS_READ), maxl = maxloc.getMat(ACCESS_READ); - - static getMinMaxResFunc functab[7] = + static const getMinMaxResFunc functab[7] = { getMinMaxRes, getMinMaxRes, @@ -1419,10 +1472,12 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* getMinMaxRes }; - getMinMaxResFunc func; + getMinMaxResFunc func = functab[depth]; - func = functab[depth]; - func(minv, maxv, minl, maxl, minVal, maxVal, minLoc, maxLoc, groupnum, src.channels(), src.cols); + int locTemp[2]; + func(db.getMat(ACCESS_READ), minVal, maxVal, + needMinLoc ? minLoc ? minLoc : locTemp : minLoc, + needMaxLoc ? maxLoc ? maxLoc : locTemp : maxLoc, groupnum, cn, src.cols); return true; }