From 7017b1250a89836b92011294d47b63cc5368d4ba Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 9 Jul 2014 20:06:54 +0400 Subject: [PATCH] used new strategy in cv::accumulate** --- modules/core/include/opencv2/core/ocl.hpp | 5 +++++ modules/core/src/ocl.cpp | 30 +++++++++++++++--------------- modules/imgproc/src/accum.cpp | 11 ++--------- 3 files changed, 22 insertions(+), 24 deletions(-) diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp index 8a41f1b..744cf21 100644 --- a/modules/core/include/opencv2/core/ocl.hpp +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -617,6 +617,11 @@ CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noAr InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(), OclVectorStrategy strat = OCL_VECTOR_DEFAULT); +// with OCL_VECTOR_MAX strategy +CV_EXPORTS int predictOptimalVectorWidthMax(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(), + InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(), + InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray()); + CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m); class CV_EXPORTS Image2D diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index cdc8917..0b8e033 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -4462,6 +4462,7 @@ String kernelToStr(InputArray _kernel, int ddepth, const char * name) offsets.push_back(src.offset()); \ steps.push_back(src.step()); \ dividers.push_back(ckercn * CV_ELEM_SIZE1(ctype)); \ + kercns.push_back(ckercn); \ } \ } \ while ((void)0, 0) @@ -4483,13 +4484,13 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3, if (vectorWidths[0] == 1) { // it's heuristic - vectorWidths[0] = vectorWidths[1] = 4; - vectorWidths[2] = vectorWidths[3] = 2; - 
vectorWidths[4] = vectorWidths[5] = vectorWidths[6] = 4; + vectorWidths[CV_8U] = vectorWidths[CV_8S] = 16; + vectorWidths[CV_16U] = vectorWidths[CV_16S] = 8; + vectorWidths[CV_32S] = vectorWidths[CV_32F] = vectorWidths[CV_64F] = 1; } std::vector offsets, steps, cols; - std::vector dividers; + std::vector dividers, kercns; PROCESS_SRC(src1); PROCESS_SRC(src2); PROCESS_SRC(src3); @@ -4503,23 +4504,22 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3, size_t size = offsets.size(); for (size_t i = 0; i < size; ++i) - while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % dividers[i] != 0) - dividers[i] >>= 1; + while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % kercns[i] != 0) + dividers[i] >>= 1, kercns[i] >>= 1; // default strategy - int kercn = *std::min_element(dividers.begin(), dividers.end()); - - // another strategy - // for (size_t i = 0; i < size; ++i) - // if (dividers[i] != wsz) - // { - // kercn = 1; - // break; - // } + int kercn = *std::min_element(kercns.begin(), kercns.end()); return kercn; } +int predictOptimalVectorWidthMax(InputArray src1, InputArray src2, InputArray src3, + InputArray src4, InputArray src5, InputArray src6, + InputArray src7, InputArray src8, InputArray src9) +{ + return predictOptimalVectorWidth(src1, src2, src3, src4, src5, src6, src7, src8, src9, OCL_VECTOR_MAX); +} + #undef PROCESS_SRC diff --git a/modules/imgproc/src/accum.cpp b/modules/imgproc/src/accum.cpp index 3987405..9f23d34 100644 --- a/modules/imgproc/src/accum.cpp +++ b/modules/imgproc/src/accum.cpp @@ -370,16 +370,9 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray op_type == ACCUMULATE_PRODUCT || op_type == ACCUMULATE_WEIGHTED); const ocl::Device & dev = ocl::Device::getDefault(); - int vectorWidths[] = { 4, 4, 2, 2, 1, 1, 1, -1 }; + bool haveMask = !_mask.empty(), doubleSupport = dev.doubleFPConfig() > 0; int stype = _src.type(), sdepth = 
CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), ddepth = _dst.depth(); - int pcn = std::max(vectorWidths[sdepth], vectorWidths[ddepth]), sesz = CV_ELEM_SIZE(sdepth) * pcn, - desz = CV_ELEM_SIZE(ddepth) * pcn, rowsPerWI = dev.isIntel() ? 4 : 1; - - bool doubleSupport = dev.doubleFPConfig() > 0, haveMask = !_mask.empty(), - usepcn = _src.offset() % sesz == 0 && _src.step() % sesz == 0 && (_src.cols() * cn) % pcn == 0 && - _src2.offset() % desz == 0 && _src2.step() % desz == 0 && - _dst.offset() % pcn == 0 && _dst.step() % desz == 0 && !haveMask; - int kercn = usepcn ? pcn : haveMask ? cn : 1; + int kercn = haveMask ? cn : ocl::predictOptimalVectorWidthMax(_src, _src2, _dst), rowsPerWI = dev.isIntel() ? 4 : 1; if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) return false; -- 2.7.4