From 2c6b7a52e9cd4aac5e649863fbe08785e008dd64 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 9 Jul 2014 19:57:27 +0400 Subject: [PATCH] improved cv::ocl::predictOptimalVectorWidth --- modules/core/include/opencv2/core/ocl.hpp | 17 +++++++++++- modules/core/src/ocl.cpp | 43 ++++++++++++++++--------------- 2 files changed, 38 insertions(+), 22 deletions(-) diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp index 5ab0d49..8a41f1b 100644 --- a/modules/core/include/opencv2/core/ocl.hpp +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -598,9 +598,24 @@ CV_EXPORTS const char* typeToStr(int t); CV_EXPORTS const char* memopTypeToStr(int t); CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1, const char * name = NULL); CV_EXPORTS void getPlatfomsInfo(std::vector& platform_info); + + +enum OclVectorStrategy +{ + // all matrices have its own vector width + OCL_VECTOR_OWN = 0, + // all matrices have maximal vector width among all matrices + // (useful for cases when matrices have different data types) + OCL_VECTOR_MAX = 1, + + // default strategy + OCL_VECTOR_DEFAULT = OCL_VECTOR_OWN +}; + CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(), InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(), - InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray()); + InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(), + OclVectorStrategy strat = OCL_VECTOR_DEFAULT); CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m); diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index d279c02..cdc8917 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -4451,42 +4451,45 @@ String kernelToStr(InputArray _kernel, int ddepth, const char * name) if (!src.empty()) \ { \ CV_Assert(src.isMat() || src.isUMat()); \ - int ctype = src.type(), ccn = CV_MAT_CN(ctype); \ Size csize = src.size(); \ - cols.push_back(ccn * csize.width); \ - if (ctype != type) \ + int ctype = src.type(), ccn = CV_MAT_CN(ctype), cdepth = CV_MAT_DEPTH(ctype), \ + ckercn = vectorWidths[cdepth], cwidth = ccn * csize.width; \ + if (cwidth < ckercn || ckercn <= 0) \ + return 1; \ + cols.push_back(cwidth); \ + if (strat == OCL_VECTOR_OWN && ctype != ref_type) \ return 1; \ offsets.push_back(src.offset()); \ steps.push_back(src.step()); \ + dividers.push_back(ckercn * CV_ELEM_SIZE1(ctype)); \ } \ } \ while ((void)0, 0) int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3, InputArray src4, InputArray src5, InputArray src6, - InputArray src7, InputArray src8, InputArray src9) + InputArray src7, InputArray src8, InputArray src9, + OclVectorStrategy strat) { - int type = src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), esz1 = CV_ELEM_SIZE1(depth); - Size ssize = src1.size(); const ocl::Device & d = ocl::Device::getDefault(); + int ref_type = src1.type(); int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(), d.preferredVectorWidthShort(), d.preferredVectorWidthShort(), d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(), - d.preferredVectorWidthDouble(), -1 }, kercn = vectorWidths[depth]; + d.preferredVectorWidthDouble(), -1 }; // if the device says don't use vectors if (vectorWidths[0] == 1) { // it's heuristic - int vectorWidthsOthers[] = { 16, 16, 8, 8, 1, 1, 1, -1 }; - kercn = vectorWidthsOthers[depth]; + vectorWidths[0] = vectorWidths[1] = 4; + vectorWidths[2] = vectorWidths[3] = 2; + vectorWidths[4] = vectorWidths[5] = vectorWidths[6] = 4; } - if (ssize.width * cn < kercn || kercn <= 0) - return 1; - std::vector offsets, steps, cols; + std::vector dividers; PROCESS_SRC(src1); PROCESS_SRC(src2); PROCESS_SRC(src3); @@ -4498,23 +4501,21 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3, PROCESS_SRC(src9); size_t size = offsets.size(); - int wsz = kercn * esz1; - std::vector dividers(size, wsz); for (size_t i = 0; i < size; ++i) while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % dividers[i] != 0) dividers[i] >>= 1; // default strategy - for (size_t i = 0; i < size; ++i) - if (dividers[i] != wsz) - { - kercn = 1; - break; - } + int kercn = *std::min_element(dividers.begin(), dividers.end()); // another strategy -// width = *std::min_element(dividers.begin(), dividers.end()); + // for (size_t i = 0; i < size; ++i) + // if (dividers[i] != wsz) + // { + // kercn = 1; + // break; + // } return kercn; } -- 2.7.4