improved cv::ocl::predictOptimalVectorWidth

author Ilya Lavrenov <ilya.lavrenov@itseez.com>

Wed, 9 Jul 2014 15:57:27 +0000 (19:57 +0400)

committer Ilya Lavrenov <ilya.lavrenov@itseez.com>

Mon, 25 Aug 2014 07:25:01 +0000 (11:25 +0400)
author Ilya Lavrenov <ilya.lavrenov@itseez.com>
Wed, 9 Jul 2014 15:57:27 +0000 (19:57 +0400)
committer Ilya Lavrenov <ilya.lavrenov@itseez.com>
Mon, 25 Aug 2014 07:25:01 +0000 (11:25 +0400)
diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp

index 5ab0d49..8a41f1b 100644 (file)
--- a/modules/core/include/opencv2/core/ocl.hpp
+++ b/modules/core/include/opencv2/core/ocl.hpp
@@ -598,9 +598,24 @@ CV_EXPORTS const char* typeToStr(int t);
  CV_EXPORTS const char* memopTypeToStr(int t);
  CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1, const char * name = NULL);
  CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo>& platform_info);
+
+
+enum OclVectorStrategy
+{
+    // each matrix has its own vector width
+    OCL_VECTOR_OWN = 0,
+    // all matrices have the maximal vector width among all matrices
+    // (useful for cases when matrices have different data types)
+    OCL_VECTOR_MAX = 1,
+
+    // default strategy
+    OCL_VECTOR_DEFAULT = OCL_VECTOR_OWN
+};
+
  CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
                                           InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
-                                         InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray());
+                                         InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
+                                         OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
  
  CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m);
  
diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp

index d279c02..cdc8917 100644 (file)
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@@ -4451,42 +4451,45 @@ String kernelToStr(InputArray _kernel, int ddepth, const char * name)
          if (!src.empty()) \
          { \
              CV_Assert(src.isMat() || src.isUMat()); \
-            int ctype = src.type(), ccn = CV_MAT_CN(ctype); \
              Size csize = src.size(); \
-            cols.push_back(ccn * csize.width); \
-            if (ctype != type) \
+            int ctype = src.type(), ccn = CV_MAT_CN(ctype), cdepth = CV_MAT_DEPTH(ctype), \
+                ckercn = vectorWidths[cdepth], cwidth = ccn * csize.width; \
+            if (cwidth < ckercn || ckercn <= 0) \
+                return 1; \
+            cols.push_back(cwidth); \
+            if (strat == OCL_VECTOR_OWN && ctype != ref_type) \
                  return 1; \
              offsets.push_back(src.offset()); \
              steps.push_back(src.step()); \
+            dividers.push_back(ckercn * CV_ELEM_SIZE1(ctype)); \
          } \
      } \
      while ((void)0, 0)
  
  int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
                                InputArray src4, InputArray src5, InputArray src6,
-                              InputArray src7, InputArray src8, InputArray src9)
+                              InputArray src7, InputArray src8, InputArray src9,
+                              OclVectorStrategy strat)
  {
-    int type = src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), esz1 = CV_ELEM_SIZE1(depth);
-    Size ssize = src1.size();
      const ocl::Device & d = ocl::Device::getDefault();
+    int ref_type = src1.type();
  
      int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(),
          d.preferredVectorWidthShort(), d.preferredVectorWidthShort(),
          d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(),
-        d.preferredVectorWidthDouble(), -1 }, kercn = vectorWidths[depth];
+        d.preferredVectorWidthDouble(), -1 };
  
      // if the device says don't use vectors
      if (vectorWidths[0] == 1)
      {
          // it's heuristic
-        int vectorWidthsOthers[] = { 16, 16, 8, 8, 1, 1, 1, -1 };
-        kercn = vectorWidthsOthers[depth];
+        vectorWidths[0] = vectorWidths[1] = 4;
+        vectorWidths[2] = vectorWidths[3] = 2;
+        vectorWidths[4] = vectorWidths[5] = vectorWidths[6] = 4;
      }
  
-    if (ssize.width * cn < kercn || kercn <= 0)
-        return 1;
-
      std::vector<size_t> offsets, steps, cols;
+    std::vector<int> dividers;
      PROCESS_SRC(src1);
      PROCESS_SRC(src2);
      PROCESS_SRC(src3);
@@ -4498,23 +4501,21 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
      PROCESS_SRC(src9);
  
      size_t size = offsets.size();
-    int wsz = kercn * esz1;
-    std::vector<int> dividers(size, wsz);
  
      for (size_t i = 0; i < size; ++i)
          while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % dividers[i] != 0)
              dividers[i] >>= 1;
  
      // default strategy
-    for (size_t i = 0; i < size; ++i)
-        if (dividers[i] != wsz)
-        {
-            kercn = 1;
-            break;
-        }
+    int kercn = *std::min_element(dividers.begin(), dividers.end());
  
      // another strategy
-//    width = *std::min_element(dividers.begin(), dividers.end());
+    // for (size_t i = 0; i < size; ++i)
+    //     if (dividers[i] != wsz)
+    //     {
+    //         kercn = 1;
+    //         break;
+    //     }
  
      return kercn;
  }
author	Ilya Lavrenov <ilya.lavrenov@itseez.com>
	Wed, 9 Jul 2014 15:57:27 +0000 (19:57 +0400)
committer	Ilya Lavrenov <ilya.lavrenov@itseez.com>
	Mon, 25 Aug 2014 07:25:01 +0000 (11:25 +0400)
modules/core/include/opencv2/core/ocl.hpp		patch \| blob \| history
modules/core/src/ocl.cpp		patch \| blob \| history