optimized cv::norm with NORM_INF

author Ilya Lavrenov <ilya.lavrenov@itseez.com>

Fri, 6 Jun 2014 13:15:19 +0000 (17:15 +0400)

committer Ilya Lavrenov <ilya.lavrenov@itseez.com>

Fri, 6 Jun 2014 15:34:26 +0000 (19:34 +0400)
author Ilya Lavrenov <ilya.lavrenov@itseez.com>
Fri, 6 Jun 2014 13:15:19 +0000 (17:15 +0400)
committer Ilya Lavrenov <ilya.lavrenov@itseez.com>
Fri, 6 Jun 2014 15:34:26 +0000 (19:34 +0400)
diff --git a/modules/core/src/opencl/minmaxloc.cl b/modules/core/src/opencl/minmaxloc.cl

index 558679e..2e48387 100644 (file)
--- a/modules/core/src/opencl/minmaxloc.cl
+++ b/modules/core/src/opencl/minmaxloc.cl
@@ -36,6 +36,7 @@
  #define MAX_VAL DBL_MAX
  #endif
  
+#define noconvert
  #define INDEX_MAX UINT_MAX
  
  #ifdef NEED_MINLOC
@@ -93,20 +94,20 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off
  #endif
  
  #ifdef NEED_MINVAL
-    __local srcT1 localmem_min[WGS2_ALIGNED];
+    __local dstT1 localmem_min[WGS2_ALIGNED];
  #ifdef NEED_MINLOC
      __local uint localmem_minloc[WGS2_ALIGNED];
  #endif
  #endif
  #ifdef NEED_MAXVAL
-    __local srcT1 localmem_max[WGS2_ALIGNED];
+    __local dstT1 localmem_max[WGS2_ALIGNED];
  #ifdef NEED_MAXLOC
      __local uint localmem_maxloc[WGS2_ALIGNED];
  #endif
  #endif
  
-    srcT1 minval = MAX_VAL, maxval = MIN_VAL;
-    srcT temp;
+    dstT1 minval = MAX_VAL, maxval = MIN_VAL;
+    dstT temp;
      uint minloc = INDEX_MAX, maxloc = INDEX_MAX;
      int src_index;
  #ifdef HAVE_MASK
@@ -130,7 +131,7 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off
          if (mask[mask_index])
  #endif
          {
-            temp = *(__global const srcT *)(srcptr + src_index);
+            temp = convertToDT(*(__global const srcT *)(srcptr + src_index));
  #if kercn == 1
  #ifdef NEED_MINVAL
              if (minval > temp)
@@ -262,12 +263,12 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off
      {
          int pos = 0;
  #ifdef NEED_MINVAL
-        *(__global srcT1 *)(dstptr + mad24(gid, (int)sizeof(srcT1), pos)) = localmem_min[0];
-        pos = mad24(groupnum, (int)sizeof(srcT1), pos);
+        *(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_min[0];
+        pos = mad24(groupnum, (int)sizeof(dstT1), pos);
  #endif
  #ifdef NEED_MAXVAL
-        *(__global srcT1 *)(dstptr + mad24(gid, (int)sizeof(srcT1), pos)) = localmem_max[0];
-        pos = mad24(groupnum, (int)sizeof(srcT1), pos);
+        *(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_max[0];
+        pos = mad24(groupnum, (int)sizeof(dstT1), pos);
  #endif
  #ifdef NEED_MINLOC
          *(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_minloc[0];
diff --git a/modules/core/src/opencl/reduce.cl b/modules/core/src/opencl/reduce.cl

index 038f132..21a5518 100644 (file)
--- a/modules/core/src/opencl/reduce.cl
+++ b/modules/core/src/opencl/reduce.cl
@@ -50,7 +50,7 @@
  #endif
  #endif
  
-#if defined OP_NORM_INF_MASK || defined OP_MIN_MAX_LOC || defined OP_MIN_MAX_LOC_MASK
+#if defined OP_NORM_INF_MASK
  
  #ifdef DEPTH_0
  #define MIN_VAL 0
@@ -75,8 +75,6 @@
  #define MAX_VAL DBL_MAX
  #endif
  
-#define INDEX_MAX UINT_MAX
-
  #define dstT srcT
  #define dstT1 srcT1
  
diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp

index 9d78c0f..8996c48 100644 (file)
--- a/modules/core/src/stat.cpp
+++ b/modules/core/src/stat.cpp
@@ -1313,7 +1313,7 @@ static void ofs2idx(const Mat& a, size_t ofs, int* idx)
  template <typename T>
  void getMinMaxRes(const Mat & db, double* minVal, double* maxVal,
                    int* minLoc, int* maxLoc,
-                  int groupnum, int cn, int cols)
+                  int groupnum, int cols)
  {
      uint index_max = std::numeric_limits<uint>::max();
      T minval = std::numeric_limits<T>::max();
@@ -1393,10 +1393,10 @@ void getMinMaxRes(const Mat & db, double* minVal, double* maxVal,
  }
  
  typedef void (*getMinMaxResFunc)(const Mat & db, double *minVal, double *maxVal,
-                                 int *minLoc, int *maxLoc,
-                                 int gropunum, int cn, int cols);
+                                 int *minLoc, int *maxLoc, int gropunum, int cols);
  
-static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc, int* maxLoc, InputArray _mask)
+static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc, int* maxLoc, InputArray _mask,
+                           int ddepth = -1, bool absValues = false)
  {
      CV_Assert( (_src.channels() == 1 && (_mask.empty() || _mask.type() == CV_8U)) ||
          (_src.channels() >= 1 && _mask.empty() && !minLoc && !maxLoc) );
@@ -1405,8 +1405,10 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int*
      bool doubleSupport = dev.doubleFPConfig() > 0, haveMask = !_mask.empty();
      int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
              kercn = haveMask ? 1 : std::min(4, ocl::predictOptimalVectorWidth(_src));
+    if (ddepth < 0)
+        ddepth = depth;
  
-    if (depth == CV_64F && !doubleSupport)
+    if ((depth == CV_64F || ddepth == CV_64F) && !doubleSupport)
          return false;
  
      int groupnum = dev.maxComputeUnits();
@@ -1423,26 +1425,32 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int*
      // in case of mask we must know whether mask is filled with zeros or not
      // so let's calculate min or max location, if it's undefined, so mask is zeros
      if (!(needMaxLoc || needMinLoc) && haveMask)
+    {
          if (needMinVal)
              needMinLoc = true;
          else
              needMaxVal = true;
+    }
  
+    char cvt[40];
      String opts = format("-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s"
-                         " -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s",
+                         " -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s"
+                         " -D dstT1=%s -D dstT=%s -D convertToDT=%s%s",
                           depth, ocl::typeToStr(depth), haveMask ? " -D HAVE_MASK" : "", (int)wgs,
                           ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), wgs2_aligned,
                           doubleSupport ? " -D DOUBLE_SUPPORT" : "",
                           _src.isContinuous() ? " -D HAVE_SRC_CONT" : "",
                           _mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn,
                           needMinVal ? " -D NEED_MINVAL" : "", needMaxVal ? " -D NEED_MAXVAL" : "",
-                         needMinLoc ? " -D NEED_MINLOC" : "", needMaxLoc ? " -D NEED_MAXLOC" : "");
+                         needMinLoc ? " -D NEED_MINLOC" : "", needMaxLoc ? " -D NEED_MAXLOC" : "",
+                         ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
+                         ocl::convertTypeStr(depth, ddepth, kercn, cvt), absValues ? " -D OP_ABS" : "");
  
      ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts);
      if (k.empty())
          return false;
  
-    int esz = CV_ELEM_SIZE(depth), esz32s = CV_ELEM_SIZE1(CV_32S),
+    int esz = CV_ELEM_SIZE(ddepth), esz32s = CV_ELEM_SIZE1(CV_32S),
              dbsize = groupnum * ((needMinVal ? esz : 0) + (needMaxVal ? esz : 0) +
                                   (needMinLoc ? esz32s : 0) + (needMaxLoc ? esz32s : 0));
      UMat src = _src.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat();
@@ -1477,7 +1485,7 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int*
      int locTemp[2];
      func(db.getMat(ACCESS_READ), minVal, maxVal,
           needMinLoc ? minLoc ? minLoc : locTemp : minLoc,
-         needMaxLoc ? maxLoc ? maxLoc : locTemp : maxLoc, groupnum, cn, src.cols);
+         needMaxLoc ? maxLoc ? maxLoc : locTemp : maxLoc, groupnum, src.cols);
  
      return true;
  }
@@ -2116,35 +2124,8 @@ static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double &
      if (normType == NORM_INF)
      {
          if (cn == 1 || !haveMask)
-        {
-            UMat abssrc;
-
-            if (depth != CV_8U && depth != CV_16U)
-            {
-                int wdepth = std::max(CV_32S, depth), rowsPerWI = d.isIntel() ? 4 : 1;
-                char cvt[50];
-
-                ocl::Kernel kabs("KF", ocl::core::arithm_oclsrc,
-                                 format("-D UNARY_OP -D OP_ABS_NOSAT -D dstT=%s -D srcT1=%s"
-                                        " -D convertToDT=%s -D rowsPerWI=%d%s",
-                                        ocl::typeToStr(wdepth), ocl::typeToStr(depth),
-                                        ocl::convertTypeStr(depth, wdepth, 1, cvt), rowsPerWI,
-                                        doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
-                if (kabs.empty())
-                    return false;
-
-                abssrc.create(src.size(), CV_MAKE_TYPE(wdepth, cn));
-                kabs.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(abssrc, cn));
-
-                size_t globalsize[2] = { src.cols * cn, (src.rows + rowsPerWI - 1) / rowsPerWI };
-                if (!kabs.run(2, globalsize, NULL, false))
-                    return false;
-            }
-            else
-                abssrc = src;
-
-            cv::minMaxIdx(haveMask ? abssrc : abssrc.reshape(1), NULL, &result, NULL, NULL, _mask);
-        }
+            ocl_minMaxIdx(_src, NULL, &result, NULL, NULL, _mask,
+                          std::max(depth, CV_32S), depth != CV_8U && depth != CV_16U);
          else
          {
              int dbsize = d.maxComputeUnits();
author	Ilya Lavrenov <ilya.lavrenov@itseez.com>
	Fri, 6 Jun 2014 13:15:19 +0000 (17:15 +0400)
committer	Ilya Lavrenov <ilya.lavrenov@itseez.com>
	Fri, 6 Jun 2014 15:34:26 +0000 (19:34 +0400)
modules/core/src/opencl/minmaxloc.cl		patch \| blob \| history
modules/core/src/opencl/reduce.cl		patch \| blob \| history
modules/core/src/stat.cpp		patch \| blob \| history