Added integer arithmetic to sepFilter2D

author Alexander Karsakov <alexander.karsakov@itseez.com>

Mon, 31 Mar 2014 12:45:15 +0000 (16:45 +0400)

committer Alexander Karsakov <alexander.karsakov@itseez.com>

Mon, 31 Mar 2014 12:45:15 +0000 (16:45 +0400)
author Alexander Karsakov <alexander.karsakov@itseez.com>
Mon, 31 Mar 2014 12:45:15 +0000 (16:45 +0400)
committer Alexander Karsakov <alexander.karsakov@itseez.com>
Mon, 31 Mar 2014 12:45:15 +0000 (16:45 +0400)
diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp

index 2bc6b8a..d81f8af 100644 (file)
--- a/modules/imgproc/src/filter.cpp
+++ b/modules/imgproc/src/filter.cpp
@@ -3275,6 +3275,7 @@ static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX
      int type = src.type(), cn = CV_MAT_CN(type), sdepth = CV_MAT_DEPTH(type);
      bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
      Size bufSize = buf.size();
+    int buf_type = buf.type(), bdepth = CV_MAT_DEPTH(buf_type);
  
      if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F))
          return false;
@@ -3306,11 +3307,11 @@ static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX
                                            radiusX, (int)localsize[0], (int)localsize[1], cn, btype,
                                            extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION",
                                            isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED",
-                                          ocl::typeToStr(type), ocl::typeToStr(CV_32FC(cn)),
-                                          ocl::convertTypeStr(sdepth, CV_32F, cn, cvt),
-                                          ocl::typeToStr(sdepth), ocl::typeToStr(CV_32F),
+                                          ocl::typeToStr(type), ocl::typeToStr(buf_type),
+                                          ocl::convertTypeStr(sdepth, bdepth, cn, cvt),
+                                          ocl::typeToStr(sdepth), ocl::typeToStr(bdepth),
                                            doubleSupport ? " -D DOUBLE_SUPPORT" : "");
-    build_options += ocl::kernelToStr(kernelX, CV_32F);
+    build_options += ocl::kernelToStr(kernelX, bdepth);
  
      Size srcWholeSize; Point srcOffset;
      src.locateROI(srcWholeSize, srcOffset);
@@ -3337,7 +3338,7 @@ static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX
      return k.run(2, globalsize, localsize, false);
  }
  
-static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY, double delta, int anchor)
+static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY, double delta, int anchor, int bits)
  {
      bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
      if (dst.depth() == CV_64F && !doubleSupport)
@@ -3352,6 +3353,7 @@ static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY
  
      int dtype = dst.type(), cn = CV_MAT_CN(dtype), ddepth = CV_MAT_DEPTH(dtype);
      Size sz = dst.size();
+    int buf_type = buf.type(), bdepth = CV_MAT_DEPTH(buf_type);
  
      globalsize[1] = DIVUP(sz.height, localsize[1]) * localsize[1];
      globalsize[0] = DIVUP(sz.width, localsize[0]) * localsize[0];
@@ -3359,13 +3361,13 @@ static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY
      char cvt[40];
      cv::String build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d"
                                            " -D srcT=%s -D dstT=%s -D convertToDstT=%s"
-                                          " -D srcT1=%s -D dstT1=%s%s",
+                                          " -D srcT1=%s -D dstT1=%s -D BITS=%d%s",
                                            anchor, (int)localsize[0], (int)localsize[1], cn,
-                                          ocl::typeToStr(buf.type()), ocl::typeToStr(dtype),
-                                          ocl::convertTypeStr(CV_32F, ddepth, cn, cvt),
-                                          ocl::typeToStr(CV_32F), ocl::typeToStr(ddepth),
-                                          doubleSupport ? " -D DOUBLE_SUPPORT" : "");
-    build_options += ocl::kernelToStr(kernelY, CV_32F);
+                                          ocl::typeToStr(buf_type), ocl::typeToStr(dtype),
+                                          ocl::convertTypeStr(bdepth, ddepth, cn, cvt),
+                                          ocl::typeToStr(bdepth), ocl::typeToStr(ddepth),
+                                          bits, doubleSupport ? " -D DOUBLE_SUPPORT" : "");
+    build_options += ocl::kernelToStr(kernelY, bdepth);
  
      ocl::Kernel k("col_filter", cv::ocl::imgproc::filterSepCol_oclsrc,
                    build_options);
@@ -3457,13 +3459,13 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
      if (ddepth < 0)
          ddepth = sdepth;
  
-    CV_OCL_RUN_(kernelY.cols <= 21 && kernelX.cols <= 21 &&
-                imgSize.width > optimizedSepFilterLocalSize + (kernelX.cols >> 1) &&
-                imgSize.height > optimizedSepFilterLocalSize + (kernelY.cols >> 1) &&
-                (!(borderType & BORDER_ISOLATED) || _src.offset() == 0) && anchor == Point(-1, -1) &&
-                (d.isIntel() || (d.isAMD() && !d.hostUnifiedMemory())),
-                ocl_sepFilter2D_SinglePass(_src, _dst, kernelX, kernelY, delta,
-                                           borderType & ~BORDER_ISOLATED, ddepth), true)
+    //CV_OCL_RUN_(kernelY.cols <= 21 && kernelX.cols <= 21 &&
+    //            imgSize.width > optimizedSepFilterLocalSize + (kernelX.cols >> 1) &&
+    //            imgSize.height > optimizedSepFilterLocalSize + (kernelY.cols >> 1) &&
+    //            (!(borderType & BORDER_ISOLATED) || _src.offset() == 0) && anchor == Point(-1, -1) &&
+    //            (d.isIntel() || (d.isAMD() && !d.hostUnifiedMemory())),
+    //            ocl_sepFilter2D_SinglePass(_src, _dst, kernelX, kernelY, delta,
+    //                                       borderType & ~BORDER_ISOLATED, ddepth), true)
  
      if (anchor.x < 0)
          anchor.x = kernelX.cols >> 1;
@@ -3474,19 +3476,45 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
      Size srcWholeSize; Point srcOffset;
      src.locateROI(srcWholeSize, srcOffset);
  
-    bool fast8uc1 = type == CV_8UC1 && srcOffset.x % 4 == 0 &&
-            src.cols % 4 == 0 && src.step % 4 == 0;
+    //bool fast8uc1 = type == CV_8UC1 && srcOffset.x % 4 == 0 &&
+    //        src.cols % 4 == 0 && src.step % 4 == 0;
+    bool fast8uc1 = false;
+
+    int rtype = getKernelType(kernelX,
+        kernelX.rows == 1 ? Point(anchor.x, 0) : Point(0, anchor.x));
+    int ctype = getKernelType(kernelY,
+        kernelY.rows == 1 ? Point(anchor.y, 0) : Point(0, anchor.y));
+
+    int bdepth = CV_32F;
+    int bits = 0;
+
+    if( sdepth == CV_8U &&
+        ((rtype == KERNEL_SMOOTH+KERNEL_SYMMETRICAL &&
+          ctype == KERNEL_SMOOTH+KERNEL_SYMMETRICAL &&
+          ddepth == CV_8U)))
+    {
+        bdepth = CV_32S;
+        bits = 8;
+        _kernelX.getMat().convertTo( kernelX, CV_32S, 1 << bits );
+        _kernelY.getMat().convertTo( kernelY, CV_32S, 1 << bits );
+        kernelX = kernelX.reshape(1,1);
+        kernelY = kernelY.reshape(1,1);
+        bits *= 2;
+        delta *= (1 << bits);
+    }
  
      Size srcSize = src.size();
      Size bufSize(srcSize.width, srcSize.height + kernelY.cols - 1);
-    UMat buf(bufSize, CV_32FC(cn));
+    UMat buf(bufSize, CV_MAKETYPE(bdepth, cn));
      if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType, ddepth, fast8uc1))
          return false;
  
+    Mat buffer = buf.getMat(ACCESS_READ);
+
      _dst.create(srcSize, CV_MAKETYPE(ddepth, cn));
      UMat dst = _dst.getUMat();
  
-    return ocl_sepColFilter2D(buf, dst, kernelY, delta, anchor.y);
+    return ocl_sepColFilter2D(buf, dst, kernelY, delta, anchor.y, bits);
  }
  
  #endif
diff --git a/modules/imgproc/src/opencl/filterSepCol.cl b/modules/imgproc/src/opencl/filterSepCol.cl

index 29514cc..94730d8 100644 (file)
--- a/modules/imgproc/src/opencl/filterSepCol.cl
+++ b/modules/imgproc/src/opencl/filterSepCol.cl
@@ -60,7 +60,7 @@
  #endif
  
  #define DIG(a) a,
-__constant float mat_kernel[] = { COEFF };
+__constant srcT1 mat_kernel[] = { COEFF };
  
  __kernel void col_filter(__global const uchar * src, int src_step, int src_offset, int src_whole_rows, int src_whole_cols,
                           __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, float delta)
@@ -97,8 +97,13 @@ __kernel void col_filter(__global const uchar * src, int src_step, int src_offse
          temp[0] = LDS_DAT[l_y + RADIUSY - i][l_x];
          temp[1] = LDS_DAT[l_y + RADIUSY + i][l_x];
          sum += mad(temp[0], mat_kernel[RADIUSY - i], temp[1] * mat_kernel[RADIUSY + i]);
+        //sum += temp[0]*mat_kernel[RADIUSY - i] + temp[1] * mat_kernel[RADIUSY + i];
      }
  
+#if BITS > 0
+    sum = sum >> BITS;
+#endif
+
      // write the result to dst
      if (x < dst_cols && y < dst_rows)
      {
diff --git a/modules/imgproc/src/opencl/filterSepRow.cl b/modules/imgproc/src/opencl/filterSepRow.cl

index 726de44..8deec35 100644 (file)
--- a/modules/imgproc/src/opencl/filterSepRow.cl
+++ b/modules/imgproc/src/opencl/filterSepRow.cl
@@ -138,7 +138,7 @@
  #endif
  
  #define DIG(a) a,
-__constant float mat_kernel[] = { COEFF };
+__constant dstT1 mat_kernel[] = { COEFF };
  
  __kernel void row_filter_C1_D0(__global const uchar * src, int src_step_in_pixel, int src_offset_x, int src_offset_y,
                                 int src_cols, int src_rows, int src_whole_cols, int src_whole_rows,
@@ -356,6 +356,7 @@ __kernel void row_filter(__global const uchar * src, int src_step, int src_offse
          temp[0] = LDS_DAT[l_y][l_x + RADIUSX - i];
          temp[1] = LDS_DAT[l_y][l_x + RADIUSX + i];
          sum += mad(convertToDstT(temp[0]), mat_kernel[RADIUSX - i], convertToDstT(temp[1]) * mat_kernel[RADIUSX + i]);
+        //sum += convertToDstT(temp[0])*mat_kernel[RADIUSX - i] + convertToDstT(temp[1]) * mat_kernel[RADIUSX + i];
      }
  
      // write the result to dst
diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp

index 864fec7..e2365cd 100644 (file)
--- a/modules/imgproc/src/smooth.cpp
+++ b/modules/imgproc/src/smooth.cpp
@@ -1196,12 +1196,12 @@ void cv::GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
      }
  #endif
  
-    if (type == CV_8U)
-    {
-        CV_OCL_RUN_(_dst.isUMat() && _src.dims() <= 2 && 
-                (!(borderType & BORDER_ISOLATED) || _src.offset() == 0),
-                GaussianBlur_8u(_src, _dst, ksize, sigma1, sigma2, borderType))
-    }
+    //if (type == CV_8U)
+    //{
+    //    CV_OCL_RUN_(_dst.isUMat() && _src.dims() <= 2 && 
+    //            (!(borderType & BORDER_ISOLATED) || _src.offset() == 0),
+    //            GaussianBlur_8u(_src, _dst, ksize, sigma1, sigma2, borderType))
+    //}
  
      Mat kx, ky;
      createGaussianKernels(kx, ky, type, ksize, sigma1, sigma2);
diff --git a/modules/imgproc/test/ocl/test_filters.cpp b/modules/imgproc/test/ocl/test_filters.cpp

index a43a771..aee1f08 100644 (file)
--- a/modules/imgproc/test/ocl/test_filters.cpp
+++ b/modules/imgproc/test/ocl/test_filters.cpp
@@ -209,7 +209,7 @@ typedef FilterTestBase GaussianBlurTest;
  
  OCL_TEST_P(GaussianBlurTest, Mat)
  {
-    for (int j = 0; j < test_loop_times; j++)
+    for (int j = 0; j < test_loop_times + 100; j++)
      {
          random_roi();
  
@@ -222,7 +222,8 @@ OCL_TEST_P(GaussianBlurTest, Mat)
  
          if (checkNorm2(dst_roi, udst_roi) > 2 && CV_MAT_DEPTH(type) == CV_8U)
          {
-            Mat udst = udst_roi.getMat(ACCESS_READ);
+            std::cout << "i = " << j << std::endl;
+            Mat uudst = udst_roi.getMat(ACCESS_READ);
              Mat diff; 
              absdiff(dst_roi, udst, diff);
              int nonZero = countNonZero(diff);
@@ -231,11 +232,15 @@ OCL_TEST_P(GaussianBlurTest, Mat)
              minMaxLoc(diff, (double*)0, &max, (Point*) 0, &maxn);
  
              uchar a = dst_roi.at<uchar>(maxn);
-            uchar b = udst.at<uchar>(maxn);
+            uchar b = uudst.at<uchar>(maxn);
  
+            std::cout << "dst_roi" << dst_roi << std::endl;
+            std::cout << "udst_roi" << uudst << std::endl;
          }
  
-        Near(CV_MAT_DEPTH(type) == CV_8U ? 2 : 5e-5, false);
+        
+
+        Near(CV_MAT_DEPTH(type) == CV_8U ? 1 : 5e-5, false);
      }
  }
author	Alexander Karsakov <alexander.karsakov@itseez.com>
	Mon, 31 Mar 2014 12:45:15 +0000 (16:45 +0400)
committer	Alexander Karsakov <alexander.karsakov@itseez.com>
	Mon, 31 Mar 2014 12:45:15 +0000 (16:45 +0400)
modules/imgproc/src/filter.cpp		patch | blob | history
modules/imgproc/src/opencl/filterSepCol.cl		patch | blob | history
modules/imgproc/src/opencl/filterSepRow.cl		patch | blob | history
modules/imgproc/src/smooth.cpp		patch | blob | history
modules/imgproc/test/ocl/test_filters.cpp		patch | blob | history