Added scaling support to gpu::dft; refactored gpu::convolve

author Alexey Spizhevoy <no@email>

Fri, 24 Dec 2010 06:48:23 +0000 (06:48 +0000)

committer Alexey Spizhevoy <no@email>

Fri, 24 Dec 2010 06:48:23 +0000 (06:48 +0000)
author Alexey Spizhevoy <no@email>
Fri, 24 Dec 2010 06:48:23 +0000 (06:48 +0000)
committer Alexey Spizhevoy <no@email>
Fri, 24 Dec 2010 06:48:23 +0000 (06:48 +0000)
diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu

index 7b66565..7d3ff89 100644 (file)
--- a/modules/gpu/src/cuda/imgproc.cu
+++ b/modules/gpu/src/cuda/imgproc.cu
@@ -752,7 +752,6 @@ namespace cv { namespace gpu { namespace imgproc
      //////////////////////////////////////////////////////////////////////////\r
      // mulSpectrums\r
  \r
-\r
      __global__ void mulSpectrumsKernel(const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b, \r
                                         DevMem2D_<cufftComplex> c)\r
      {\r
@@ -776,11 +775,9 @@ namespace cv { namespace gpu { namespace imgproc
          cudaSafeCall(cudaThreadSynchronize());\r
      }\r
  \r
-\r
      //////////////////////////////////////////////////////////////////////////\r
      // mulSpectrums_CONJ\r
  \r
-\r
      __global__ void mulSpectrumsKernel_CONJ(\r
              const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,\r
              DevMem2D_<cufftComplex> c)\r
@@ -805,11 +802,9 @@ namespace cv { namespace gpu { namespace imgproc
          cudaSafeCall(cudaThreadSynchronize());\r
      }\r
  \r
-\r
      //////////////////////////////////////////////////////////////////////////\r
      // mulAndScaleSpectrums\r
  \r
-\r
      __global__ void mulAndScaleSpectrumsKernel(\r
              const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b, \r
              float scale, DevMem2D_<cufftComplex> c)\r
@@ -835,11 +830,9 @@ namespace cv { namespace gpu { namespace imgproc
          cudaSafeCall(cudaThreadSynchronize());\r
      }\r
  \r
-\r
      //////////////////////////////////////////////////////////////////////////\r
      // mulAndScaleSpectrums_CONJ\r
  \r
-\r
      __global__ void mulAndScaleSpectrumsKernel_CONJ(\r
              const PtrStep_<cufftComplex> a, const PtrStep_<cufftComplex> b,\r
              float scale, DevMem2D_<cufftComplex> c)\r
@@ -865,6 +858,5 @@ namespace cv { namespace gpu { namespace imgproc
          cudaSafeCall(cudaThreadSynchronize());\r
      }\r
  \r
-\r
  }}}\r
  \r
diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp

index 8a94630..76e079a 100644 (file)
--- a/modules/gpu/src/imgproc_gpu.cpp
+++ b/modules/gpu/src/imgproc_gpu.cpp
@@ -1144,9 +1144,6 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bo
      bool is_complex_input = src.channels() == 2;\r
      bool is_complex_output = !(flags & DFT_REAL_OUTPUT);\r
  \r
-    // We don't support scaled transform\r
-    CV_Assert(!is_scaled_dft);\r
-\r
      // We don't support real-to-real transform\r
      CV_Assert(is_complex_input || is_complex_output);\r
  \r
@@ -1178,6 +1175,7 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bo
      if (is_complex_input) \r
          dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R;\r
  \r
+    int dft_rows = src_aux.rows;\r
      int dft_cols = src_aux.cols;\r
      if (is_complex_input && !is_complex_output)\r
          dft_cols = (src_aux.cols - 1) * 2 + (int)odd;\r
@@ -1185,9 +1183,9 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bo
  \r
      cufftHandle plan;\r
      if (is_1d_input || is_row_dft)\r
-        cufftPlan1d(&plan, dft_cols, dft_type, src_aux.rows);\r
+        cufftPlan1d(&plan, dft_cols, dft_type, dft_rows);\r
      else\r
-        cufftPlan2d(&plan, src_aux.rows, dft_cols, dft_type);\r
+        cufftPlan2d(&plan, dft_rows, dft_cols, dft_type);\r
  \r
      GpuMat dst_data, dst_aux;\r
      int dst_cols, dst_rows;\r
@@ -1285,6 +1283,9 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bo
      }\r
  \r
      cufftSafeCall(cufftDestroy(plan));\r
+\r
+    if (is_scaled_dft)\r
+        multiply(dst, Scalar::all(1. / (dft_rows * dft_cols)), dst);\r
  }\r
  \r
  //////////////////////////////////////////////////////////////////////////////\r
@@ -1293,7 +1294,7 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, int flags, int nonZeroRows, bo
  namespace \r
  {\r
      // Estimates optimal block size\r
-    void crossCorrOptBlockSize(int w, int h, int tw, int th, int& bw, int& bh)\r
+    void convolveOptBlockSize(int w, int h, int tw, int th, int& bw, int& bh)\r
      {\r
          int major, minor;\r
          getComputeCapability(getDevice(), major, minor);\r
@@ -1329,7 +1330,7 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
      result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);\r
  \r
      Size block_size;\r
-    crossCorrOptBlockSize(result.cols, result.rows, templ.cols, templ.rows, \r
+    convolveOptBlockSize(result.cols, result.rows, templ.cols, templ.rows, \r
                            block_size.width, block_size.height);\r
  \r
      Size dft_size;\r
@@ -1367,10 +1368,11 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
      {\r
          for (int x = 0; x < result.cols; x += block_size.width)\r
          {                \r
-            // Locate ROI in the source matrix\r
              Size image_roi_size;\r
              image_roi_size.width = std::min(x + dft_size.width, image.cols) - x;\r
              image_roi_size.height = std::min(y + dft_size.height, image.rows) - y;\r
+\r
+            // Locate ROI in the source matrix\r
              GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr<float>(y) + x), image.step);\r
  \r
              // Make source image block continous\r
@@ -1386,14 +1388,16 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
              cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr<cufftComplex>(), \r
                                         result_data.ptr<cufftReal>()));\r
  \r
-            // Copy result block into appropriate part of the result matrix.\r
-            // We can't compute it inplace as the result of the CUFFT transforms\r
-            // is always continous, while the result matrix and its blocks can have gaps.\r
              Size result_roi_size;\r
              result_roi_size.width = std::min(x + block_size.width, result.cols) - x;\r
              result_roi_size.height = std::min(y + block_size.height, result.rows) - y;\r
+\r
              GpuMat result_roi(result_roi_size, CV_32F, (void*)(result.ptr<float>(y) + x), result.step);\r
              GpuMat result_block(result_roi_size, CV_32F, result_data.ptr(), dft_size.width * sizeof(cufftReal));\r
+\r
+            // Copy result block into appropriate part of the result matrix.\r
+            // We can't compute it inplace as the result of the CUFFT transforms\r
+            // is always continous, while the result matrix and its blocks can have gaps.\r
              result_block.copyTo(result_roi);\r
          }\r
      }\r
diff --git a/tests/gpu/src/dft_routines.cpp b/tests/gpu/src/dft_routines.cpp

index 6c8d5c7..0bda401 100644 (file)
--- a/tests/gpu/src/dft_routines.cpp
+++ b/tests/gpu/src/dft_routines.cpp
@@ -274,7 +274,7 @@ struct CV_GpuDftTest: CvTest
          rng.fill(mat, RNG::UNIFORM, Scalar::all(0.f), Scalar::all(10.f));\r
      }\r
  \r
-    bool cmp(const Mat& gold, const Mat& mine, float max_err=1e-3f, float scale=1.f)\r
+    bool cmp(const Mat& gold, const Mat& mine, float max_err=1e-3f)\r
      {\r
          if (gold.size() != mine.size())\r
          {\r
@@ -299,7 +299,7 @@ struct CV_GpuDftTest: CvTest
              for (int j = 0; j < gold.cols * gold.channels(); ++j)\r
              {\r
                  float gold_ = gold.at<float>(i, j);\r
-                float mine_ = mine.at<float>(i, j) * scale;\r
+                float mine_ = mine.at<float>(i, j);\r
                  if (fabs(gold_ - mine_) > max_err)\r
                  {\r
                      ts->printf(CvTS::CONSOLE, "bad values at %d %d: gold=%f, mine=%f\n", j / gold.channels(), i, gold_, mine_);\r
@@ -382,7 +382,7 @@ struct CV_GpuDftTest: CvTest
              d_c = GpuMat(a.rows, a.cols, CV_32F, d_c_data.ptr(), a.cols * d_c_data.elemSize());\r
          }\r
          dft(GpuMat(a), d_b, 0);\r
-        dft(d_b, d_c, DFT_REAL_OUTPUT, 0, odd);\r
+        dft(d_b, d_c, DFT_REAL_OUTPUT | DFT_SCALE, 0, odd);\r
  \r
          if (ok && inplace && d_b.ptr() != d_b_data.ptr())\r
          {\r
@@ -408,7 +408,7 @@ struct CV_GpuDftTest: CvTest
              ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);\r
              ok = false;\r
          }\r
-        if (ok) ok = cmp(a, Mat(d_c), rows * cols * 1e-5f, 1.f / (rows * cols));\r
+        if (ok) ok = cmp(a, Mat(d_c), rows * cols * 1e-5f);\r
          if (!ok) \r
              ts->printf(CvTS::CONSOLE, "testR2CThenC2R failed: hint=%s, cols=%d, rows=%d\n", hint.c_str(), cols, rows);\r
      }\r
author	Alexey Spizhevoy <no@email>
	Fri, 24 Dec 2010 06:48:23 +0000 (06:48 +0000)
committer	Alexey Spizhevoy <no@email>
	Fri, 24 Dec 2010 06:48:23 +0000 (06:48 +0000)
modules/gpu/src/cuda/imgproc.cu		patch \| blob \| history
modules/gpu/src/imgproc_gpu.cpp		patch \| blob \| history
tests/gpu/src/dft_routines.cpp		patch \| blob \| history