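Fixed gpu::matchTemplate to handle big templates correctly: the template's sum of squares is now carried as unsigned long long (accumulated as double in sqrSum) instead of unsigned int in the SQDIFF and SQDIFF_NORMED 8U paths. Added tests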

author Alexey Spizhevoy <no@email>

Mon, 26 Mar 2012 09:19:33 +0000 (09:19 +0000)

committer Alexey Spizhevoy <no@email>

Mon, 26 Mar 2012 09:19:33 +0000 (09:19 +0000)
author Alexey Spizhevoy <no@email>
Mon, 26 Mar 2012 09:19:33 +0000 (09:19 +0000)
committer Alexey Spizhevoy <no@email>
Mon, 26 Mar 2012 09:19:33 +0000 (09:19 +0000)
diff --git a/modules/gpu/src/cuda/match_template.cu b/modules/gpu/src/cuda/match_template.cu

index afa1d1f..3d4f8eb 100644 (file)
--- a/modules/gpu/src/cuda/match_template.cu
+++ b/modules/gpu/src/cuda/match_template.cu
@@ -216,7 +216,7 @@ namespace cv { namespace gpu { namespace device
          // Prepared_SQDIFF\r
  \r
          template <int cn>\r
-        __global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result)\r
+        __global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result)\r
          {\r
              const int x = blockIdx.x * blockDim.x + threadIdx.x;\r
              const int y = blockIdx.y * blockDim.y + threadIdx.y;\r
@@ -232,7 +232,7 @@ namespace cv { namespace gpu { namespace device
          }\r
  \r
          template <int cn>\r
-        void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result, cudaStream_t stream)\r
+        void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, cudaStream_t stream)\r
          {\r
              const dim3 threads(32, 8);\r
              const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));\r
@@ -244,10 +244,10 @@ namespace cv { namespace gpu { namespace device
                  cudaSafeCall( cudaDeviceSynchronize() );\r
          }\r
  \r
-        void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result, int cn, \r
+        void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, int cn,\r
                                               cudaStream_t stream)\r
          {\r
-            typedef void (*caller_t)(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result, cudaStream_t stream);\r
+            typedef void (*caller_t)(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, cudaStream_t stream);\r
  \r
              static const caller_t callers[] = \r
              {\r
@@ -284,7 +284,9 @@ namespace cv { namespace gpu { namespace device
  \r
  \r
          template <int cn>\r
-        __global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U(int w, int h, const PtrStep<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result)\r
+        __global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U(\r
+                int w, int h, const PtrStep<unsigned long long> image_sqsum,\r
+                unsigned long long templ_sqsum, DevMem2Df result)\r
          {\r
              const int x = blockIdx.x * blockDim.x + threadIdx.x;\r
              const int y = blockIdx.y * blockDim.y + threadIdx.y;\r
@@ -301,7 +303,7 @@ namespace cv { namespace gpu { namespace device
          }\r
  \r
          template <int cn>\r
-        void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, \r
+        void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum,\r
                                                      DevMem2Df result, cudaStream_t stream)\r
          {\r
              const dim3 threads(32, 8);\r
@@ -315,10 +317,10 @@ namespace cv { namespace gpu { namespace device
          }\r
  \r
  \r
-        void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, \r
+        void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum,\r
                                                      DevMem2Df result, int cn, cudaStream_t stream)\r
          {\r
-            typedef void (*caller_t)(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result, cudaStream_t stream);\r
+            typedef void (*caller_t)(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, cudaStream_t stream);\r
              static const caller_t callers[] = \r
              {\r
                  0, matchTemplatePrepared_SQDIFF_NORMED_8U<1>, matchTemplatePrepared_SQDIFF_NORMED_8U<2>, matchTemplatePrepared_SQDIFF_NORMED_8U<3>, matchTemplatePrepared_SQDIFF_NORMED_8U<4>\r
diff --git a/modules/gpu/src/cuda/matrix_reductions.cu b/modules/gpu/src/cuda/matrix_reductions.cu

index 6d3d7c5..fbf5ce5 100644 (file)
--- a/modules/gpu/src/cuda/matrix_reductions.cu
+++ b/modules/gpu/src/cuda/matrix_reductions.cu
@@ -1739,7 +1739,7 @@ namespace cv { namespace gpu { namespace device
              template <typename T>\r
              void sqrSumCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)\r
              {\r
-                typedef typename SumType<T>::R R;\r
+                typedef double R;\r
  \r
                  dim3 threads, grid;\r
                  estimateThreadCfg(src.cols, src.rows, threads, grid);\r
diff --git a/modules/gpu/src/match_template.cpp b/modules/gpu/src/match_template.cpp

index 7066586..59b97e7 100644 (file)
--- a/modules/gpu/src/match_template.cpp
+++ b/modules/gpu/src/match_template.cpp
@@ -62,10 +62,10 @@ namespace cv { namespace gpu { namespace device
          void matchTemplateNaive_SQDIFF_8U(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream);\r
          void matchTemplateNaive_SQDIFF_32F(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream);\r
  \r
-        void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result, \r
+        void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result,\r
              int cn, cudaStream_t stream);\r
  \r
-        void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result, \r
+        void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result,\r
              int cn, cudaStream_t stream);\r
  \r
          void matchTemplatePrepared_CCOFF_8U(int w, int h, const DevMem2D_<unsigned int> image_sum, unsigned int templ_sum, DevMem2Df result, cudaStream_t stream);\r
@@ -248,7 +248,7 @@ namespace
          GpuMat img_sqsum;\r
          sqrIntegral(image.reshape(1), img_sqsum, stream);\r
  \r
-        unsigned int templ_sqsum = (unsigned int)sqrSum(templ.reshape(1))[0];\r
+        unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];\r
  \r
          matchTemplate_CCORR_8U(image, templ, result, stream);\r
          matchTemplatePrepared_SQDIFF_8U(templ.cols, templ.rows, img_sqsum, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));\r
@@ -260,7 +260,7 @@ namespace
          GpuMat img_sqsum;\r
          sqrIntegral(image.reshape(1), img_sqsum, stream);\r
  \r
-        unsigned int templ_sqsum = (unsigned int)sqrSum(templ.reshape(1))[0];\r
+        unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];\r
  \r
          matchTemplate_CCORR_8U(image, templ, result, stream);\r
          matchTemplatePrepared_SQDIFF_NORMED_8U(templ.cols, templ.rows, img_sqsum, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));\r
diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp

index bd24f7d..a4691e4 100644 (file)
--- a/modules/gpu/test/test_imgproc.cpp
+++ b/modules/gpu/test/test_imgproc.cpp
@@ -2775,6 +2775,52 @@ INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_CCOEF_NORMED, Combine(
                          ALL_DEVICES,
                          Values(std::make_pair(std::string("matchtemplate/source-0.png"), std::string("matchtemplate/target-0.png")))));
  
+
+class MatchTemplate_CanFindBigTemplate : public TestWithParam<cv::gpu::DeviceInfo>
+{
+    virtual void SetUp()
+    {
+        cv::gpu::setDevice(GetParam().deviceID());
+    }
+};
+
+TEST_P(MatchTemplate_CanFindBigTemplate, SQDIFF_NORMED)
+{
+    cv::Mat scene = readImage("matchtemplate/scene.jpg");
+    cv::Mat templ = readImage("matchtemplate/template.jpg");
+
+    cv::gpu::GpuMat d_scene(scene), d_templ(templ), d_result;
+    cv::gpu::matchTemplate(d_scene, d_templ, d_result, CV_TM_SQDIFF_NORMED);
+
+    double minVal;
+    cv::Point minLoc;
+    cv::gpu::minMaxLoc(d_result, &minVal, 0, &minLoc, 0);
+
+    ASSERT_GE(minVal, 0);
+    ASSERT_LT(minVal, 1e-3);
+    ASSERT_EQ(344, minLoc.x);
+    ASSERT_EQ(0, minLoc.y);
+}
+
+TEST_P(MatchTemplate_CanFindBigTemplate, SQDIFF)
+{
+    cv::Mat scene = readImage("matchtemplate/scene.jpg");
+    cv::Mat templ = readImage("matchtemplate/template.jpg");
+
+    cv::gpu::GpuMat d_scene(scene), d_templ(templ), d_result;
+    cv::gpu::matchTemplate(d_scene, d_templ, d_result, CV_TM_SQDIFF);
+
+    double minVal;
+    cv::Point minLoc;
+    cv::gpu::minMaxLoc(d_result, &minVal, 0, &minLoc, 0);
+
+    ASSERT_GE(minVal, 0);
+    ASSERT_EQ(344, minLoc.x);
+    ASSERT_EQ(0, minLoc.y);
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_CanFindBigTemplate, ALL_DEVICES);
+
  ////////////////////////////////////////////////////////////////////////////
  // MulSpectrums
author	Alexey Spizhevoy <no@email>
	Mon, 26 Mar 2012 09:19:33 +0000 (09:19 +0000)
committer	Alexey Spizhevoy <no@email>
	Mon, 26 Mar 2012 09:19:33 +0000 (09:19 +0000)
modules/gpu/src/cuda/match_template.cu		patch \| blob \| history
modules/gpu/src/cuda/matrix_reductions.cu		patch \| blob \| history
modules/gpu/src/match_template.cpp		patch \| blob \| history
modules/gpu/test/test_imgproc.cpp		patch \| blob \| history