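fix cuda match template: use int / double instead of unsigned int / unsigned long long for the integral-image sums and squared sums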

author Vladislav Vinogradov <vlad.vinogradov@itseez.com>

Thu, 25 Dec 2014 12:41:14 +0000 (15:41 +0300)

committer Vladislav Vinogradov <vlad.vinogradov@itseez.com>

Thu, 25 Dec 2014 16:23:16 +0000 (19:23 +0300)
author Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Thu, 25 Dec 2014 12:41:14 +0000 (15:41 +0300)
committer Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Thu, 25 Dec 2014 16:23:16 +0000 (19:23 +0300)
diff --git a/modules/cudaimgproc/src/cuda/match_template.cu b/modules/cudaimgproc/src/cuda/match_template.cu

index 832878f..87ee71e 100644 (file)
--- a/modules/cudaimgproc/src/cuda/match_template.cu
+++ b/modules/cudaimgproc/src/cuda/match_template.cu
@@ -218,7 +218,7 @@ namespace cv { namespace cuda { namespace device
          // Prepared_SQDIFF
  
          template <int cn>
-        __global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result)
+        __global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep<double> image_sqsum, double templ_sqsum, PtrStepSzf result)
          {
              const int x = blockIdx.x * blockDim.x + threadIdx.x;
              const int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -234,7 +234,7 @@ namespace cv { namespace cuda { namespace device
          }
  
          template <int cn>
-        void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream)
+        void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum, PtrStepSzf result, cudaStream_t stream)
          {
              const dim3 threads(32, 8);
              const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
@@ -246,10 +246,10 @@ namespace cv { namespace cuda { namespace device
                  cudaSafeCall( cudaDeviceSynchronize() );
          }
  
-        void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, int cn,
+        void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum, PtrStepSzf result, int cn,
                                               cudaStream_t stream)
          {
-            typedef void (*caller_t)(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream);
+            typedef void (*caller_t)(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum, PtrStepSzf result, cudaStream_t stream);
  
              static const caller_t callers[] =
              {
@@ -287,8 +287,8 @@ namespace cv { namespace cuda { namespace device
  
          template <int cn>
          __global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U(
-                int w, int h, const PtrStep<unsigned long long> image_sqsum,
-                unsigned long long templ_sqsum, PtrStepSzf result)
+                int w, int h, const PtrStep<double> image_sqsum,
+                double templ_sqsum, PtrStepSzf result)
          {
              const int x = blockIdx.x * blockDim.x + threadIdx.x;
              const int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -305,7 +305,7 @@ namespace cv { namespace cuda { namespace device
          }
  
          template <int cn>
-        void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum,
+        void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum,
                                                      PtrStepSzf result, cudaStream_t stream)
          {
              const dim3 threads(32, 8);
@@ -319,10 +319,10 @@ namespace cv { namespace cuda { namespace device
          }
  
  
-        void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum,
+        void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum,
                                                      PtrStepSzf result, int cn, cudaStream_t stream)
          {
-            typedef void (*caller_t)(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream);
+            typedef void (*caller_t)(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum, PtrStepSzf result, cudaStream_t stream);
              static const caller_t callers[] =
              {
                  0, matchTemplatePrepared_SQDIFF_NORMED_8U<1>, matchTemplatePrepared_SQDIFF_NORMED_8U<2>, matchTemplatePrepared_SQDIFF_NORMED_8U<3>, matchTemplatePrepared_SQDIFF_NORMED_8U<4>
@@ -334,7 +334,7 @@ namespace cv { namespace cuda { namespace device
          //////////////////////////////////////////////////////////////////////
          // Prepared_CCOFF
  
-        __global__ void matchTemplatePreparedKernel_CCOFF_8U(int w, int h, float templ_sum_scale, const PtrStep<unsigned int> image_sum, PtrStepSzf result)
+        __global__ void matchTemplatePreparedKernel_CCOFF_8U(int w, int h, float templ_sum_scale, const PtrStep<int> image_sum, PtrStepSzf result)
          {
              const int x = blockIdx.x * blockDim.x + threadIdx.x;
              const int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -349,7 +349,7 @@ namespace cv { namespace cuda { namespace device
              }
          }
  
-        void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz<unsigned int> image_sum, unsigned int templ_sum, PtrStepSzf result, cudaStream_t stream)
+        void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz<int> image_sum, int templ_sum, PtrStepSzf result, cudaStream_t stream)
          {
              dim3 threads(32, 8);
              dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
@@ -365,8 +365,8 @@ namespace cv { namespace cuda { namespace device
  
          __global__ void matchTemplatePreparedKernel_CCOFF_8UC2(
                  int w, int h, float templ_sum_scale_r, float templ_sum_scale_g,
-                const PtrStep<unsigned int> image_sum_r,
-                const PtrStep<unsigned int> image_sum_g,
+                const PtrStep<int> image_sum_r,
+                const PtrStep<int> image_sum_g,
                  PtrStepSzf result)
          {
              const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -388,9 +388,9 @@ namespace cv { namespace cuda { namespace device
  
          void matchTemplatePrepared_CCOFF_8UC2(
                  int w, int h,
-                const PtrStepSz<unsigned int> image_sum_r,
-                const PtrStepSz<unsigned int> image_sum_g,
-                unsigned int templ_sum_r, unsigned int templ_sum_g,
+                const PtrStepSz<int> image_sum_r,
+                const PtrStepSz<int> image_sum_g,
+                int templ_sum_r, int templ_sum_g,
                  PtrStepSzf result, cudaStream_t stream)
          {
              dim3 threads(32, 8);
@@ -412,9 +412,9 @@ namespace cv { namespace cuda { namespace device
                  float templ_sum_scale_r,
                  float templ_sum_scale_g,
                  float templ_sum_scale_b,
-                const PtrStep<unsigned int> image_sum_r,
-                const PtrStep<unsigned int> image_sum_g,
-                const PtrStep<unsigned int> image_sum_b,
+                const PtrStep<int> image_sum_r,
+                const PtrStep<int> image_sum_g,
+                const PtrStep<int> image_sum_b,
                  PtrStepSzf result)
          {
              const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -440,12 +440,12 @@ namespace cv { namespace cuda { namespace device
  
          void matchTemplatePrepared_CCOFF_8UC3(
                  int w, int h,
-                const PtrStepSz<unsigned int> image_sum_r,
-                const PtrStepSz<unsigned int> image_sum_g,
-                const PtrStepSz<unsigned int> image_sum_b,
-                unsigned int templ_sum_r,
-                unsigned int templ_sum_g,
-                unsigned int templ_sum_b,
+                const PtrStepSz<int> image_sum_r,
+                const PtrStepSz<int> image_sum_g,
+                const PtrStepSz<int> image_sum_b,
+                int templ_sum_r,
+                int templ_sum_g,
+                int templ_sum_b,
                  PtrStepSzf result, cudaStream_t stream)
          {
              dim3 threads(32, 8);
@@ -471,10 +471,10 @@ namespace cv { namespace cuda { namespace device
                  float templ_sum_scale_g,
                  float templ_sum_scale_b,
                  float templ_sum_scale_a,
-                const PtrStep<unsigned int> image_sum_r,
-                const PtrStep<unsigned int> image_sum_g,
-                const PtrStep<unsigned int> image_sum_b,
-                const PtrStep<unsigned int> image_sum_a,
+                const PtrStep<int> image_sum_r,
+                const PtrStep<int> image_sum_g,
+                const PtrStep<int> image_sum_b,
+                const PtrStep<int> image_sum_a,
                  PtrStepSzf result)
          {
              const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -504,14 +504,14 @@ namespace cv { namespace cuda { namespace device
  
          void matchTemplatePrepared_CCOFF_8UC4(
                  int w, int h,
-                const PtrStepSz<unsigned int> image_sum_r,
-                const PtrStepSz<unsigned int> image_sum_g,
-                const PtrStepSz<unsigned int> image_sum_b,
-                const PtrStepSz<unsigned int> image_sum_a,
-                unsigned int templ_sum_r,
-                unsigned int templ_sum_g,
-                unsigned int templ_sum_b,
-                unsigned int templ_sum_a,
+                const PtrStepSz<int> image_sum_r,
+                const PtrStepSz<int> image_sum_g,
+                const PtrStepSz<int> image_sum_b,
+                const PtrStepSz<int> image_sum_a,
+                int templ_sum_r,
+                int templ_sum_g,
+                int templ_sum_b,
+                int templ_sum_a,
                  PtrStepSzf result, cudaStream_t stream)
          {
              dim3 threads(32, 8);
@@ -537,8 +537,8 @@ namespace cv { namespace cuda { namespace device
          __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8U(
                  int w, int h, float weight,
                  float templ_sum_scale, float templ_sqsum_scale,
-                const PtrStep<unsigned int> image_sum,
-                const PtrStep<unsigned long long> image_sqsum,
+                const PtrStep<int> image_sum,
+                const PtrStep<double> image_sqsum,
                  PtrStepSzf result)
          {
              const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -559,9 +559,9 @@ namespace cv { namespace cuda { namespace device
          }
  
          void matchTemplatePrepared_CCOFF_NORMED_8U(
-                    int w, int h, const PtrStepSz<unsigned int> image_sum,
-                    const PtrStepSz<unsigned long long> image_sqsum,
-                    unsigned int templ_sum, unsigned long long templ_sqsum,
+                    int w, int h, const PtrStepSz<int> image_sum,
+                    const PtrStepSz<double> image_sqsum,
+                    int templ_sum, double templ_sqsum,
                      PtrStepSzf result, cudaStream_t stream)
          {
              dim3 threads(32, 8);
@@ -586,8 +586,8 @@ namespace cv { namespace cuda { namespace device
                  int w, int h, float weight,
                  float templ_sum_scale_r, float templ_sum_scale_g,
                  float templ_sqsum_scale,
-                const PtrStep<unsigned int> image_sum_r, const PtrStep<unsigned long long> image_sqsum_r,
-                const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
+                const PtrStep<int> image_sum_r, const PtrStep<double> image_sqsum_r,
+                const PtrStep<int> image_sum_g, const PtrStep<double> image_sqsum_g,
                  PtrStepSzf result)
          {
              const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -618,10 +618,10 @@ namespace cv { namespace cuda { namespace device
  
          void matchTemplatePrepared_CCOFF_NORMED_8UC2(
                      int w, int h,
-                    const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
-                    const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
-                    unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
-                    unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
+                    const PtrStepSz<int> image_sum_r, const PtrStepSz<double> image_sqsum_r,
+                    const PtrStepSz<int> image_sum_g, const PtrStepSz<double> image_sqsum_g,
+                    int templ_sum_r, double templ_sqsum_r,
+                    int templ_sum_g, double templ_sqsum_g,
                      PtrStepSzf result, cudaStream_t stream)
          {
              dim3 threads(32, 8);
@@ -652,9 +652,9 @@ namespace cv { namespace cuda { namespace device
                  int w, int h, float weight,
                  float templ_sum_scale_r, float templ_sum_scale_g, float templ_sum_scale_b,
                  float templ_sqsum_scale,
-                const PtrStep<unsigned int> image_sum_r, const PtrStep<unsigned long long> image_sqsum_r,
-                const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
-                const PtrStep<unsigned int> image_sum_b, const PtrStep<unsigned long long> image_sqsum_b,
+                const PtrStep<int> image_sum_r, const PtrStep<double> image_sqsum_r,
+                const PtrStep<int> image_sum_g, const PtrStep<double> image_sqsum_g,
+                const PtrStep<int> image_sum_b, const PtrStep<double> image_sqsum_b,
                  PtrStepSzf result)
          {
              const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -693,12 +693,12 @@ namespace cv { namespace cuda { namespace device
  
          void matchTemplatePrepared_CCOFF_NORMED_8UC3(
                      int w, int h,
-                    const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
-                    const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
-                    const PtrStepSz<unsigned int> image_sum_b, const PtrStepSz<unsigned long long> image_sqsum_b,
-                    unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
-                    unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
-                    unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
+                    const PtrStepSz<int> image_sum_r, const PtrStepSz<double> image_sqsum_r,
+                    const PtrStepSz<int> image_sum_g, const PtrStepSz<double> image_sqsum_g,
+                    const PtrStepSz<int> image_sum_b, const PtrStepSz<double> image_sqsum_b,
+                    int templ_sum_r, double templ_sqsum_r,
+                    int templ_sum_g, double templ_sqsum_g,
+                    int templ_sum_b, double templ_sqsum_b,
                      PtrStepSzf result, cudaStream_t stream)
          {
              dim3 threads(32, 8);
@@ -732,10 +732,10 @@ namespace cv { namespace cuda { namespace device
                  int w, int h, float weight,
                  float templ_sum_scale_r, float templ_sum_scale_g, float templ_sum_scale_b,
                  float templ_sum_scale_a, float templ_sqsum_scale,
-                const PtrStep<unsigned int> image_sum_r, const PtrStep<unsigned long long> image_sqsum_r,
-                const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
-                const PtrStep<unsigned int> image_sum_b, const PtrStep<unsigned long long> image_sqsum_b,
-                const PtrStep<unsigned int> image_sum_a, const PtrStep<unsigned long long> image_sqsum_a,
+                const PtrStep<int> image_sum_r, const PtrStep<double> image_sqsum_r,
+                const PtrStep<int> image_sum_g, const PtrStep<double> image_sqsum_g,
+                const PtrStep<int> image_sum_b, const PtrStep<double> image_sqsum_b,
+                const PtrStep<int> image_sum_a, const PtrStep<double> image_sqsum_a,
                  PtrStepSzf result)
          {
              const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -780,14 +780,14 @@ namespace cv { namespace cuda { namespace device
  
          void matchTemplatePrepared_CCOFF_NORMED_8UC4(
                      int w, int h,
-                    const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
-                    const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
-                    const PtrStepSz<unsigned int> image_sum_b, const PtrStepSz<unsigned long long> image_sqsum_b,
-                    const PtrStepSz<unsigned int> image_sum_a, const PtrStepSz<unsigned long long> image_sqsum_a,
-                    unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
-                    unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
-                    unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
-                    unsigned int templ_sum_a, unsigned long long templ_sqsum_a,
+                    const PtrStepSz<int> image_sum_r, const PtrStepSz<double> image_sqsum_r,
+                    const PtrStepSz<int> image_sum_g, const PtrStepSz<double> image_sqsum_g,
+                    const PtrStepSz<int> image_sum_b, const PtrStepSz<double> image_sqsum_b,
+                    const PtrStepSz<int> image_sum_a, const PtrStepSz<double> image_sqsum_a,
+                    int templ_sum_r, double templ_sqsum_r,
+                    int templ_sum_g, double templ_sqsum_g,
+                    int templ_sum_b, double templ_sqsum_b,
+                    int templ_sum_a, double templ_sqsum_a,
                      PtrStepSzf result, cudaStream_t stream)
          {
              dim3 threads(32, 8);
@@ -823,8 +823,8 @@ namespace cv { namespace cuda { namespace device
  
          template <int cn>
          __global__ void normalizeKernel_8U(
-                int w, int h, const PtrStep<unsigned long long> image_sqsum,
-                unsigned long long templ_sqsum, PtrStepSzf result)
+                int w, int h, const PtrStep<double> image_sqsum,
+                double templ_sqsum, PtrStepSzf result)
          {
              const int x = blockIdx.x * blockDim.x + threadIdx.x;
              const int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -838,8 +838,8 @@ namespace cv { namespace cuda { namespace device
              }
          }
  
-        void normalize_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum,
-                          unsigned long long templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream)
+        void normalize_8U(int w, int h, const PtrStepSz<double> image_sqsum,
+                          double templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream)
          {
              dim3 threads(32, 8);
              dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
diff --git a/modules/cudaimgproc/src/match_template.cpp b/modules/cudaimgproc/src/match_template.cpp

index 19d0915..c5ab143 100644 (file)
--- a/modules/cudaimgproc/src/match_template.cpp
+++ b/modules/cudaimgproc/src/match_template.cpp
@@ -61,77 +61,77 @@ namespace cv { namespace cuda { namespace device
          void matchTemplateNaive_SQDIFF_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
          void matchTemplateNaive_SQDIFF_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
  
-        void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result,
+        void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum, PtrStepSzf result,
              int cn, cudaStream_t stream);
  
-        void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result,
+        void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum, PtrStepSzf result,
              int cn, cudaStream_t stream);
  
-        void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz<unsigned int> image_sum, unsigned int templ_sum, PtrStepSzf result, cudaStream_t stream);
+        void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz<int> image_sum, int templ_sum, PtrStepSzf result, cudaStream_t stream);
          void matchTemplatePrepared_CCOFF_8UC2(
              int w, int h,
-            const PtrStepSz<unsigned int> image_sum_r,
-            const PtrStepSz<unsigned int> image_sum_g,
-            unsigned int templ_sum_r,
-            unsigned int templ_sum_g,
+            const PtrStepSz<int> image_sum_r,
+            const PtrStepSz<int> image_sum_g,
+            int templ_sum_r,
+            int templ_sum_g,
              PtrStepSzf result, cudaStream_t stream);
          void matchTemplatePrepared_CCOFF_8UC3(
                  int w, int h,
-                const PtrStepSz<unsigned int> image_sum_r,
-                const PtrStepSz<unsigned int> image_sum_g,
-                const PtrStepSz<unsigned int> image_sum_b,
-                unsigned int templ_sum_r,
-                unsigned int templ_sum_g,
-                unsigned int templ_sum_b,
+                const PtrStepSz<int> image_sum_r,
+                const PtrStepSz<int> image_sum_g,
+                const PtrStepSz<int> image_sum_b,
+                int templ_sum_r,
+                int templ_sum_g,
+                int templ_sum_b,
                  PtrStepSzf result, cudaStream_t stream);
          void matchTemplatePrepared_CCOFF_8UC4(
                  int w, int h,
-                const PtrStepSz<unsigned int> image_sum_r,
-                const PtrStepSz<unsigned int> image_sum_g,
-                const PtrStepSz<unsigned int> image_sum_b,
-                const PtrStepSz<unsigned int> image_sum_a,
-                unsigned int templ_sum_r,
-                unsigned int templ_sum_g,
-                unsigned int templ_sum_b,
-                unsigned int templ_sum_a,
+                const PtrStepSz<int> image_sum_r,
+                const PtrStepSz<int> image_sum_g,
+                const PtrStepSz<int> image_sum_b,
+                const PtrStepSz<int> image_sum_a,
+                int templ_sum_r,
+                int templ_sum_g,
+                int templ_sum_b,
+                int templ_sum_a,
                  PtrStepSzf result, cudaStream_t stream);
  
  
          void matchTemplatePrepared_CCOFF_NORMED_8U(
-                int w, int h, const PtrStepSz<unsigned int> image_sum,
-                const PtrStepSz<unsigned long long> image_sqsum,
-                unsigned int templ_sum, unsigned long long templ_sqsum,
+                int w, int h, const PtrStepSz<int> image_sum,
+                const PtrStepSz<double> image_sqsum,
+                int templ_sum, double templ_sqsum,
                  PtrStepSzf result, cudaStream_t stream);
          void matchTemplatePrepared_CCOFF_NORMED_8UC2(
                  int w, int h,
-                const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
-                const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
-                unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
-                unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
+                const PtrStepSz<int> image_sum_r, const PtrStepSz<double> image_sqsum_r,
+                const PtrStepSz<int> image_sum_g, const PtrStepSz<double> image_sqsum_g,
+                int templ_sum_r, double templ_sqsum_r,
+                int templ_sum_g, double templ_sqsum_g,
                  PtrStepSzf result, cudaStream_t stream);
          void matchTemplatePrepared_CCOFF_NORMED_8UC3(
                  int w, int h,
-                const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
-                const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
-                const PtrStepSz<unsigned int> image_sum_b, const PtrStepSz<unsigned long long> image_sqsum_b,
-                unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
-                unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
-                unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
+                const PtrStepSz<int> image_sum_r, const PtrStepSz<double> image_sqsum_r,
+                const PtrStepSz<int> image_sum_g, const PtrStepSz<double> image_sqsum_g,
+                const PtrStepSz<int> image_sum_b, const PtrStepSz<double> image_sqsum_b,
+                int templ_sum_r, double templ_sqsum_r,
+                int templ_sum_g, double templ_sqsum_g,
+                int templ_sum_b, double templ_sqsum_b,
                  PtrStepSzf result, cudaStream_t stream);
          void matchTemplatePrepared_CCOFF_NORMED_8UC4(
                  int w, int h,
-                const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
-                const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
-                const PtrStepSz<unsigned int> image_sum_b, const PtrStepSz<unsigned long long> image_sqsum_b,
-                const PtrStepSz<unsigned int> image_sum_a, const PtrStepSz<unsigned long long> image_sqsum_a,
-                unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
-                unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
-                unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
-                unsigned int templ_sum_a, unsigned long long templ_sqsum_a,
+                const PtrStepSz<int> image_sum_r, const PtrStepSz<double> image_sqsum_r,
+                const PtrStepSz<int> image_sum_g, const PtrStepSz<double> image_sqsum_g,
+                const PtrStepSz<int> image_sum_b, const PtrStepSz<double> image_sqsum_b,
+                const PtrStepSz<int> image_sum_a, const PtrStepSz<double> image_sqsum_a,
+                int templ_sum_r, double templ_sqsum_r,
+                int templ_sum_g, double templ_sqsum_g,
+                int templ_sum_b, double templ_sqsum_b,
+                int templ_sum_a, double templ_sqsum_a,
                  PtrStepSzf result, cudaStream_t stream);
  
-        void normalize_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum,
-                          unsigned long long templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream);
+        void normalize_8U(int w, int h, const PtrStepSz<double> image_sqsum,
+                          double templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream);
  
          void extractFirstChannel_32F(const PtrStepSzb image, PtrStepSzf result, int cn, cudaStream_t stream);
      }
@@ -290,7 +290,7 @@ namespace
  
          cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
  
-        unsigned long long templ_sqsum = (unsigned long long) cuda::sqrSum(templ.reshape(1))[0];
+        double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0];
  
          normalize_8U(templ.cols, templ.rows, image_sqsums_, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
      }
@@ -361,7 +361,7 @@ namespace
  
          cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
  
-        unsigned long long templ_sqsum = (unsigned long long) cuda::sqrSum(templ.reshape(1))[0];
+        double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0];
  
          match_CCORR_.match(image, templ, _result, stream);
          GpuMat result = _result.getGpuMat();
@@ -400,7 +400,7 @@ namespace
  
          cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
  
-        unsigned long long templ_sqsum = (unsigned long long) cuda::sqrSum(templ.reshape(1))[0];
+        double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0];
  
          match_CCORR_.match(image, templ, _result, stream);
          GpuMat result = _result.getGpuMat();
@@ -446,7 +446,7 @@ namespace
              image_sums_.resize(1);
              cuda::integral(image, image_sums_[0], intBuffer_, stream);
  
-            unsigned int templ_sum = (unsigned int) cuda::sum(templ)[0];
+            int templ_sum = (int) cuda::sum(templ)[0];
  
              matchTemplatePrepared_CCOFF_8U(templ.cols, templ.rows, image_sums_[0], templ_sum, result, StreamAccessor::getStream(stream));
          }
@@ -465,19 +465,19 @@ namespace
              case 2:
                  matchTemplatePrepared_CCOFF_8UC2(
                          templ.cols, templ.rows, image_sums_[0], image_sums_[1],
-                        (unsigned int) templ_sum[0], (unsigned int) templ_sum[1],
+                        (int) templ_sum[0], (int) templ_sum[1],
                          result, StreamAccessor::getStream(stream));
                  break;
              case 3:
                  matchTemplatePrepared_CCOFF_8UC3(
                          templ.cols, templ.rows, image_sums_[0], image_sums_[1], image_sums_[2],
-                        (unsigned int) templ_sum[0], (unsigned int) templ_sum[1], (unsigned int) templ_sum[2],
+                        (int) templ_sum[0], (int) templ_sum[1], (int) templ_sum[2],
                          result, StreamAccessor::getStream(stream));
                  break;
              case 4:
                  matchTemplatePrepared_CCOFF_8UC4(
                          templ.cols, templ.rows, image_sums_[0], image_sums_[1], image_sums_[2], image_sums_[3],
-                        (unsigned int) templ_sum[0], (unsigned int) templ_sum[1], (unsigned int) templ_sum[2], (unsigned int) templ_sum[3],
+                        (int) templ_sum[0], (int) templ_sum[1], (int) templ_sum[2], (int) templ_sum[3],
                          result, StreamAccessor::getStream(stream));
                  break;
              default:
@@ -532,8 +532,8 @@ namespace
              image_sqsums_.resize(1);
              cuda::sqrIntegral(image, image_sqsums_[0], intBuffer_, stream);
  
-            unsigned int templ_sum = (unsigned int) cuda::sum(templ)[0];
-            unsigned long long templ_sqsum = (unsigned long long) cuda::sqrSum(templ)[0];
+            int templ_sum = (int) cuda::sum(templ)[0];
+            double templ_sqsum = cuda::sqrSum(templ)[0];
  
              matchTemplatePrepared_CCOFF_NORMED_8U(
                      templ.cols, templ.rows, image_sums_[0], image_sqsums_[0],
@@ -561,8 +561,8 @@ namespace
                          templ.cols, templ.rows,
                          image_sums_[0], image_sqsums_[0],
                          image_sums_[1], image_sqsums_[1],
-                        (unsigned int)templ_sum[0], (unsigned long long)templ_sqsum[0],
-                        (unsigned int)templ_sum[1], (unsigned long long)templ_sqsum[1],
+                        (int)templ_sum[0], templ_sqsum[0],
+                        (int)templ_sum[1], templ_sqsum[1],
                          result, StreamAccessor::getStream(stream));
                  break;
              case 3:
@@ -571,9 +571,9 @@ namespace
                          image_sums_[0], image_sqsums_[0],
                          image_sums_[1], image_sqsums_[1],
                          image_sums_[2], image_sqsums_[2],
-                        (unsigned int)templ_sum[0], (unsigned long long)templ_sqsum[0],
-                        (unsigned int)templ_sum[1], (unsigned long long)templ_sqsum[1],
-                        (unsigned int)templ_sum[2], (unsigned long long)templ_sqsum[2],
+                        (int)templ_sum[0], templ_sqsum[0],
+                        (int)templ_sum[1], templ_sqsum[1],
+                        (int)templ_sum[2], templ_sqsum[2],
                          result, StreamAccessor::getStream(stream));
                  break;
              case 4:
@@ -583,10 +583,10 @@ namespace
                          image_sums_[1], image_sqsums_[1],
                          image_sums_[2], image_sqsums_[2],
                          image_sums_[3], image_sqsums_[3],
-                        (unsigned int)templ_sum[0], (unsigned long long)templ_sqsum[0],
-                        (unsigned int)templ_sum[1], (unsigned long long)templ_sqsum[1],
-                        (unsigned int)templ_sum[2], (unsigned long long)templ_sqsum[2],
-                        (unsigned int)templ_sum[3], (unsigned long long)templ_sqsum[3],
+                        (int)templ_sum[0], templ_sqsum[0],
+                        (int)templ_sum[1], templ_sqsum[1],
+                        (int)templ_sum[2], templ_sqsum[2],
+                        (int)templ_sum[3], templ_sqsum[3],
                          result, StreamAccessor::getStream(stream));
                  break;
              default:
author	Vladislav Vinogradov <vlad.vinogradov@itseez.com>
	Thu, 25 Dec 2014 12:41:14 +0000 (15:41 +0300)
committer	Vladislav Vinogradov <vlad.vinogradov@itseez.com>
	Thu, 25 Dec 2014 16:23:16 +0000 (19:23 +0300)
modules/cudaimgproc/src/cuda/match_template.cu		patch | blob | history
modules/cudaimgproc/src/match_template.cpp		patch | blob | history