From 97282d8ff8a9ef8d31e6643793f8ffb31c686190 Mon Sep 17 00:00:00 2001
From: Alexey Spizhevoy <no@email>
Date: Fri, 8 Apr 2011 08:04:56 +0000
Subject: [PATCH] added downsample function into gpu module, refactored it a
 little bit, added guard for CUDA related include in
 cascadeclassifier_nvidia_api.cpp

---
 modules/gpu/include/opencv2/gpu/gpu.hpp      |  8 +++--
 modules/gpu/src/blend.cpp                    |  6 ++--
 modules/gpu/src/cuda/blend.cu                |  2 +-
 modules/gpu/src/cuda/imgproc.cu              | 27 +++++++++++++++
 modules/gpu/src/imgproc_gpu.cpp              | 27 +++++++++++++++
 modules/gpu/test/test_blend.cpp              | 18 ++++++----
 modules/gpu/test/test_imgproc_gpu.cpp        | 50 ++++++++++++++++++++++++++++
 samples/gpu/cascadeclassifier_nvidia_api.cpp |  3 ++
 8 files changed, 127 insertions(+), 14 deletions(-)
diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index a19809a..874666c 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -786,11 +786,13 @@ namespace cv
         //! computes the proximity map for the raster template and the image where the template is searched for
         CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method);
 
+        //! downsamples image: keeps every k-th pixel in each dimension (single-channel CV_8U and CV_32F only)
+        CV_EXPORTS void downsample(const GpuMat& src, GpuMat& dst, int k=2);
+
         //! performs linear blending of two images
         //! to avoid accuracy errors sum of weigths shouldn't be very close to zero
-        CV_EXPORTS void blendLinear(const GpuMat& img1, const GpuMat& img2, 
-                                    const GpuMat& weights1, const GpuMat& weights2, GpuMat& result);
-
+        CV_EXPORTS void blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2, 
+                                    GpuMat& result);
 
         ////////////////////////////// Matrix reductions //////////////////////////////
 
diff --git a/modules/gpu/src/blend.cpp b/modules/gpu/src/blend.cpp
index 7c782e6..c5dfd58 100644
--- a/modules/gpu/src/blend.cpp
+++ b/modules/gpu/src/blend.cpp
@@ -63,8 +63,8 @@ namespace cv { namespace gpu
                                const PtrStepf weights1, const PtrStepf weights2, PtrStep result);
 }}
 
-void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, 
-                          const GpuMat& weights1, const GpuMat& weights2, GpuMat& result)
+void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2, 
+                          GpuMat& result)
 {
     CV_Assert(img1.size() == img2.size());
     CV_Assert(img1.type() == img2.type());
@@ -94,7 +94,7 @@ void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2,
                           (const PtrStepf)weights1, (const PtrStepf)weights2, (PtrStepf)result);
         break;
     default:
-        CV_Error(CV_StsBadArg, "unsupported image depth in linear blending method");
+        CV_Error(CV_StsUnsupportedFormat, "bad image depth in linear blending function");
     }
 }
 
diff --git a/modules/gpu/src/cuda/blend.cu b/modules/gpu/src/cuda/blend.cu
index a9b85c1..42b5a63 100644
--- a/modules/gpu/src/cuda/blend.cu
+++ b/modules/gpu/src/cuda/blend.cu
@@ -73,7 +73,7 @@ namespace cv { namespace gpu
         dim3 threads(16, 16);
         dim3 grid(divUp(cols * cn, threads.x), divUp(rows, threads.y));
         
-        blendLinearKernel<T><<<grid, threads>>>(rows, cols * cn, cn, img1, img2, weights1, weights2, result);
+        blendLinearKernel<<<grid, threads>>>(rows, cols * cn, cn, img1, img2, weights1, weights2, result);
         cudaSafeCall(cudaThreadSynchronize());
     }
 
diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu
index 2c94f83..dad5335 100644
--- a/modules/gpu/src/cuda/imgproc.cu
+++ b/modules/gpu/src/cuda/imgproc.cu
@@ -883,5 +883,32 @@ namespace cv { namespace gpu { namespace imgproc
         cudaSafeCall(cudaThreadSynchronize());
     }
 
+    /////////////////////////////////////////////////////////////////////////
+    // downsample
+    // Each thread handles one destination pixel: dst(y, x) = src(y * k, x * k); rows x cols is the written range.
+    template <typename T>
+    __global__ void downsampleKernel(const PtrStep_<T> src, int rows, int cols, int k, PtrStep_<T> dst)
+    {
+        const int dstX = blockIdx.x * blockDim.x + threadIdx.x;
+        const int dstY = blockIdx.y * blockDim.y + threadIdx.y;
+        if (dstX >= cols || dstY >= rows)
+            return; // threads past the rows x cols range have nothing to write
+        dst.ptr(dstY)[dstX] = src.ptr(dstY * k)[dstX * k];
+    }
+
+    // Launches downsampleKernel with 16x16 thread blocks covering rows x cols and waits for it to finish.
+    template <typename T>
+    void downsampleCaller(const PtrStep_<T> src, int rows, int cols, int k, PtrStep_<T> dst)
+    {
+        dim3 threads(16, 16);
+        dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
+        downsampleKernel<<<grid, threads>>>(src, rows, cols, k, dst);
+        cudaSafeCall(cudaGetLastError()); // launch-time failures (e.g. a bad grid) are reported here
+        cudaSafeCall(cudaThreadSynchronize());
+    }
+
+    template void downsampleCaller(const PtrStep src, int rows, int cols, int k, PtrStep dst);
+    template void downsampleCaller(const PtrStepf src, int rows, int cols, int k, PtrStepf dst);
+
 }}}
 
diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp
index 486ca45..0db8f5a 100644
--- a/modules/gpu/src/imgproc_gpu.cpp
+++ b/modules/gpu/src/imgproc_gpu.cpp
@@ -82,6 +82,7 @@ void cv::gpu::dft(const GpuMat&, GpuMat&, Size, int) { throw_nogpu(); }
 void cv::gpu::ConvolveBuf::create(Size, Size) { throw_nogpu(); }
 void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_nogpu(); }
 void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&) { throw_nogpu(); }
+void cv::gpu::downsample(const GpuMat&, GpuMat&, int) { throw_nogpu(); } // stub for builds without HAVE_CUDA
 
 
 #else /* !defined (HAVE_CUDA) */
@@ -1355,7 +1356,33 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
     cufftSafeCall(cufftDestroy(planC2R));
 }
 
+////////////////////////////////////////////////////////////////////
+// downsample
 
+namespace cv { namespace gpu { namespace imgproc
+{
+    template <typename T>
+    void downsampleCaller(const PtrStep_<T> src, int rows, int cols, int k, PtrStep_<T> dst);
+}}}
+
+void cv::gpu::downsample(const GpuMat& src, GpuMat& dst, int k)
+{
+    // Keeps every k-th pixel of a single-channel CV_8U or CV_32F image; dst is (re)created with the reduced size.
+    CV_Assert(src.channels() == 1);
+    CV_Assert(k > 0); // k is the divisor below: 0 would divide by zero, a negative k gives a meaningless size
+    dst.create((src.rows + k - 1) / k, (src.cols + k - 1) / k, src.type());
+    switch (src.depth())
+    {
+    case CV_8U:
+        imgproc::downsampleCaller((const PtrStep)src, dst.rows, dst.cols, k, (PtrStep)dst);
+        break;
+    case CV_32F:
+        imgproc::downsampleCaller((const PtrStepf)src, dst.rows, dst.cols, k, (PtrStepf)dst);
+        break;
+    default:
+        CV_Error(CV_StsUnsupportedFormat, "bad image depth in downsample function");
+    }
+}
 
 #endif /* !defined (HAVE_CUDA) */
 
diff --git a/modules/gpu/test/test_blend.cpp b/modules/gpu/test/test_blend.cpp
index 60955ad..9479695 100644
--- a/modules/gpu/test/test_blend.cpp
+++ b/modules/gpu/test/test_blend.cpp
@@ -47,8 +47,9 @@ using namespace cv::gpu;
 
 TEST(blendLinear, accuracy_on_8U)
 {
-    Size size(607, 1021);
-    RNG rng(0);
+    RNG& rng = cvtest::TS::ptr()->get_rng();
+    Size size(200 + cvtest::randInt(rng) % 1000,
+              200 + cvtest::randInt(rng) % 1000);
     for (int cn = 1; cn <= 4; ++cn)
     {
         Mat img1 = cvtest::randomMat(rng, size, CV_MAKE_TYPE(CV_8U, cn), 0, 255, false);
@@ -66,14 +67,16 @@ TEST(blendLinear, accuracy_on_8U)
             }
         GpuMat d_result;
         blendLinear(GpuMat(img1), GpuMat(img2), GpuMat(weights1), GpuMat(weights2), d_result);
-        ASSERT_LE(cvtest::norm(result_gold, Mat(d_result), NORM_INF), 1) << ", cn=" << cn;
+        ASSERT_LE(cvtest::norm(result_gold, Mat(d_result), NORM_INF), 1) 
+            << "rows=" << size.height << ", cols=" << size.width << ", cn=" << cn;
     }
 }
 
 TEST(blendLinear, accuracy_on_32F)
 {
-    Size size(607, 1021);
-    RNG rng(0);
+    RNG& rng = cvtest::TS::ptr()->get_rng();
+    Size size(200 + cvtest::randInt(rng) % 1000,
+              200 + cvtest::randInt(rng) % 1000);
     for (int cn = 1; cn <= 4; ++cn)
     {
         Mat img1 = cvtest::randomMat(rng, size, CV_MAKE_TYPE(CV_32F, cn), 0, 1, false);
@@ -91,6 +94,7 @@ TEST(blendLinear, accuracy_on_32F)
             }
         GpuMat d_result;
         blendLinear(GpuMat(img1), GpuMat(img2), GpuMat(weights1), GpuMat(weights2), d_result);
-        ASSERT_LE(cvtest::norm(result_gold, Mat(d_result), NORM_INF), 1e-3) << ", cn=" << cn;
+        ASSERT_LE(cvtest::norm(result_gold, Mat(d_result), NORM_INF), 1e-3)
+            << "rows=" << size.height << ", cols=" << size.width << ", cn=" << cn;
     }
-}
\ No newline at end of file
+}
diff --git a/modules/gpu/test/test_imgproc_gpu.cpp b/modules/gpu/test/test_imgproc_gpu.cpp
index b8363a0..f61698e 100644
--- a/modules/gpu/test/test_imgproc_gpu.cpp
+++ b/modules/gpu/test/test_imgproc_gpu.cpp
@@ -914,3 +914,53 @@ TEST(minEigen, accuracy) { CV_GpuCornerMinEigenValTest test; test.safe_run(); }
 TEST(columnSum, accuracy) { CV_GpuColumnSumTest test; test.safe_run(); }
 TEST(norm, accuracy) { CV_GpuNormTest test; test.safe_run(); }
 TEST(reprojectImageTo3D, accuracy) { CV_GpuReprojectImageTo3DTest test; test.safe_run(); }
+
+TEST(downsample, accuracy_on_8U)
+{
+    RNG& rng = cvtest::TS::ptr()->get_rng();
+    Size size(200 + cvtest::randInt(rng) % 1000, 200 + cvtest::randInt(rng) % 1000);
+    Mat src = cvtest::randomMat(rng, size, CV_8U, 0, 255, false);
+    // Run gpu::downsample on the same random CV_8U source for every factor k in [2, 5].
+    for (int k = 2; k <= 5; ++k)
+    {
+        GpuMat d_dst;
+        downsample(GpuMat(src), d_dst, k);       
+        // Expected size is ceil(cols / k) x ceil(rows / k); Size takes (width, height).
+        Size dst_gold_size((src.cols + k - 1) / k, (src.rows + k - 1) / k);
+        ASSERT_EQ(dst_gold_size.width, d_dst.cols) 
+            << "rows=" << size.height << ", cols=" << size.width << ", k=" << k;
+        ASSERT_EQ(dst_gold_size.height, d_dst.rows) 
+            << "rows=" << size.height << ", cols=" << size.width << ", k=" << k;
+        // Every destination pixel (y, x) must equal the source pixel at (y * k, x * k).
+        Mat dst = d_dst;
+        for (int y = 0; y < dst.rows; ++y)
+            for (int x = 0; x < dst.cols; ++x)
+                ASSERT_EQ(src.at<uchar>(y * k, x * k), dst.at<uchar>(y, x))
+                    << "rows=" << size.height << ", cols=" << size.width << ", k=" << k;
+    }
+}
+
+TEST(downsample, accuracy_on_32F)
+{
+    RNG& rng = cvtest::TS::ptr()->get_rng();
+    Size size(200 + cvtest::randInt(rng) % 1000, 200 + cvtest::randInt(rng) % 1000);
+    Mat src = cvtest::randomMat(rng, size, CV_32F, 0, 1, false);
+    // Run gpu::downsample on the same random CV_32F source for every factor k in [2, 5].
+    for (int k = 2; k <= 5; ++k)
+    {
+        GpuMat d_dst;
+        downsample(GpuMat(src), d_dst, k);       
+        // Expected size is ceil(cols / k) x ceil(rows / k); Size takes (width, height).
+        Size dst_gold_size((src.cols + k - 1) / k, (src.rows + k - 1) / k);
+        ASSERT_EQ(dst_gold_size.width, d_dst.cols) 
+            << "rows=" << size.height << ", cols=" << size.width << ", k=" << k;
+        ASSERT_EQ(dst_gold_size.height, d_dst.rows) 
+            << "rows=" << size.height << ", cols=" << size.width << ", k=" << k;
+        // Every destination pixel (y, x) is compared with the source pixel at (y * k, x * k).
+        Mat dst = d_dst;
+        for (int y = 0; y < dst.rows; ++y)
+            for (int x = 0; x < dst.cols; ++x)
+                ASSERT_FLOAT_EQ(src.at<float>(y * k, x * k), dst.at<float>(y, x))
+                    << "rows=" << size.height << ", cols=" << size.width << ", k=" << k;
+    }
+}
diff --git a/samples/gpu/cascadeclassifier_nvidia_api.cpp b/samples/gpu/cascadeclassifier_nvidia_api.cpp
index 2dd3945..2faabb3 100644
--- a/samples/gpu/cascadeclassifier_nvidia_api.cpp
+++ b/samples/gpu/cascadeclassifier_nvidia_api.cpp
@@ -5,7 +5,10 @@
 #include <iomanip>
 #include <opencv2/opencv.hpp>
 #include <opencv2/gpu/gpu.hpp>
+
+#ifdef HAVE_CUDA
 #include "NCVHaarObjectDetection.hpp"
+#endif
 
 using namespace std;
 using namespace cv;
-- 
2.7.4