From 97282d8ff8a9ef8d31e6643793f8ffb31c686190 Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Fri, 8 Apr 2011 08:04:56 +0000 Subject: [PATCH] added downsample function into gpu module, refactored it a little bit, added guard for CUDA related include in cascadeclassifier_nvidia_api.cpp --- modules/gpu/include/opencv2/gpu/gpu.hpp | 8 +++-- modules/gpu/src/blend.cpp | 6 ++-- modules/gpu/src/cuda/blend.cu | 2 +- modules/gpu/src/cuda/imgproc.cu | 27 +++++++++++++++ modules/gpu/src/imgproc_gpu.cpp | 27 +++++++++++++++ modules/gpu/test/test_blend.cpp | 18 ++++++---- modules/gpu/test/test_imgproc_gpu.cpp | 50 ++++++++++++++++++++++++++++ samples/gpu/cascadeclassifier_nvidia_api.cpp | 3 ++ 8 files changed, 127 insertions(+), 14 deletions(-) diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index a19809a..874666c 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -786,11 +786,13 @@ namespace cv //! computes the proximity map for the raster template and the image where the template is searched for CV_EXPORTS void matchTemplate(const GpuMat& image, const GpuMat& templ, GpuMat& result, int method); + //! downsamples image + CV_EXPORTS void downsample(const GpuMat& src, GpuMat& dst, int k=2); + //! performs linear blending of two images //! to avoid accuracy errors sum of weigths shouldn't be very close to zero - CV_EXPORTS void blendLinear(const GpuMat& img1, const GpuMat& img2, - const GpuMat& weights1, const GpuMat& weights2, GpuMat& result); - + CV_EXPORTS void blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2, + GpuMat& result); ////////////////////////////// Matrix reductions ////////////////////////////// diff --git a/modules/gpu/src/blend.cpp b/modules/gpu/src/blend.cpp index 7c782e6..c5dfd58 100644 --- a/modules/gpu/src/blend.cpp +++ b/modules/gpu/src/blend.cpp @@ -63,8 +63,8 @@ namespace cv { namespace gpu const PtrStepf weights1, const PtrStepf weights2, PtrStep result); }} -void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, - const GpuMat& weights1, const GpuMat& weights2, GpuMat& result) +void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2, + GpuMat& result) { CV_Assert(img1.size() == img2.size()); CV_Assert(img1.type() == img2.type()); @@ -94,7 +94,7 @@ void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, (const PtrStepf)weights1, (const PtrStepf)weights2, (PtrStepf)result); break; default: - CV_Error(CV_StsBadArg, "unsupported image depth in linear blending method"); + CV_Error(CV_StsUnsupportedFormat, "bad image depth in linear blending function"); } } diff --git a/modules/gpu/src/cuda/blend.cu b/modules/gpu/src/cuda/blend.cu index a9b85c1..42b5a63 100644 --- a/modules/gpu/src/cuda/blend.cu +++ b/modules/gpu/src/cuda/blend.cu @@ -73,7 +73,7 @@ namespace cv { namespace gpu dim3 threads(16, 16); dim3 grid(divUp(cols * cn, threads.x), divUp(rows, threads.y)); - blendLinearKernel<<>>(rows, cols * cn, cn, img1, img2, weights1, weights2, result); + blendLinearKernel<<>>(rows, cols * cn, cn, img1, img2, weights1, weights2, result); cudaSafeCall(cudaThreadSynchronize()); } diff --git a/modules/gpu/src/cuda/imgproc.cu b/modules/gpu/src/cuda/imgproc.cu index 2c94f83..dad5335 100644 --- a/modules/gpu/src/cuda/imgproc.cu +++ b/modules/gpu/src/cuda/imgproc.cu @@ -883,5 +883,32 @@ namespace cv { namespace gpu { namespace imgproc cudaSafeCall(cudaThreadSynchronize()); } + ///////////////////////////////////////////////////////////////////////// + // downsample + + template + __global__ void downsampleKernel(const PtrStep_ src, int rows, int cols, int k, PtrStep_ dst) + { + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x < cols && y < rows) + dst.ptr(y)[x] = src.ptr(y * k)[x * k]; + } + + + template + void downsampleCaller(const PtrStep_ src, int rows, int cols, int k, PtrStep_ dst) + { + dim3 threads(16, 16); + dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y)); + + downsampleKernel<<>>(src, rows, cols, k, dst); + cudaSafeCall(cudaThreadSynchronize()); + } + + template void downsampleCaller(const PtrStep src, int rows, int cols, int k, PtrStep dst); + template void downsampleCaller(const PtrStepf src, int rows, int cols, int k, PtrStepf dst); + }}} diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp index 486ca45..0db8f5a 100644 --- a/modules/gpu/src/imgproc_gpu.cpp +++ b/modules/gpu/src/imgproc_gpu.cpp @@ -82,6 +82,7 @@ void cv::gpu::dft(const GpuMat&, GpuMat&, Size, int) { throw_nogpu(); } void cv::gpu::ConvolveBuf::create(Size, Size) { throw_nogpu(); } void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_nogpu(); } void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&) { throw_nogpu(); } +void cv::gpu::downsample(const GpuMat&, GpuMat&, int) { throw_nogpu(); } #else /* !defined (HAVE_CUDA) */ @@ -1355,7 +1356,33 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, cufftSafeCall(cufftDestroy(planC2R)); } +//////////////////////////////////////////////////////////////////// +// downsample +namespace cv { namespace gpu { namespace imgproc +{ + template + void downsampleCaller(const PtrStep_ src, int rows, int cols, int k, PtrStep_ dst); +}}} + +void cv::gpu::downsample(const GpuMat& src, GpuMat& dst, int k) +{ + CV_Assert(src.channels() == 1); + + dst.create((src.rows + k - 1) / k, (src.cols + k - 1) / k, src.type()); + + switch (src.depth()) + { + case CV_8U: + imgproc::downsampleCaller((const PtrStep)src, dst.rows, dst.cols, k, (PtrStep)dst); + break; + case CV_32F: + imgproc::downsampleCaller((const PtrStepf)src, dst.rows, dst.cols, k, (PtrStepf)dst); + break; + default: + CV_Error(CV_StsUnsupportedFormat, "bad image depth in downsample function"); + } +} #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/gpu/test/test_blend.cpp b/modules/gpu/test/test_blend.cpp index 60955ad..9479695 100644 --- a/modules/gpu/test/test_blend.cpp +++ b/modules/gpu/test/test_blend.cpp @@ -47,8 +47,9 @@ using namespace cv::gpu; TEST(blendLinear, accuracy_on_8U) { - Size size(607, 1021); - RNG rng(0); + RNG& rng = cvtest::TS::ptr()->get_rng(); + Size size(200 + cvtest::randInt(rng) % 1000, + 200 + cvtest::randInt(rng) % 1000); for (int cn = 1; cn <= 4; ++cn) { Mat img1 = cvtest::randomMat(rng, size, CV_MAKE_TYPE(CV_8U, cn), 0, 255, false); @@ -66,14 +67,16 @@ TEST(blendLinear, accuracy_on_8U) } GpuMat d_result; blendLinear(GpuMat(img1), GpuMat(img2), GpuMat(weights1), GpuMat(weights2), d_result); - ASSERT_LE(cvtest::norm(result_gold, Mat(d_result), NORM_INF), 1) << ", cn=" << cn; + ASSERT_LE(cvtest::norm(result_gold, Mat(d_result), NORM_INF), 1) + << "rows=" << size.height << ", cols=" << size.width << ", cn=" << cn; } } TEST(blendLinear, accuracy_on_32F) { - Size size(607, 1021); - RNG rng(0); + RNG& rng = cvtest::TS::ptr()->get_rng(); + Size size(200 + cvtest::randInt(rng) % 1000, + 200 + cvtest::randInt(rng) % 1000); for (int cn = 1; cn <= 4; ++cn) { Mat img1 = cvtest::randomMat(rng, size, CV_MAKE_TYPE(CV_32F, cn), 0, 1, false); @@ -91,6 +94,7 @@ TEST(blendLinear, accuracy_on_32F) } GpuMat d_result; blendLinear(GpuMat(img1), GpuMat(img2), GpuMat(weights1), GpuMat(weights2), d_result); - ASSERT_LE(cvtest::norm(result_gold, Mat(d_result), NORM_INF), 1e-3) << ", cn=" << cn; + ASSERT_LE(cvtest::norm(result_gold, Mat(d_result), NORM_INF), 1e-3) + << "rows=" << size.height << ", cols=" << size.width << ", cn=" << cn; } -} \ No newline at end of file +} diff --git a/modules/gpu/test/test_imgproc_gpu.cpp b/modules/gpu/test/test_imgproc_gpu.cpp index b8363a0..f61698e 100644 --- a/modules/gpu/test/test_imgproc_gpu.cpp +++ b/modules/gpu/test/test_imgproc_gpu.cpp @@ -914,3 +914,53 @@ TEST(minEigen, accuracy) { CV_GpuCornerMinEigenValTest test; test.safe_run(); } TEST(columnSum, accuracy) { CV_GpuColumnSumTest test; test.safe_run(); } TEST(norm, accuracy) { CV_GpuNormTest test; test.safe_run(); } TEST(reprojectImageTo3D, accuracy) { CV_GpuReprojectImageTo3DTest test; test.safe_run(); } + +TEST(downsample, accuracy_on_8U) +{ + RNG& rng = cvtest::TS::ptr()->get_rng(); + Size size(200 + cvtest::randInt(rng) % 1000, 200 + cvtest::randInt(rng) % 1000); + Mat src = cvtest::randomMat(rng, size, CV_8U, 0, 255, false); + + for (int k = 2; k <= 5; ++k) + { + GpuMat d_dst; + downsample(GpuMat(src), d_dst, k); + + Size dst_gold_size((src.cols + k - 1) / k, (src.rows + k - 1) / k); + ASSERT_EQ(dst_gold_size.width, d_dst.cols) + << "rows=" << size.height << ", cols=" << size.width << ", k=" << k; + ASSERT_EQ(dst_gold_size.height, d_dst.rows) + << "rows=" << size.height << ", cols=" << size.width << ", k=" << k; + + Mat dst = d_dst; + for (int y = 0; y < dst.rows; ++y) + for (int x = 0; x < dst.cols; ++x) + ASSERT_EQ(src.at(y * k, x * k), dst.at(y, x)) + << "rows=" << size.height << ", cols=" << size.width << ", k=" << k; + } +} + +TEST(downsample, accuracy_on_32F) +{ + RNG& rng = cvtest::TS::ptr()->get_rng(); + Size size(200 + cvtest::randInt(rng) % 1000, 200 + cvtest::randInt(rng) % 1000); + Mat src = cvtest::randomMat(rng, size, CV_32F, 0, 1, false); + + for (int k = 2; k <= 5; ++k) + { + GpuMat d_dst; + downsample(GpuMat(src), d_dst, k); + + Size dst_gold_size((src.cols + k - 1) / k, (src.rows + k - 1) / k); + ASSERT_EQ(dst_gold_size.width, d_dst.cols) + << "rows=" << size.height << ", cols=" << size.width << ", k=" << k; + ASSERT_EQ(dst_gold_size.height, d_dst.rows) + << "rows=" << size.height << ", cols=" << size.width << ", k=" << k; + + Mat dst = d_dst; + for (int y = 0; y < dst.rows; ++y) + for (int x = 0; x < dst.cols; ++x) + ASSERT_FLOAT_EQ(src.at(y * k, x * k), dst.at(y, x)) + << "rows=" << size.height << ", cols=" << size.width << ", k=" << k; + } +} diff --git a/samples/gpu/cascadeclassifier_nvidia_api.cpp b/samples/gpu/cascadeclassifier_nvidia_api.cpp index 2dd3945..2faabb3 100644 --- a/samples/gpu/cascadeclassifier_nvidia_api.cpp +++ b/samples/gpu/cascadeclassifier_nvidia_api.cpp @@ -5,7 +5,10 @@ #include #include #include + +#ifdef HAVE_CUDA #include "NCVHaarObjectDetection.hpp" +#endif using namespace std; using namespace cv; -- 2.7.4