From 50b72197ab672d808e6883bd10294dc5a0edab69 Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Mon, 29 Aug 2011 08:06:45 +0000 Subject: [PATCH] fixed bug in gpu::matchTemplate (added normalization routine to make the GPU version consistent with the CPU one), added test cases from the ticket #1341 --- modules/gpu/src/cuda/match_template.cu | 84 ++++++++++++++++++++++------------ modules/gpu/test/test_imgproc.cpp | 81 ++++++++++++++++++++++++-------- 2 files changed, 117 insertions(+), 48 deletions(-) diff --git a/modules/gpu/src/cuda/match_template.cu b/modules/gpu/src/cuda/match_template.cu index 301a0b6..3f8e92a 100644 --- a/modules/gpu/src/cuda/match_template.cu +++ b/modules/gpu/src/cuda/match_template.cu @@ -313,6 +313,29 @@ void matchTemplatePrepared_SQDIFF_8U( } +// normAcc* are accurate normalization routines which make GPU matchTemplate +// consistent with CPU one + +__device__ float normAcc(float num, float denum) +{ + if (fabs(num) < denum) + return num / denum; + if (fabs(num) < denum * 1.125f) + return num > 0 ? 1 : -1; + return 0; +} + + +__device__ float normAcc_SQDIFF(float num, float denum) +{ + if (fabs(num) < denum) + return num / denum; + if (fabs(num) < denum * 1.125f) + return num > 0 ? 1 : -1; + return 1; +} + + template __global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U( int w, int h, const PtrStep_ image_sqsum, @@ -327,8 +350,8 @@ __global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U( (image_sqsum.ptr(y + h)[(x + w) * cn] - image_sqsum.ptr(y)[(x + w) * cn]) - (image_sqsum.ptr(y + h)[x * cn] - image_sqsum.ptr(y)[x * cn])); float ccorr = result.ptr(y)[x]; - result.ptr(y)[x] = min(1.f, (image_sqsum_ - 2.f * ccorr + templ_sqsum) * - rsqrtf(image_sqsum_ * templ_sqsum)); + result.ptr(y)[x] = normAcc_SQDIFF(image_sqsum_ - 2.f * ccorr + templ_sqsum, + sqrtf(image_sqsum_ * templ_sqsum)); } } @@ -440,7 +463,7 @@ void matchTemplatePrepared_CCOFF_8UC2( __global__ void matchTemplatePreparedKernel_CCOFF_8UC3( int w, int h, - float templ_sum_scale_r, + float templ_sum_scale_r, float templ_sum_scale_g, float templ_sum_scale_b, const PtrStep_ image_sum_r, @@ -463,7 +486,7 @@ __global__ void matchTemplatePreparedKernel_CCOFF_8UC3( (image_sum_b.ptr(y + h)[x + w] - image_sum_b.ptr(y)[x + w]) - (image_sum_b.ptr(y + h)[x] - image_sum_b.ptr(y)[x])); float ccorr = result.ptr(y)[x]; - result.ptr(y)[x] = ccorr - image_sum_r_ * templ_sum_scale_r + result.ptr(y)[x] = ccorr - image_sum_r_ * templ_sum_scale_r - image_sum_g_ * templ_sum_scale_g - image_sum_b_ * templ_sum_scale_b; } @@ -484,8 +507,8 @@ void matchTemplatePrepared_CCOFF_8UC3( dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y)); matchTemplatePreparedKernel_CCOFF_8UC3<<>>( w, h, - (float)templ_sum_r / (w * h), - (float)templ_sum_g / (w * h), + (float)templ_sum_r / (w * h), + (float)templ_sum_g / (w * h), (float)templ_sum_b / (w * h), image_sum_r, image_sum_g, image_sum_b, result); cudaSafeCall( cudaGetLastError() ); @@ -579,8 +602,8 @@ __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8U( float image_sqsum_ = (float)( (image_sqsum.ptr(y + h)[x + w] - image_sqsum.ptr(y)[x + w]) - (image_sqsum.ptr(y + h)[x] - image_sqsum.ptr(y)[x])); - result.ptr(y)[x] = (ccorr - image_sum_ * templ_sum_scale) * - rsqrtf(templ_sqsum_scale * max(1e-3f, image_sqsum_ - weight * image_sum_ * image_sum_)); + result.ptr(y)[x] = normAcc(ccorr - image_sum_ * templ_sum_scale, + sqrtf(templ_sqsum_scale * (image_sqsum_ - weight * image_sum_ * image_sum_))); } } @@ -631,11 +654,12 @@ __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC2( float image_sqsum_g_ = (float)( (image_sqsum_g.ptr(y + h)[x + w] - image_sqsum_g.ptr(y)[x + w]) - (image_sqsum_g.ptr(y + h)[x] - image_sqsum_g.ptr(y)[x])); - float ccorr = result.ptr(y)[x]; - float rdenom = rsqrtf(templ_sqsum_scale * max(1e-3f, image_sqsum_r_ - weight * image_sum_r_ * image_sum_r_ - + image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_)); - result.ptr(y)[x] = (ccorr - image_sum_r_ * templ_sum_scale_r - - image_sum_g_ * templ_sum_scale_g) * rdenom; + + float num = result.ptr(y)[x] - image_sum_r_ * templ_sum_scale_r + - image_sum_g_ * templ_sum_scale_g; + float denum = sqrtf(templ_sqsum_scale * (image_sqsum_r_ - weight * image_sum_r_ * image_sum_r_ + + image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_)); + result.ptr(y)[x] = normAcc(num, denum); } } @@ -701,13 +725,14 @@ __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC3( float image_sqsum_b_ = (float)( (image_sqsum_b.ptr(y + h)[x + w] - image_sqsum_b.ptr(y)[x + w]) - (image_sqsum_b.ptr(y + h)[x] - image_sqsum_b.ptr(y)[x])); - float ccorr = result.ptr(y)[x]; - float rdenom = rsqrtf(templ_sqsum_scale * max(1e-3f, image_sqsum_r_ - weight * image_sum_r_ * image_sum_r_ - + image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_ - + image_sqsum_b_ - weight * image_sum_b_ * image_sum_b_)); - result.ptr(y)[x] = (ccorr - image_sum_r_ * templ_sum_scale_r - - image_sum_g_ * templ_sum_scale_g - - image_sum_b_ * templ_sum_scale_b) * rdenom; + + float num = result.ptr(y)[x] - image_sum_r_ * templ_sum_scale_r + - image_sum_g_ * templ_sum_scale_g + - image_sum_b_ * templ_sum_scale_b; + float denum = sqrtf(templ_sqsum_scale * (image_sqsum_r_ - weight * image_sum_r_ * image_sum_r_ + + image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_ + + image_sqsum_b_ - weight * image_sum_b_ * image_sum_b_)); + result.ptr(y)[x] = normAcc(num, denum); } } @@ -785,15 +810,14 @@ __global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC4( float image_sqsum_a_ = (float)( (image_sqsum_a.ptr(y + h)[x + w] - image_sqsum_a.ptr(y)[x + w]) - (image_sqsum_a.ptr(y + h)[x] - image_sqsum_a.ptr(y)[x])); - float ccorr = result.ptr(y)[x]; - float rdenom = rsqrtf(templ_sqsum_scale * max(1e-3f, image_sqsum_r_ - weight * image_sum_r_ * image_sum_r_ - + image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_ - + image_sqsum_b_ - weight * image_sum_b_ * image_sum_b_ - + image_sqsum_a_ - weight * image_sum_a_ * image_sum_a_)); - result.ptr(y)[x] = (ccorr - image_sum_r_ * templ_sum_scale_r - - image_sum_g_ * templ_sum_scale_g - - image_sum_b_ * templ_sum_scale_b - - image_sum_a_ * templ_sum_scale_a) * rdenom; + + float num = result.ptr(y)[x] - image_sum_r_ * templ_sum_scale_r - image_sum_g_ * templ_sum_scale_g + - image_sum_b_ * templ_sum_scale_b - image_sum_a_ * templ_sum_scale_a; + float denum = sqrtf(templ_sqsum_scale * (image_sqsum_r_ - weight * image_sum_r_ * image_sum_r_ + + image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_ + + image_sqsum_b_ - weight * image_sum_b_ * image_sum_b_ + + image_sqsum_a_ - weight * image_sum_a_ * image_sum_a_)); + result.ptr(y)[x] = normAcc(num, denum); } } @@ -850,7 +874,7 @@ __global__ void normalizeKernel_8U( float image_sqsum_ = (float)( (image_sqsum.ptr(y + h)[(x + w) * cn] - image_sqsum.ptr(y)[(x + w) * cn]) - (image_sqsum.ptr(y + h)[x * cn] - image_sqsum.ptr(y)[x * cn])); - result.ptr(y)[x] = result.ptr(y)[x] * rsqrtf(max(1.f, image_sqsum_) * templ_sqsum); + result.ptr(y)[x] = normAcc(result.ptr(y)[x], sqrtf(image_sqsum_ * templ_sqsum)); } } diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp index ab01142..169a10b 100644 --- a/modules/gpu/test/test_imgproc.cpp +++ b/modules/gpu/test/test_imgproc.cpp @@ -3594,13 +3594,8 @@ INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate32F, testing::Combine( testing::Range(1, 5), testing::Values((int)CV_TM_SQDIFF, (int)CV_TM_CCORR))); -struct MatchTemplate : testing::TestWithParam< std::tr1::tuple > +struct MatchTemplateBlackSource : testing::TestWithParam< std::tr1::tuple > { - cv::Mat image; - cv::Mat pattern; - - cv::Point maxLocGold; - cv::gpu::DeviceInfo devInfo; int method; @@ -3608,26 +3603,25 @@ struct MatchTemplate : testing::TestWithParam< std::tr1::tuple(GetParam()); method = std::tr1::get<1>(GetParam()); - cv::gpu::setDevice(devInfo.deviceID()); - - image = readImage("matchtemplate/black.png"); - ASSERT_FALSE(image.empty()); - - pattern = readImage("matchtemplate/cat.png"); - ASSERT_FALSE(pattern.empty()); - - maxLocGold = cv::Point(284, 12); } }; -TEST_P(MatchTemplate, FindPatternInBlack) +TEST_P(MatchTemplateBlackSource, Accuracy) { const char* matchTemplateMethodStr = matchTemplateMethods[method]; PRINT_PARAM(devInfo); PRINT_PARAM(matchTemplateMethodStr); + cv::Mat image = readImage("matchtemplate/black.png"); + ASSERT_FALSE(image.empty()); + + cv::Mat pattern = readImage("matchtemplate/cat.png"); + ASSERT_FALSE(pattern.empty()); + + cv::Point maxLocGold = cv::Point(284, 12); + cv::Mat dst; ASSERT_NO_THROW( @@ -3643,10 +3637,61 @@ TEST_P(MatchTemplate, FindPatternInBlack) ASSERT_EQ(maxLocGold, maxLoc); } -INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate, testing::Combine( - testing::ValuesIn(devices()), +INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplateBlackSource, testing::Combine( + testing::ValuesIn(devices()), testing::Values((int)CV_TM_CCOEFF_NORMED, (int)CV_TM_CCORR_NORMED))); + +struct MatchTemplate_CCOEF_NORMED : testing::TestWithParam< std::tr1::tuple > > +{ + cv::gpu::DeviceInfo devInfo; + cv::Mat image, pattern; + + virtual void SetUp() + { + devInfo = std::tr1::get<0>(GetParam()); + + image = readImage(std::tr1::get<0>(std::tr1::get<1>(GetParam()))); + ASSERT_FALSE(image.empty()); + + pattern = readImage(std::tr1::get<1>(std::tr1::get<1>(GetParam()))); + ASSERT_FALSE(pattern.empty()); + } +}; + +TEST_P(MatchTemplate_CCOEF_NORMED, Accuracy) +{ + PRINT_PARAM(devInfo); + + cv::Mat dstGold; + cv::matchTemplate(image, pattern, dstGold, CV_TM_CCOEFF_NORMED); + cv::Point minLocGold, maxLocGold; + cv::minMaxLoc(dstGold, NULL, NULL, &minLocGold, &maxLocGold); + + cv::Mat dst; + ASSERT_NO_THROW( + cv::gpu::GpuMat dev_dst; + cv::gpu::matchTemplate(cv::gpu::GpuMat(image), cv::gpu::GpuMat(pattern), dev_dst, CV_TM_CCOEFF_NORMED); + dev_dst.download(dst); + ); + + cv::Point minLoc, maxLoc; + double minVal, maxVal; + cv::minMaxLoc(dst, &minVal, &maxVal, &minLoc, &maxLoc); + + ASSERT_EQ(minLocGold, minLoc); + ASSERT_EQ(maxLocGold, maxLoc); + ASSERT_LE(maxVal, 1.); + ASSERT_GE(minVal, -1.); +} + + +INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_CCOEF_NORMED, testing::Combine( + testing::ValuesIn(devices()), + testing::Values(std::tr1::make_tuple("matchtemplate/source-0.png", "matchtemplate/target-0.png"), + std::tr1::make_tuple("matchtemplate/source-1.png", "matchtemplate/target-1.png")))); + + //////////////////////////////////////////////////////////////////////////// // MulSpectrums -- 2.7.4