// Prepared_SQDIFF\r
\r
template <int cn>\r
- __global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result)\r
+ __global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result)\r
{\r
const int x = blockIdx.x * blockDim.x + threadIdx.x;\r
const int y = blockIdx.y * blockDim.y + threadIdx.y;\r
}\r
\r
template <int cn>\r
- void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result, cudaStream_t stream)\r
+ void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, cudaStream_t stream)\r
{\r
const dim3 threads(32, 8);\r
const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));\r
cudaSafeCall( cudaDeviceSynchronize() );\r
}\r
\r
- void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result, int cn, \r
+ void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, int cn,\r
cudaStream_t stream)\r
{\r
- typedef void (*caller_t)(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result, cudaStream_t stream);\r
+ typedef void (*caller_t)(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, cudaStream_t stream);\r
\r
static const caller_t callers[] = \r
{\r
\r
\r
template <int cn>\r
- __global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U(int w, int h, const PtrStep<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result)\r
+ __global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U(\r
+ int w, int h, const PtrStep<unsigned long long> image_sqsum,\r
+ unsigned long long templ_sqsum, DevMem2Df result)\r
{\r
const int x = blockIdx.x * blockDim.x + threadIdx.x;\r
const int y = blockIdx.y * blockDim.y + threadIdx.y;\r
}\r
\r
template <int cn>\r
- void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, \r
+ void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum,\r
DevMem2Df result, cudaStream_t stream)\r
{\r
const dim3 threads(32, 8);\r
}\r
\r
\r
- void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, \r
+ void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum,\r
DevMem2Df result, int cn, cudaStream_t stream)\r
{\r
- typedef void (*caller_t)(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result, cudaStream_t stream);\r
+ typedef void (*caller_t)(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result, cudaStream_t stream);\r
static const caller_t callers[] = \r
{\r
0, matchTemplatePrepared_SQDIFF_NORMED_8U<1>, matchTemplatePrepared_SQDIFF_NORMED_8U<2>, matchTemplatePrepared_SQDIFF_NORMED_8U<3>, matchTemplatePrepared_SQDIFF_NORMED_8U<4>\r
template <typename T>\r
void sqrSumCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)\r
{\r
- typedef typename SumType<T>::R R;\r
+ typedef double R;\r
\r
dim3 threads, grid;\r
estimateThreadCfg(src.cols, src.rows, threads, grid);\r
void matchTemplateNaive_SQDIFF_8U(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream);\r
void matchTemplateNaive_SQDIFF_32F(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream);\r
\r
- void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result, \r
+ void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result,\r
int cn, cudaStream_t stream);\r
\r
- void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned int templ_sqsum, DevMem2Df result, \r
+ void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum, unsigned long long templ_sqsum, DevMem2Df result,\r
int cn, cudaStream_t stream);\r
\r
void matchTemplatePrepared_CCOFF_8U(int w, int h, const DevMem2D_<unsigned int> image_sum, unsigned int templ_sum, DevMem2Df result, cudaStream_t stream);\r
GpuMat img_sqsum;\r
sqrIntegral(image.reshape(1), img_sqsum, stream);\r
\r
- unsigned int templ_sqsum = (unsigned int)sqrSum(templ.reshape(1))[0];\r
+ unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];\r
\r
matchTemplate_CCORR_8U(image, templ, result, stream);\r
matchTemplatePrepared_SQDIFF_8U(templ.cols, templ.rows, img_sqsum, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));\r
GpuMat img_sqsum;\r
sqrIntegral(image.reshape(1), img_sqsum, stream);\r
\r
- unsigned int templ_sqsum = (unsigned int)sqrSum(templ.reshape(1))[0];\r
+ unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];\r
\r
matchTemplate_CCORR_8U(image, templ, result, stream);\r
matchTemplatePrepared_SQDIFF_NORMED_8U(templ.cols, templ.rows, img_sqsum, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));\r
ALL_DEVICES,
Values(std::make_pair(std::string("matchtemplate/source-0.png"), std::string("matchtemplate/target-0.png")))));
+
+class MatchTemplate_CanFindBigTemplate : public TestWithParam<cv::gpu::DeviceInfo> // fixture for the big-template matchTemplate tests below; the test parameter is a cv::gpu::DeviceInfo
+{
+ virtual void SetUp() // set-up hook of the test fixture; no TearDown counterpart is defined here
+ {
+ cv::gpu::setDevice(GetParam().deviceID()); // hand the device id of the current test parameter to cv::gpu::setDevice
+ }
+};
+
+TEST_P(MatchTemplate_CanFindBigTemplate, SQDIFF_NORMED) // runs cv::gpu::matchTemplate with CV_TM_SQDIFF_NORMED on a fixed scene/template pair and checks the minimum's value and location
+{
+ cv::Mat scene = readImage("matchtemplate/scene.jpg"); // NOTE(review): readImage is defined elsewhere; how the relative path is resolved and whether a failed load is reported is not visible here - confirm
+ cv::Mat templ = readImage("matchtemplate/template.jpg");
+
+ cv::gpu::GpuMat d_scene(scene), d_templ(templ), d_result; // device-side matrices built from the host images; d_result is default-constructed and passed as the output below
+ cv::gpu::matchTemplate(d_scene, d_templ, d_result, CV_TM_SQDIFF_NORMED);
+
+ double minVal;
+ cv::Point minLoc;
+ cv::gpu::minMaxLoc(d_result, &minVal, 0, &minLoc, 0); // only the minimum and its location are requested; both max outputs are passed as 0
+
+ ASSERT_GE(minVal, 0); // NOTE(review): presumably guards against the 32-bit truncation of templ_sqsum that this patch removes (unsigned int -> unsigned long long) - confirm
+ ASSERT_LT(minVal, 1e-3); // the best normalized score is expected to be close to zero for this image pair
+ ASSERT_EQ(344, minLoc.x); // expected location of the minimum, specific to scene.jpg / template.jpg
+ ASSERT_EQ(0, minLoc.y);
+}
+
+TEST_P(MatchTemplate_CanFindBigTemplate, SQDIFF) // same scenario as the SQDIFF_NORMED test but with CV_TM_SQDIFF; no upper bound is asserted on the minimum value
+{
+ cv::Mat scene = readImage("matchtemplate/scene.jpg"); // NOTE(review): readImage is defined elsewhere; how the relative path is resolved and whether a failed load is reported is not visible here - confirm
+ cv::Mat templ = readImage("matchtemplate/template.jpg");
+
+ cv::gpu::GpuMat d_scene(scene), d_templ(templ), d_result; // device-side matrices built from the host images; d_result is default-constructed and passed as the output below
+ cv::gpu::matchTemplate(d_scene, d_templ, d_result, CV_TM_SQDIFF);
+
+ double minVal;
+ cv::Point minLoc;
+ cv::gpu::minMaxLoc(d_result, &minVal, 0, &minLoc, 0); // only the minimum and its location are requested; both max outputs are passed as 0
+
+ ASSERT_GE(minVal, 0); // NOTE(review): presumably guards against the 32-bit truncation of templ_sqsum that this patch removes (unsigned int -> unsigned long long) - confirm
+ ASSERT_EQ(344, minLoc.x); // expected location of the minimum, specific to scene.jpg / template.jpg
+ ASSERT_EQ(0, minLoc.y);
+}
+
+INSTANTIATE_TEST_CASE_P(ImgProc, MatchTemplate_CanFindBigTemplate, ALL_DEVICES); // instantiates both TEST_P cases under the ImgProc prefix; ALL_DEVICES (defined elsewhere) supplies the device parameters
+
////////////////////////////////////////////////////////////////////////////
// MulSpectrums