1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
18 // Peng Xiao, pengxiao@multicorewareinc.com
20 // Redistribution and use in source and binary forms, with or without modification,
21 // are permitted provided that the following conditions are met:
23 // * Redistribution's of source code must retain the above copyright notice,
24 // this list of conditions and the following disclaimer.
26 // * Redistribution's in binary form must reproduce the above copyright notice,
27 // this list of conditions and the following disclaimer in the documentation
28 // and/or other materials provided with the distribution.
30 // * The name of the copyright holders may not be used to endorse or promote products
31 // derived from this software without specific prior written permission.
33 // This software is provided by the copyright holders and contributors as is and
34 // any express or implied warranties, including, but not limited to, the implied
35 // warranties of merchantability and fitness for a particular purpose are disclaimed.
36 // In no event shall the Intel Corporation or contributors be liable for any direct,
37 // indirect, incidental, special, exemplary, or consequential damages
38 // (including, but not limited to, procurement of substitute goods or services;
39 // loss of use, data, or profits; or business interruption) however caused
40 // and on any theory of liability, whether in contract, strict liability,
41 // or tort (including negligence or otherwise) arising in any way out of
42 // the use of this software, even if advised of the possibility of such damage.
48 #include "precomp.hpp"
51 using namespace cv::ocl;
56 #if !defined (HAVE_OPENCL)
57 void cv::ocl::matchTemplate(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); }
64 ///////////////////////////OpenCL kernel strings///////////////////////////
// Declared here and defined in another translation unit. Its address is handed
// to openCLExecuteKernel() by every kernel launch in this file.
65 extern const char *match_template;
69 namespace cv { namespace ocl
// Forward declarations of the per-method implementations defined further down
// in this file. The six buffer-taking functions share one signature so that
// cv::ocl::matchTemplate() can store them in a single dispatch table.
71 void matchTemplate_SQDIFF(
72 const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
74 void matchTemplate_SQDIFF_NORMED(
75 const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
77 void matchTemplate_CCORR(
78 const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
80 void matchTemplate_CCORR_NORMED(
81 const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
83 void matchTemplate_CCOFF(
84 const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
86 void matchTemplate_CCOFF_NORMED(
87 const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
// Kernel-launching helpers that matchTemplate_SQDIFF / matchTemplate_CCORR call
// when the template area is below getTemplateThreshold().
90 void matchTemplateNaive_SQDIFF(
91 const oclMat& image, const oclMat& templ, oclMat& result, int cn);
93 void matchTemplateNaive_CCORR(
94 const oclMat& image, const oclMat& templ, oclMat& result, int cn);
96 // Evaluates optimal template's area threshold. If
97 // template's area is less than the threshold, we use naive match
98 // template version, otherwise FFT-based (if available)
//
// method: match mode; the callers in this file only pass CV_TM_SQDIFF.
// depth:  image depth; only CV_32F and CV_8U produce a return value.
// Returns the area threshold (250, 300 or 0x7fffffff). When no return statement
// is reached, CV_Error(CV_StsBadArg) is raised.
// NOTE(review): in matchTemplate_SQDIFF / matchTemplate_CCORR the path taken for
// areas at or above the threshold raises "Not supported yet for this size
// template", so no FFT-based implementation is visible in this file.
// NOTE(review): the lines selecting between the two groups of returns below are
// not visible in this listing; the inline comment indicates that the second
// group is the SQDIFF case - confirm which mode the first group belongs to.
99 int getTemplateThreshold(int method, int depth)
104 if (depth == CV_32F) return 250;
105 if (depth == CV_8U) return 300;
// 0x7fffffff can never be exceeded by an int area, so CV_32F always stays on
// the naive path for this mode.
108 if (depth == CV_32F) return 0x7fffffff; // do naive SQDIFF for CV_32F
109 if (depth == CV_8U) return 300;
112 CV_Error(CV_StsBadArg, "getTemplateThreshold: unsupported match template mode");
117 //////////////////////////////////////////////////////////////////////
// Squared-difference template matching.
// Allocates `result` as a CV_32F matrix of
// (image.rows - templ.rows + 1) x (image.cols - templ.cols + 1) and, when the
// template area is below getTemplateThreshold(CV_TM_SQDIFF, depth), delegates
// to matchTemplateNaive_SQDIFF(). `buf` is not referenced in the visible lines.
119 void matchTemplate_SQDIFF(
120 const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
122 result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
123 if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
125 matchTemplateNaive_SQDIFF(image, templ, result, image.channels());
// NOTE(review): presumably the alternative branch for templates at or above the
// threshold (the control-flow lines are not visible here) - confirm.
131 CV_Error(CV_StsBadArg, "Not supported yet for this size template");
// Normalised squared-difference template matching.
// Steps visible below:
//   1. matchTemplate_CCORR() fills `result` with the cross-correlation.
//   2. integral() of the single-channel view of `image` is stored in
//      buf.image_sums[0] / buf.image_sqsums[0].
//   3. The template's sum of squares is computed (two alternative code paths,
//      selected by the EXT_FP64 && SQRSUM_FIXED preprocessor condition).
//   4. The "matchTemplate_Prepared_SQDIFF_NORMED" kernel is launched over a
//      result.cols x result.rows global range.
135 void matchTemplate_SQDIFF_NORMED(
136 const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
138 matchTemplate_CCORR(image,templ,result,buf);
139 buf.image_sums.resize(1);
140 buf.image_sqsums.resize(1);
142 integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
144 #if EXT_FP64 && SQRSUM_FIXED
145 unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
// Alternative path: squares the template through a Mat and sums it.
// NOTE(review): presumably the #else branch of the condition above (the
// directive lines are not visible in this listing) - confirm.
147 Mat sqr_mat = templ.reshape(1);
148 unsigned long long templ_sqsum = (unsigned long long)sum(sqr_mat.mul(sqr_mat))[0];
151 Context *clCxt = image.clCxt;
152 string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
153 vector< pair<size_t, const void *> > args;
// NOTE(review): this passes buf.image_sums[0], whereas matchTemplate_CCORR_NORMED
// passes buf.image_sqsums[0] to its normalisation kernel - confirm against the
// kernel source which integral image is expected here.
155 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
156 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
157 args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
158 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
159 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
160 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
161 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
162 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
163 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
164 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
165 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
// Global range covers the result matrix; local work-group size is 32x8.
167 size_t globalThreads[3] = {result.cols, result.rows, 1};
168 size_t localThreads[3] = {32, 8, 1};
// The kernel is requested with channels = 1 and depth = CV_8U regardless of the
// input type.
169 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
172 void matchTemplateNaive_SQDIFF(
173 const oclMat& image, const oclMat& templ, oclMat& result, int cn)
175 CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
176 || (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F);
177 CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
178 CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
180 Context *clCxt = image.clCxt;
181 string kernelName = "matchTemplate_Naive_SQDIFF";
183 vector< pair<size_t, const void *> > args;
185 args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
186 args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
187 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
188 args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
189 args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
190 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
191 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
192 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
193 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
194 args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
195 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
196 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
197 args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
198 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
199 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
201 size_t globalThreads[3] = {result.cols, result.rows, 1};
202 size_t localThreads[3] = {32, 8, 1};
203 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
206 //////////////////////////////////////////////////////////////////////
// Cross-correlation template matching.
// Allocates `result` as CV_32F of
// (image.rows - templ.rows + 1) x (image.cols - templ.cols + 1) and, for
// templates whose area is below the threshold, delegates to
// matchTemplateNaive_CCORR().
208 void matchTemplate_CCORR(
209 const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
211 result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
// NOTE(review): the threshold is queried with CV_TM_SQDIFF, not a CCORR mode.
// For CV_32F that threshold is 0x7fffffff, which keeps 32F inputs on the naive
// path; switching the mode would send large 32F templates to the
// "Not supported yet" error below - confirm intent before changing.
212 if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
214 matchTemplateNaive_CCORR(image, templ, result, image.channels());
// NOTE(review): presumably the branch for templates at or above the threshold
// (control-flow lines are not visible here). If CV_Error does not return, every
// statement after it in this function is unreachable - confirm.
219 CV_Error(CV_StsBadArg, "Not supported yet for this size template");
// Only 8U inputs are converted; for other depths buf.imagef / buf.templf are
// used below without being assigned in this function.
220 if(image.depth() == CV_8U && templ.depth() == CV_8U)
222 image.convertTo(buf.imagef, CV_32F);
223 templ.convertTo(buf.templf, CV_32F);
225 CV_Assert(image.channels() == 1);
226 oclMat o_result(image.size(), CV_MAKETYPE(CV_32F, image.channels()));
227 filter2D(buf.imagef,o_result,CV_32F,buf.templf, Point(0,0));
// NOTE(review): the third Rect argument is derived from rows and the fourth
// from cols; if Rect takes (x, y, width, height) these look swapped relative to
// the result.create(rows, cols) call above - confirm.
228 result = o_result(Rect(0,0,image.rows - templ.rows + 1, image.cols - templ.cols + 1));
// Normalised cross-correlation template matching.
// Steps visible below:
//   1. matchTemplate_CCORR() fills `result`.
//   2. integral() of the single-channel view of `image` is stored in
//      buf.image_sums[0] / buf.image_sqsums[0].
//   3. The template's sum of squares is computed (three alternative code paths
//      follow; the preprocessor lines separating the last two are not visible
//      in this listing).
//   4. The "normalizeKernel" kernel is launched over a result.cols x result.rows
//      global range with the squared-sum integral image.
232 void matchTemplate_CCORR_NORMED(
233 const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
235 matchTemplate_CCORR(image,templ,result,buf);
236 buf.image_sums.resize(1);
237 buf.image_sqsums.resize(1);
239 integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
240 #if EXT_FP64 && SQRSUM_FIXED
241 unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
// Alternative using oclMat arithmetic.
// NOTE(review): multiply() writes into templ_c1, which comes from
// templ.reshape(1). If reshape() shares storage with `templ`, this squares the
// caller's template in place - confirm.
243 oclMat templ_c1 = templ.reshape(1);
244 multiply(templ_c1, templ_c1, templ_c1);
245 unsigned long long templ_sqsum = (unsigned long long)sum(templ_c1)[0];
// Alternative using a Mat copy of the template.
247 Mat m_templ_c1 = templ.reshape(1);
248 multiply(m_templ_c1, m_templ_c1, m_templ_c1);
249 unsigned long long templ_sqsum = (unsigned long long)sum(m_templ_c1)[0];
251 Context *clCxt = image.clCxt;
252 string kernelName = "normalizeKernel";
253 vector< pair<size_t, const void *> > args;
255 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
256 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
257 args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
258 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
259 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
260 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
261 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
262 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
263 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
264 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
265 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
// Global range covers the result matrix; local work-group size is 32x8.
267 size_t globalThreads[3] = {result.cols, result.rows, 1};
268 size_t localThreads[3] = {32, 8, 1};
// The kernel is requested with channels = 1 and depth = CV_8U regardless of the
// input type.
269 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
272 void matchTemplateNaive_CCORR(
273 const oclMat& image, const oclMat& templ, oclMat& result, int cn)
275 CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
276 || (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F);
277 CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
278 CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
280 Context *clCxt = image.clCxt;
281 string kernelName = "matchTemplate_Naive_CCORR";
283 vector< pair<size_t, const void *> > args;
285 args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
286 args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
287 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
288 args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
289 args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
290 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
291 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
292 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
293 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
294 args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
295 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
296 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
297 args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
298 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
299 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
301 size_t globalThreads[3] = {result.cols, result.rows, 1};
302 size_t localThreads[3] = {32, 8, 1};
303 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
305 //////////////////////////////////////////////////////////////////////
// Correlation-coefficient template matching (CV_8U inputs only, enforced by the
// assert below).
// Steps visible below:
//   1. matchTemplate_CCORR() fills `result`.
//   2. Integral images of the image channel(s) are stored in buf.image_sums.
//   3. The template sum divided by the template area is computed per channel.
//   4. The "matchTemplate_Prepared_CCOFF" kernel is launched over a
//      result.cols x result.rows global range.
// Several preprocessor and control-flow lines are not visible in this listing;
// the notes below mark where alternative code paths follow one another.
307 void matchTemplate_CCOFF(
308 const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
310 CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U);
312 matchTemplate_CCORR(image,templ,result,buf);
314 Context *clCxt = image.clCxt;
317 kernelName = "matchTemplate_Prepared_CCOFF";
318 size_t globalThreads[3] = {result.cols, result.rows, 1};
319 size_t localThreads[3] = {32, 8, 1};
// Arguments common to the single- and multi-channel cases.
321 vector< pair<size_t, const void *> > args;
322 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
323 args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
324 args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
325 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
326 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
327 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
328 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
329 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
330 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
331 // to be continued in the following section
// Single-channel case: one integral image and one scalar template value.
332 if(image.channels() == 1)
334 buf.image_sums.resize(1);
335 // FIXME: temp fix for incorrect integral kernel
// The third integral() output goes to a temporary and is not used afterwards.
337 integral(image, buf.image_sums[0], tmp_oclmat);
// Despite its name, templ_sum holds sum / area. Two alternative computations
// follow (presumably selected by a preprocessor condition that is not visible).
341 templ_sum = (float)sum(templ)[0] / templ.size().area();
344 templ_sum = (float)sum(o_templ)[0] / o_templ.size().area(); // temp fix for non-double supported machine
346 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
347 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
348 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
349 args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) );
// Multi-channel case (presumably the else branch of the channel test above):
// per-channel template value plus one integral image per channel.
353 Vec4f templ_sum = Vec4f::all(0);
355 split(image,buf.images);
356 templ_sum = sum(templ) / templ.size().area();
358 // temp fix for non-double supported machine
// Alternative path: the channels are split through Mat objects and each plane
// is wrapped in a new oclMat.
359 Mat o_templ = templ, o_image = image;
360 vector<Mat> o_mat_vector;
361 o_mat_vector.resize(image.channels());
362 buf.images.resize(image.channels());
363 split(o_image, o_mat_vector);
// NOTE(review): compares a signed int with the unsigned size() result.
364 for(int i = 0; i < o_mat_vector.size(); i ++)
366 buf.images[i] = oclMat(o_mat_vector[i]);
368 templ_sum = sum(o_templ) / templ.size().area();
370 buf.image_sums.resize(buf.images.size());
372 for(int i = 0; i < image.channels(); i ++)
374 // FIXME: temp fix for incorrect integral kernel
376 integral(buf.images[i], buf.image_sums[i], omat_temp);
// Only the case that pushes four integral images and four template values is
// visible; other channel counts end in the CV_Error below.
378 switch(image.channels())
381 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
382 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
383 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
384 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
// Offset and step of plane 0 are passed once for all four planes.
385 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
386 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
387 args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) );
388 args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) );
389 args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) );
390 args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) );
393 CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
397 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
// Normalised correlation-coefficient template matching.
// Steps visible below:
//   1. image and templ are converted to CV_32F copies in buf and
//      matchTemplate_CCORR() is run on those copies to fill `result`.
//   2. Integral and squared-integral images of the image channel(s) are stored
//      in buf.image_sums / buf.image_sqsums.
//   3. Template sum and squared-sum statistics are computed per channel.
//   4. The "matchTemplate_Prepared_CCOFF_NORMED" kernel is launched over a
//      result.cols x result.rows global range.
// Several preprocessor and control-flow lines are not visible in this listing;
// the notes below mark where alternative code paths follow one another.
400 void matchTemplate_CCOFF_NORMED(
401 const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
403 image.convertTo(buf.imagef, CV_32F);
404 templ.convertTo(buf.templf, CV_32F);
406 matchTemplate_CCORR(buf.imagef, buf.templf, result, buf);
// Reciprocal of the template area.
407 float scale = 1.f/templ.size().area();
409 Context *clCxt = image.clCxt;
412 kernelName = "matchTemplate_Prepared_CCOFF_NORMED";
413 size_t globalThreads[3] = {result.cols, result.rows, 1};
414 size_t localThreads[3] = {32, 8, 1};
// Arguments common to the single- and multi-channel cases.
416 vector< pair<size_t, const void *> > args;
417 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
418 args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
419 args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
420 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
421 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
422 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
423 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
424 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
425 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
426 args.push_back( make_pair( sizeof(cl_float),(void *)&scale) );
427 // to be continued in the following section
// Single-channel case.
428 if(image.channels() == 1)
430 buf.image_sums.resize(1);
431 buf.image_sqsums.resize(1);
432 integral(image, buf.image_sums[0], buf.image_sqsums[0]);
434 float templ_sqsum = 0;
436 templ_sum = (float)sum(templ)[0];
438 templ_sqsum = sqrSum(templ);
// Alternative to sqrSum(): square the template, then sum it.
// NOTE(review): templ_sqr is initialised from `templ` and then used as the
// multiply() destination; if copying an oclMat shares storage, this overwrites
// the caller's template - confirm.
440 oclMat templ_sqr = templ;
441 multiply(templ,templ, templ_sqr);
442 templ_sqsum = sum(templ_sqr)[0];
443 #endif //SQRSUM_FIXED
// Subtracts scale * sum^2 from the sum of squares.
444 templ_sqsum -= scale * templ_sum * templ_sum;
447 // temp fix for non-double supported machine
449 templ_sum = (float)sum(o_templ)[0];
450 templ_sqsum = sum(o_templ.mul(o_templ))[0] - scale * templ_sum * templ_sum;
453 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
454 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
455 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
456 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
457 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
458 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
459 args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) );
460 args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum) );
// Multi-channel case (presumably the else branch of the channel test above).
464 Vec4f templ_sum = Vec4f::all(0);
465 Vec4f templ_sqsum = Vec4f::all(0);
467 split(image,buf.images);
468 templ_sum = sum(templ);
470 templ_sqsum = sqrSum(templ);
// NOTE(review): same destination concern as in the single-channel path above.
472 oclMat templ_sqr = templ;
473 multiply(templ,templ, templ_sqr);
474 templ_sqsum = sum(templ_sqr);
475 #endif //SQRSUM_FIXED
// NOTE(review): the loop further down subtracts scale * templ_sum[i]^2 again
// from templ_sqsum[i]; if both this statement and that loop are compiled in the
// same configuration, the term is subtracted twice - confirm against the
// preprocessor conditions, which are not visible here.
476 templ_sqsum -= scale * templ_sum * templ_sum;
479 // temp fix for non-double supported machine
// Alternative path: the channels are split through Mat objects and each plane
// is wrapped in a new oclMat.
480 Mat o_templ = templ, o_image = image;
482 vector<Mat> o_mat_vector;
483 o_mat_vector.resize(image.channels());
484 buf.images.resize(image.channels());
485 split(o_image, o_mat_vector);
// NOTE(review): compares a signed int with the unsigned size() result.
486 for(int i = 0; i < o_mat_vector.size(); i ++)
488 buf.images[i] = oclMat(o_mat_vector[i]);
490 templ_sum = sum(o_templ);
491 templ_sqsum = sum(o_templ.mul(o_templ));
// Accumulates (sum of squares - scale * sum^2) over all channels into a single
// scalar that is passed to the kernel.
493 float templ_sqsum_sum = 0;
494 for(int i = 0; i < image.channels(); i ++)
496 templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
499 buf.image_sums.resize(buf.images.size());
500 buf.image_sqsums.resize(buf.images.size());
502 for(int i = 0; i < image.channels(); i ++)
504 integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]);
// Only the case that pushes four planes is visible; other channel counts end in
// the CV_Error below.
507 switch(image.channels())
510 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
511 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
512 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
513 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
// Offset and step of plane 0 are passed once for all four planes.
514 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
515 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
516 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
517 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) );
518 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) );
519 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) );
520 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
521 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
522 args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) );
523 args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) );
524 args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) );
525 args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) );
526 args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum_sum) );
529 CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
// The kernel variant is selected from the original image's channels and depth,
// not from the CV_32F copies made at the top of the function.
533 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
538 void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method)
540 MatchTemplateBuf buf;
541 matchTemplate(image,templ, result, method,buf);
543 void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf)
545 CV_Assert(image.type() == templ.type());
546 CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows);
548 typedef void (*Caller)(const oclMat&, const oclMat&, oclMat&, MatchTemplateBuf&);
550 const Caller callers[] = {
551 ::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED,
552 ::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED,
553 ::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED
556 Caller caller = callers[method];
558 caller(image, templ, result, buf);