1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
18 // Peng Xiao, pengxiao@multicorewareinc.com
20 // Redistribution and use in source and binary forms, with or without modification,
21 // are permitted provided that the following conditions are met:
23 // * Redistribution's of source code must retain the above copyright notice,
24 // this list of conditions and the following disclaimer.
26 // * Redistribution's in binary form must reproduce the above copyright notice,
27 // this list of conditions and the following disclaimer in the documentation
28 // and/or other materials provided with the distribution.
30 // * The name of the copyright holders may not be used to endorse or promote products
31 // derived from this software without specific prior written permission.
33 // This software is provided by the copyright holders and contributors as is and
34 // any express or implied warranties, including, but not limited to, the implied
35 // warranties of merchantability and fitness for a particular purpose are disclaimed.
36 // In no event shall the Intel Corporation or contributors be liable for any direct,
37 // indirect, incidental, special, exemplary, or consequential damages
38 // (including, but not limited to, procurement of substitute goods or services;
39 // loss of use, data, or profits; or business interruption) however caused
40 // and on any theory of liability, whether in contract, strict liability,
41 // or tort (including negligence or otherwise) arising in any way out of
42 // the use of this software, even if advised of the possibility of such damage.
48 #include "precomp.hpp"
51 using namespace cv::ocl;
59 ///////////////////////////OpenCL kernel strings///////////////////////////
// Source text of the OpenCL program holding the template-matching kernels.
// It is defined elsewhere; every kernel launch in this file passes its address
// to openCLExecuteKernel.
60 extern const char *match_template;
// Forward declarations of the per-method implementations. The ones taking
// (image, templ, result, buf) share the signature of the function-pointer type
// used by the dispatch table in cv::ocl::matchTemplate.
68 void matchTemplate_SQDIFF(
69 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
71 void matchTemplate_SQDIFF_NORMED(
72 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
// NOTE(review): the next line is a bare parameter list; the line naming the
// function is not present in this listing. Presumably it belongs to the
// declaration of convolve_32F, which is called with this argument shape
// further down -- confirm against the full file.
75 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
77 void matchTemplate_CCORR(
78 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
80 void matchTemplate_CCORR_NORMED(
81 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
83 void matchTemplate_CCOFF(
84 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
86 void matchTemplate_CCOFF_NORMED(
87 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
// Direct ("naive") kernel launchers: no scratch buffer, a trailing int `cn`
// instead.
90 void matchTemplateNaive_SQDIFF(
91 const oclMat &image, const oclMat &templ, oclMat &result, int cn);
93 void matchTemplateNaive_CCORR(
94 const oclMat &image, const oclMat &templ, oclMat &result, int cn);
// Launcher for the "extractFirstChannel" kernel.
96 void extractFirstChannel_32F(
97 const oclMat &image, oclMat &result);
// Selects between the naive kernels and the convolution-based path.
// Callers pass (method, image depth, template size), but all three parameters
// are unnamed here, so the decision cannot depend on them.
// NOTE(review): the function body (braces and return statement) is not present
// in this listing; going by the comment at the end of this block it is
// expected to select the naive path unconditionally -- confirm against the
// full file.
99 // Evaluates optimal template's area threshold. If
100 // template's area is less than the threshold, we use naive match
101 // template version, otherwise FFT-based (if available)
102 static bool useNaive(int , int , Size )
105 // always use naive until convolve is imported
109 //////////////////////////////////////////////////////////////////////
// Squared-difference matching (CV_TM_SQDIFF).
//   image, templ : input image and template
//   result       : (re)created here as CV_32F with
//                  (image.rows - templ.rows + 1) rows and
//                  (image.cols - templ.cols + 1) columns
//   buf          : scratch storage; buf.image_sqsums is resized to one entry
//                  and overwritten on the prepared path
// NOTE(review): brace, `else`/`return` lines and the declaration of
// `sums`/`sqsums` are not present in this listing, so how the naive call and
// the prepared path below are separated cannot be confirmed from here.
111 void matchTemplate_SQDIFF(
112 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf & buf)
114 result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
// useNaive decides between the direct kernel and the integral-image path.
115 if (useNaive(CV_TM_SQDIFF, image.depth(), templ.size()))
117 matchTemplateNaive_SQDIFF(image, templ, result, image.oclchannels());
// Prepared path: the squared integral of the image (viewed as one channel) is
// computed on the host with cv::integral and stored into buf.image_sqsums[0].
122 buf.image_sqsums.resize(1);
124 // TODO, add double support for ocl::integral
125 // use CPU integral temporarily
127 cv::integral(Mat(image.reshape(1)), sums, sqsums);
128 buf.image_sqsums[0] = sqsums;
// Element 0 of sqrSum over the single-channel view of the template, truncated
// to an unsigned 64-bit integer; it is handed to the kernel as a cl_ulong.
130 unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
// result is first filled by matchTemplate_CCORR; the kernel launched below
// receives that same result buffer.
131 matchTemplate_CCORR(image, templ, result, buf);
133 //port CUDA's matchTemplatePrepared_SQDIFF_8U
134 Context *clCxt = image.clCxt;
135 string kernelName = "matchTemplate_Prepared_SQDIFF";
136 vector< pair<size_t, const void *> > args;
// Arguments are pushed in the order the kernel is expected to declare them
// (kernel source is not shown here -- verify against the match_template program).
138 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
139 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
140 args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
141 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
142 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
143 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
144 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
145 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
146 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
147 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
148 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
// Global size is result.cols x result.rows; local size is 16 x 16.
150 size_t globalThreads[3] = {result.cols, result.rows, 1};
151 size_t localThreads[3] = {16, 16, 1};
// "-D CN4" is passed as a build option only for 4-channel images.
153 const char * build_opt = image.oclchannels() == 4 ? "-D CN4" : "";
// The two integers after `args` are fixed at 1 and CV_8U here; other launches
// in this file pass image.oclchannels() and image.depth() in those positions.
154 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U, build_opt);
// Normalised squared-difference matching (CV_TM_SQDIFF_NORMED).
// Fills `result` via matchTemplate_CCORR, then launches the
// "matchTemplate_Prepared_SQDIFF_NORMED" kernel on that same buffer.
//   buf : buf.image_sums is resized to one entry and receives the integral of
//         the image viewed as a single channel.
// NOTE(review): this kernel is handed buf.image_sums (a plain integral),
// whereas the un-normalised SQDIFF path hands its kernel buf.image_sqsums --
// confirm which table the kernel actually expects.
158 void matchTemplate_SQDIFF_NORMED(
159 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
161 matchTemplate_CCORR(image, templ, result, buf);
162 buf.image_sums.resize(1);
164 integral(image.reshape(1), buf.image_sums[0]);
// Element 0 of sqrSum over the single-channel view of the template, truncated
// to an unsigned 64-bit integer; passed to the kernel as a cl_ulong.
166 unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
168 Context *clCxt = image.clCxt;
169 string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
170 vector< pair<size_t, const void *> > args;
// Argument order must match the kernel's parameter list (kernel source not
// shown here).
172 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
173 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
174 args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
175 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
176 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
177 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
178 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
179 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
180 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
181 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
182 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
// Global size is result.cols x result.rows; local size is 16 x 16.
184 size_t globalThreads[3] = {result.cols, result.rows, 1};
185 size_t localThreads[3] = {16, 16, 1};
186 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
// Launches the "matchTemplate_Naive_SQDIFF" kernel directly on image and templ.
// The fourth parameter (an unnamed int) is ignored; the channel count passed to
// the launcher is read from image.oclchannels().
// Asserted preconditions:
//   - image and templ are both CV_8U, or image, templ and result are all CV_32F
//     (result's depth is only checked in the CV_32F alternative);
//   - image and templ have the same channel count, which is 1 or 4, and result
//     has one channel;
//   - result is already sized (image.rows - templ.rows + 1) x
//     (image.cols - templ.cols + 1); this function does not allocate it.
// NOTE(review): the line closing the first CV_Assert is not present in this
// listing.
189 void matchTemplateNaive_SQDIFF(
190 const oclMat &image, const oclMat &templ, oclMat &result, int)
192 CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
193 || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
195 CV_Assert(image.oclchannels() == templ.oclchannels() && (image.oclchannels() == 1 || image.oclchannels() == 4) && result.oclchannels() == 1);
196 CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
198 Context *clCxt = image.clCxt;
199 string kernelName = "matchTemplate_Naive_SQDIFF";
201 vector< pair<size_t, const void *> > args;
// Buffers first, then rows/cols, offsets and steps of image, templ and result;
// the order must match the kernel's parameter list (kernel source not shown).
203 args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
204 args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
205 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
206 args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
207 args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
208 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
209 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
210 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
211 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
212 args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
213 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
214 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
215 args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
216 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
217 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
// Global size is result.cols x result.rows; local size is 16 x 16.
219 size_t globalThreads[3] = {result.cols, result.rows, 1};
220 size_t localThreads[3] = {16, 16, 1};
221 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
224 //////////////////////////////////////////////////////////////////////
// Placeholder for the convolution-based correlation: all four parameters are
// unnamed and the only visible statement raises an error with code -1.
// NOTE(review): the line carrying the function name is absent from this
// listing; presumably this is convolve_32F, which matchTemplate_CCORR calls
// with the same argument shape -- confirm against the full file.
227 const oclMat &, const oclMat &, oclMat &, MatchTemplateBuf &)
229 CV_Error(-1, "convolve is not fully implemented yet");
// Cross-correlation matching (CV_TM_CCORR).
//   result : (re)created here as CV_32F with (image.rows - templ.rows + 1) rows
//            and (image.cols - templ.cols + 1) columns
//   buf    : buf.imagef / buf.templf receive CV_32F copies of the inputs when
//            both inputs are CV_8U and the convolution path is taken
// Either matchTemplateNaive_CCORR or convolve_32F produces the values.
// NOTE(review): brace, `else` and `return` lines are not present in this
// listing, so the exact branch structure cannot be confirmed from here.
232 void matchTemplate_CCORR(
233 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
235 result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
236 if (useNaive(CV_TM_CCORR, image.depth(), templ.size()))
238 matchTemplateNaive_CCORR(image, templ, result, image.oclchannels());
// Convolution path: 8-bit inputs are converted to CV_32F first; the final call
// passes image and templ through unconverted.
243 if(image.depth() == CV_8U && templ.depth() == CV_8U)
245 image.convertTo(buf.imagef, CV_32F);
246 templ.convertTo(buf.templf, CV_32F);
247 convolve_32F(buf.imagef, buf.templf, result, buf);
251 convolve_32F(image, templ, result, buf);
// Normalised cross-correlation matching (CV_TM_CCORR_NORMED).
// Fills `result` via matchTemplate_CCORR, then launches "normalizeKernel" on
// that same buffer.
//   buf : buf.image_sums and buf.image_sqsums are each resized to one entry and
//         filled by integral() from the image viewed as a single channel; only
//         image_sqsums[0] is passed on to the kernel.
256 void matchTemplate_CCORR_NORMED(
257 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
259 matchTemplate_CCORR(image, templ, result, buf);
260 buf.image_sums.resize(1);
261 buf.image_sqsums.resize(1);
263 integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
// Element 0 of sqrSum over the single-channel view of the template, truncated
// to an unsigned 64-bit integer; passed to the kernel as a cl_ulong.
265 unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
267 Context *clCxt = image.clCxt;
268 string kernelName = "normalizeKernel";
269 vector< pair<size_t, const void *> > args;
// Argument order must match the kernel's parameter list (kernel source not
// shown here).
271 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
272 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
273 args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
274 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
275 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
276 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
277 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
278 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
279 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
280 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
281 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
// Global size is result.cols x result.rows; local size is 16 x 16.
283 size_t globalThreads[3] = {result.cols, result.rows, 1};
284 size_t localThreads[3] = {16, 16, 1};
285 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
// Launches the "matchTemplate_Naive_CCORR" kernel directly on image and templ.
// The fourth parameter (an unnamed int) is ignored; the channel count passed to
// the launcher is read from image.oclchannels().
// Asserted preconditions:
//   - image and templ are both CV_8U, or image, templ and result are all CV_32F
//     (result's depth is only checked in the CV_32F alternative);
//   - image and templ have the same channel count, which is 1 or 4, and result
//     has one channel;
//   - result is already sized (image.rows - templ.rows + 1) x
//     (image.cols - templ.cols + 1); this function does not allocate it.
// NOTE(review): the line closing the first CV_Assert is not present in this
// listing.
288 void matchTemplateNaive_CCORR(
289 const oclMat &image, const oclMat &templ, oclMat &result, int)
291 CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
292 || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
294 CV_Assert(image.oclchannels() == templ.oclchannels() && (image.oclchannels() == 1 || image.oclchannels() == 4) && result.oclchannels() == 1);
295 CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
297 Context *clCxt = image.clCxt;
298 string kernelName = "matchTemplate_Naive_CCORR";
300 vector< pair<size_t, const void *> > args;
// Buffers first, then rows/cols, offsets and steps of image, templ and result;
// the order must match the kernel's parameter list (kernel source not shown).
302 args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
303 args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
304 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
305 args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
306 args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
307 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
308 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
309 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
310 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
311 args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
312 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
313 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
314 args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
315 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
316 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
// Global size is result.cols x result.rows; local size is 16 x 16.
318 size_t globalThreads[3] = {result.cols, result.rows, 1};
319 size_t localThreads[3] = {16, 16, 1};
320 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
322 //////////////////////////////////////////////////////////////////////
// Correlation-coefficient matching (CV_TM_CCOEFF) for CV_8U inputs (asserted).
// Fills `result` via matchTemplate_CCORR, then launches
// "matchTemplate_Prepared_CCOFF" on that buffer together with integral images
// of the input and the template's per-channel mean.
//   buf : buf.image_sums (and, for multi-channel input, buf.images) are
//         overwritten.
// NOTE(review): the declaration of `kernelName`, the `else` between the two
// channel branches, and the `case`/`default`/`break` lines of the switch are
// not present in this listing.
324 void matchTemplate_CCOFF(
325 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
327 CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U);
329 matchTemplate_CCORR(image, templ, result, buf);
331 Context *clCxt = image.clCxt;
334 kernelName = "matchTemplate_Prepared_CCOFF";
// Global size is result.cols x result.rows; local size is 16 x 16.
335 size_t globalThreads[3] = {result.cols, result.rows, 1};
336 size_t localThreads[3] = {16, 16, 1};
// Arguments common to every channel count; channel-specific ones are appended
// further down.
338 vector< pair<size_t, const void *> > args;
339 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
340 args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
341 args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
342 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
343 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
344 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
345 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
346 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
347 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
348 Vec4f templ_sum = Vec4f::all(0);
349 // to be continued in the following section
// Single-channel input: one integral image, and templ_sum[0] holds the
// template sum divided by its area (i.e. the mean).
350 if(image.oclchannels() == 1)
352 buf.image_sums.resize(1);
353 integral(image, buf.image_sums[0]);
355 templ_sum[0] = (float)sum(templ)[0] / templ.size().area();
356 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
357 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
358 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
359 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
// Multi-channel input: the image is split into planes, each plane gets its own
// integral image, and templ_sum holds the per-channel template means.
364 split(image, buf.images);
365 templ_sum = sum(templ) / templ.size().area();
366 buf.image_sums.resize(buf.images.size());
369 for(int i = 0; i < image.oclchannels(); i ++)
371 integral(buf.images[i], buf.image_sums[i]);
// The arguments below index image_sums[0..3] and templ_sum[0..3], so this is
// presumably the 4-channel case, with the CV_Error as the fallback for other
// channel counts -- confirm. Only image_sums[0]'s offset and step are passed:
// assumes all four integral images share them -- TODO confirm.
373 switch(image.oclchannels())
376 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
377 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
378 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
379 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
380 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
381 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
382 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
383 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
384 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
385 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
388 CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
392 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
// Normalised correlation-coefficient matching (CV_TM_CCOEFF_NORMED).
// Converts image and templ to CV_32F copies in buf, fills `result` via
// matchTemplate_CCORR on those copies, then launches
// "matchTemplate_Prepared_CCOFF_NORMED" with integral / squared-integral images
// of the original image and template statistics.
//   buf : buf.imagef, buf.templf, buf.image_sums, buf.image_sqsums (and, for
//         multi-channel input, buf.images) are overwritten.
// NOTE(review): the declaration of `kernelName`, the `else` between the two
// channel branches, and the `case`/`default`/`break` lines of the switch are
// not present in this listing.
395 void matchTemplate_CCOFF_NORMED(
396 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
398 image.convertTo(buf.imagef, CV_32F);
399 templ.convertTo(buf.templf, CV_32F);
401 matchTemplate_CCORR(buf.imagef, buf.templf, result, buf);
// Reciprocal of the template's area; used to turn sums into means below and
// also passed to the kernel.
402 float scale = 1.f / templ.size().area();
404 Context *clCxt = image.clCxt;
407 kernelName = "matchTemplate_Prepared_CCOFF_NORMED";
// Global size is result.cols x result.rows; local size is 16 x 16.
408 size_t globalThreads[3] = {result.cols, result.rows, 1};
409 size_t localThreads[3] = {16, 16, 1};
// Arguments common to every channel count; channel-specific ones are appended
// further down.
411 vector< pair<size_t, const void *> > args;
412 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
413 args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
414 args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
415 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
416 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
417 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
418 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
419 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
420 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
421 args.push_back( make_pair( sizeof(cl_float), (void *)&scale) );
423 Vec4f templ_sum = Vec4f::all(0);
424 Vec4f templ_sqsum = Vec4f::all(0);
425 // to be continued in the following section
// Single-channel input: templ_sqsum[0] becomes sqrSum - scale * sum^2 and
// templ_sum[0] is then scaled to the template mean.
426 if(image.oclchannels() == 1)
428 buf.image_sums.resize(1);
429 buf.image_sqsums.resize(1);
430 integral(image, buf.image_sums[0], buf.image_sqsums[0]);
432 templ_sum[0] = (float)sum(templ)[0];
434 templ_sqsum[0] = sqrSum(templ)[0];
436 templ_sqsum[0] -= scale * templ_sum[0] * templ_sum[0];
437 templ_sum[0] *= scale;
439 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
440 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
441 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
442 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
443 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
444 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
445 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
446 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sqsum[0]) );
// Multi-channel input: the image is split into planes and the template
// statistics are computed per channel.
451 split(image, buf.images);
452 templ_sum = sum(templ);
454 templ_sqsum = sqrSum(templ);
// NOTE(review): templ_sum and templ_sqsum are Vec4f here. The next statement
// subtracts a Vec4f * Vec4f product from templ_sqsum, and the loop after it
// subtracts scale * templ_sum[i] * templ_sum[i] per channel again. Confirm that
// the Vec4f product is the intended element-wise square and that the term is
// not being subtracted twice.
456 templ_sqsum -= scale * templ_sum * templ_sum;
// Accumulates the per-channel (sqsum - scale * sum^2) terms into one scalar
// that is passed to the kernel as its last argument.
458 float templ_sqsum_sum = 0;
459 for(int i = 0; i < image.oclchannels(); i ++)
461 templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
// NOTE(review): unlike the single-channel branch, no scaling of templ_sum by
// `scale` is visible in this branch before it is passed to the kernel --
// confirm against the full file.
464 buf.image_sums.resize(buf.images.size());
465 buf.image_sqsums.resize(buf.images.size());
467 for(int i = 0; i < image.oclchannels(); i ++)
469 integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]);
// The arguments below index entries [0..3], so this is presumably the
// 4-channel case, with the CV_Error as the fallback for other channel counts
// -- confirm. Only entry [0]'s offset and step are passed for each table:
// assumes all four planes share them -- TODO confirm.
472 switch(image.oclchannels())
475 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
476 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
477 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
478 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
479 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
480 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
481 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
482 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) );
483 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) );
484 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) );
485 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
486 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
487 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
488 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
489 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
490 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
491 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sqsum_sum) );
494 CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
// Launched with the channel count and depth of the original image, not of the
// CV_32F copy used for the correlation step.
498 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
// Launches the "extractFirstChannel" kernel with image as input and result as
// output.
// `result` is not allocated in the visible code and its rows/cols determine the
// global work size, so the caller is presumably expected to size it beforehand
// -- confirm.
// NOTE(review): the declaration of `kernelName` is not present in this listing.
500 void extractFirstChannel_32F(const oclMat &image, oclMat &result)
502 Context *clCxt = image.clCxt;
505 kernelName = "extractFirstChannel";
// Global size is result.cols x result.rows; local size is 16 x 16.
506 size_t globalThreads[3] = {result.cols, result.rows, 1};
507 size_t localThreads[3] = {16, 16, 1};
// Buffers, then result dimensions, then offsets and steps of image and result;
// the order must match the kernel's parameter list (kernel source not shown).
509 vector< pair<size_t, const void *> > args;
510 args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data) );
511 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
512 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
513 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
514 args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
515 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
516 args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
517 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
// The two trailing integers are -1 here, where other launches in this file
// pass a channel count and a depth.
519 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, -1, -1);
524 void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method)
526 MatchTemplateBuf buf;
527 matchTemplate(image, templ, result, method, buf);
529 void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf)
531 CV_Assert(image.type() == templ.type());
532 CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows);
534 typedef void (*Caller)(const oclMat &, const oclMat &, oclMat &, MatchTemplateBuf &);
536 const Caller callers[] =
538 ::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED,
539 ::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED,
540 ::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED
543 Caller caller = callers[method];
545 caller(image, templ, result, buf);