1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
18 // Peng Xiao, pengxiao@multicorewareinc.com
20 // Redistribution and use in source and binary forms, with or without modification,
21 // are permitted provided that the following conditions are met:
23 // * Redistribution's of source code must retain the above copyright notice,
24 // this list of conditions and the following disclaimer.
26 // * Redistribution's in binary form must reproduce the above copyright notice,
27 // this list of conditions and the following disclaimer in the documentation
28 // and/or other materials provided with the distribution.
30 // * The name of the copyright holders may not be used to endorse or promote products
31 // derived from this software without specific prior written permission.
33 // This software is provided by the copyright holders and contributors as is and
34 // any express or implied warranties, including, but not limited to, the implied
35 // warranties of merchantability and fitness for a particular purpose are disclaimed.
36 // In no event shall the Intel Corporation or contributors be liable for any direct,
37 // indirect, incidental, special, exemplary, or consequential damages
38 // (including, but not limited to, procurement of substitute goods or services;
39 // loss of use, data, or profits; or business interruption) however caused
40 // and on any theory of liability, whether in contract, strict liability,
41 // or tort (including negligence or otherwise) arising in any way out of
42 // the use of this software, even if advised of the possibility of such damage.
47 #include "precomp.hpp"
48 #include "opencl_kernels.hpp"
51 using namespace cv::ocl;
57 void matchTemplate_SQDIFF(
58 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
60 void matchTemplate_SQDIFF_NORMED(
61 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
64 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
66 void matchTemplate_CCORR(
67 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
69 void matchTemplate_CCORR_NORMED(
70 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
72 void matchTemplate_CCOFF(
73 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
75 void matchTemplate_CCOFF_NORMED(
76 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
79 void matchTemplateNaive_SQDIFF(
80 const oclMat &image, const oclMat &templ, oclMat &result, int cn);
82 void matchTemplateNaive_CCORR(
83 const oclMat &image, const oclMat &templ, oclMat &result, int cn);
85 void extractFirstChannel_32F(
86 const oclMat &image, oclMat &result);
88 // Evaluates optimal template's area threshold. If
89 // template's area is less than the threshold, we use naive match
90 // template version, otherwise FFT-based (if available)
91 static bool useNaive(int , int , Size )
94 // always use naive until convolve is imported
98 //////////////////////////////////////////////////////////////////////
100 void matchTemplate_SQDIFF(
101 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf & buf)
103 result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
104 if (useNaive(CV_TM_SQDIFF, image.depth(), templ.size()))
106 matchTemplateNaive_SQDIFF(image, templ, result, image.oclchannels());
111 buf.image_sqsums.resize(1);
113 // TODO, add double support for ocl::integral
114 // use CPU integral temporarily
116 cv::integral(Mat(image.reshape(1)), sums, sqsums);
117 buf.image_sqsums[0] = sqsums;
119 unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
120 matchTemplate_CCORR(image, templ, result, buf);
122 //port CUDA's matchTemplatePrepared_SQDIFF_8U
123 Context *clCxt = image.clCxt;
124 string kernelName = "matchTemplate_Prepared_SQDIFF";
125 vector< pair<size_t, const void *> > args;
127 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
128 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
129 args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
130 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
131 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
132 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
133 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
134 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
135 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
136 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
137 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
139 size_t globalThreads[3] = {result.cols, result.rows, 1};
140 size_t localThreads[3] = {16, 16, 1};
142 const char * build_opt = image.oclchannels() == 4 ? "-D CN4" : "";
143 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U, build_opt);
147 void matchTemplate_SQDIFF_NORMED(
148 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
150 matchTemplate_CCORR(image, templ, result, buf);
151 buf.image_sums.resize(1);
153 integral(image.reshape(1), buf.image_sums[0]);
155 unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
157 Context *clCxt = image.clCxt;
158 string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
159 vector< pair<size_t, const void *> > args;
161 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
162 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
163 args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
164 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
165 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
166 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
167 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
168 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
169 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
170 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
171 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
173 size_t globalThreads[3] = {result.cols, result.rows, 1};
174 size_t localThreads[3] = {16, 16, 1};
175 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
178 void matchTemplateNaive_SQDIFF(
179 const oclMat &image, const oclMat &templ, oclMat &result, int)
181 CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
182 || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
184 CV_Assert(image.oclchannels() == templ.oclchannels() && (image.oclchannels() == 1 || image.oclchannels() == 4) && result.oclchannels() == 1);
185 CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
187 Context *clCxt = image.clCxt;
188 string kernelName = "matchTemplate_Naive_SQDIFF";
190 vector< pair<size_t, const void *> > args;
192 args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
193 args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
194 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
195 args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
196 args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
197 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
198 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
199 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
200 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
201 args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
202 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
203 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
204 args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
205 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
206 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
208 size_t globalThreads[3] = {result.cols, result.rows, 1};
209 size_t localThreads[3] = {16, 16, 1};
210 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
213 //////////////////////////////////////////////////////////////////////
216 const oclMat &, const oclMat &, oclMat &, MatchTemplateBuf &)
218 CV_Error(-1, "convolve is not fully implemented yet");
221 void matchTemplate_CCORR(
222 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
224 result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
225 if (useNaive(CV_TM_CCORR, image.depth(), templ.size()))
227 matchTemplateNaive_CCORR(image, templ, result, image.oclchannels());
232 if(image.depth() == CV_8U && templ.depth() == CV_8U)
234 image.convertTo(buf.imagef, CV_32F);
235 templ.convertTo(buf.templf, CV_32F);
236 convolve_32F(buf.imagef, buf.templf, result, buf);
240 convolve_32F(image, templ, result, buf);
245 void matchTemplate_CCORR_NORMED(
246 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
248 cv::ocl::oclMat temp;
249 matchTemplate_CCORR(image, templ, result, buf);
250 buf.image_sums.resize(1);
251 buf.image_sqsums.resize(1);
252 integral(image.reshape(1), buf.image_sums[0], temp);
253 if(temp.depth() == CV_64F)
254 temp.convertTo(buf.image_sqsums[0], CV_32FC1);
256 buf.image_sqsums[0] = temp;
257 unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
259 Context *clCxt = image.clCxt;
260 string kernelName = "normalizeKernel";
261 vector< pair<size_t, const void *> > args;
263 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
264 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
265 args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
266 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
267 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
268 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
269 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
270 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
271 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
272 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
273 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
275 size_t globalThreads[3] = {result.cols, result.rows, 1};
276 size_t localThreads[3] = {16, 16, 1};
277 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
280 void matchTemplateNaive_CCORR(
281 const oclMat &image, const oclMat &templ, oclMat &result, int)
283 CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
284 || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
286 CV_Assert(image.oclchannels() == templ.oclchannels() && (image.oclchannels() == 1 || image.oclchannels() == 4) && result.oclchannels() == 1);
287 CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
289 Context *clCxt = image.clCxt;
290 string kernelName = "matchTemplate_Naive_CCORR";
292 vector< pair<size_t, const void *> > args;
294 args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
295 args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
296 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
297 args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
298 args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
299 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
300 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
301 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
302 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
303 args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
304 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
305 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
306 args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
307 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
308 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
310 size_t globalThreads[3] = {result.cols, result.rows, 1};
311 size_t localThreads[3] = {16, 16, 1};
312 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
314 //////////////////////////////////////////////////////////////////////
316 void matchTemplate_CCOFF(
317 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
319 CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U);
321 matchTemplate_CCORR(image, templ, result, buf);
323 Context *clCxt = image.clCxt;
326 kernelName = "matchTemplate_Prepared_CCOFF";
327 size_t globalThreads[3] = {result.cols, result.rows, 1};
328 size_t localThreads[3] = {16, 16, 1};
330 vector< pair<size_t, const void *> > args;
331 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
332 args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
333 args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
334 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
335 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
336 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
337 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
338 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
339 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
340 Vec4f templ_sum = Vec4f::all(0);
341 // to be continued in the following section
342 if(image.oclchannels() == 1)
344 buf.image_sums.resize(1);
345 integral(image, buf.image_sums[0]);
347 templ_sum[0] = (float)sum(templ)[0] / templ.size().area();
348 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
349 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
350 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
351 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
356 split(image, buf.images);
357 templ_sum = sum(templ) / templ.size().area();
358 buf.image_sums.resize(buf.images.size());
361 for(int i = 0; i < image.oclchannels(); i ++)
363 integral(buf.images[i], buf.image_sums[i]);
365 switch(image.oclchannels())
368 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
369 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
370 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
371 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
372 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
373 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
374 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
375 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
376 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
377 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
380 CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
384 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
387 void matchTemplate_CCOFF_NORMED(
388 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
390 image.convertTo(buf.imagef, CV_32F);
391 templ.convertTo(buf.templf, CV_32F);
393 matchTemplate_CCORR(buf.imagef, buf.templf, result, buf);
394 float scale = 1.f / templ.size().area();
396 Context *clCxt = image.clCxt;
399 kernelName = "matchTemplate_Prepared_CCOFF_NORMED";
400 size_t globalThreads[3] = {result.cols, result.rows, 1};
401 size_t localThreads[3] = {16, 16, 1};
403 vector< pair<size_t, const void *> > args;
404 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
405 args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
406 args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
407 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
408 args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
409 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
410 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
411 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
412 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
413 args.push_back( make_pair( sizeof(cl_float), (void *)&scale) );
415 Vec4f templ_sum = Vec4f::all(0);
416 Vec4f templ_sqsum = Vec4f::all(0);
417 // to be continued in the following section
418 if(image.oclchannels() == 1)
420 buf.image_sums.resize(1);
421 buf.image_sqsums.resize(1);
422 cv::ocl::oclMat temp;
423 integral(image, buf.image_sums[0], temp);
424 if(temp.depth() == CV_64F)
425 temp.convertTo(buf.image_sqsums[0], CV_32FC1);
427 buf.image_sqsums[0] = temp;
429 templ_sum[0] = (float)sum(templ)[0];
431 templ_sqsum[0] = sqrSum(templ)[0];
433 templ_sqsum[0] -= scale * templ_sum[0] * templ_sum[0];
434 templ_sum[0] *= scale;
436 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
437 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
438 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
439 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
440 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
441 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
442 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
443 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sqsum[0]) );
448 split(image, buf.images);
449 templ_sum = sum(templ);
451 templ_sqsum = sqrSum(templ);
453 templ_sqsum -= scale * templ_sum * templ_sum;
455 float templ_sqsum_sum = 0;
456 for(int i = 0; i < image.oclchannels(); i ++)
458 templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
461 buf.image_sums.resize(buf.images.size());
462 buf.image_sqsums.resize(buf.images.size());
463 cv::ocl::oclMat temp;
464 for(int i = 0; i < image.oclchannels(); i ++)
466 integral(buf.images[i], buf.image_sums[i], temp);
467 if(temp.depth() == CV_64F)
468 temp.convertTo(buf.image_sqsums[i], CV_32FC1);
470 buf.image_sqsums[i] = temp;
473 switch(image.oclchannels())
476 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
477 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
478 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
479 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
480 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
481 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
482 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
483 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) );
484 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) );
485 args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) );
486 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
487 args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
488 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
489 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
490 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
491 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
492 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sqsum_sum) );
495 CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
499 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
501 void extractFirstChannel_32F(const oclMat &image, oclMat &result)
503 Context *clCxt = image.clCxt;
506 kernelName = "extractFirstChannel";
507 size_t globalThreads[3] = {result.cols, result.rows, 1};
508 size_t localThreads[3] = {16, 16, 1};
510 vector< pair<size_t, const void *> > args;
511 args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data) );
512 args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
513 args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
514 args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
515 args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
516 args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
517 args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
518 args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
520 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, -1, -1);
525 void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method)
527 MatchTemplateBuf buf;
528 matchTemplate(image, templ, result, method, buf);
530 void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf)
532 CV_Assert(image.type() == templ.type());
533 CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows);
535 typedef void (*Caller)(const oclMat &, const oclMat &, oclMat &, MatchTemplateBuf &);
537 const Caller callers[] =
539 ::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED,
540 ::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED,
541 ::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED
544 Caller caller = callers[method];
546 caller(image, templ, result, buf);