1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
18 // Peng Xiao, pengxiao@multicorewareinc.com
20 // Redistribution and use in source and binary forms, with or without modification,
21 // are permitted provided that the following conditions are met:
23 // * Redistribution's of source code must retain the above copyright notice,
24 // this list of conditions and the following disclaimer.
26 // * Redistribution's in binary form must reproduce the above copyright notice,
27 // this list of conditions and the following disclaimer in the documentation
28 // and/or other materials provided with the distribution.
30 // * The name of the copyright holders may not be used to endorse or promote products
31 // derived from this software without specific prior written permission.
33 // This software is provided by the copyright holders and contributors as is and
34 // any express or implied warranties, including, but not limited to, the implied
35 // warranties of merchantability and fitness for a particular purpose are disclaimed.
36 // In no event shall the Intel Corporation or contributors be liable for any direct,
37 // indirect, incidental, special, exemplary, or consequential damages
38 // (including, but not limited to, procurement of substitute goods or services;
39 // loss of use, data, or profits; or business interruption) however caused
40 // and on any theory of liability, whether in contract, strict liability,
41 // or tort (including negligence or otherwise) arising in any way out of
42 // the use of this software, even if advised of the possibility of such damage.
48 #include "precomp.hpp"
51 using namespace cv::ocl;
58 ///////////////////////////OpenCL kernel strings///////////////////////////
// Kernel source string defined outside this file; its address is passed to
// openCLExecuteKernel by every kernel launch further down.
59 extern const char *match_template;
// Forward declarations of the per-method implementations defined later in
// this file. The (image, templ, result, buf) parameter list is the same as
// the Caller function-pointer typedef used by cv::ocl::matchTemplate.
67 void matchTemplate_SQDIFF(
68 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
70 void matchTemplate_SQDIFF_NORMED(
71 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
// NOTE(review): the line naming the next declaration is not visible in this
// listing; presumably convolve_32F, which matchTemplate_CCORR calls with this
// argument list -- confirm against the full file.
74 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
76 void matchTemplate_CCORR(
77 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
79 void matchTemplate_CCORR_NORMED(
80 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
82 void matchTemplate_CCOFF(
83 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
85 void matchTemplate_CCOFF_NORMED(
86 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
// Direct kernels used when useNaive() selects the naive path. The trailing
// int is named cn here but is left unnamed (unused) in the definitions.
89 void matchTemplateNaive_SQDIFF(
90 const oclMat &image, const oclMat &templ, oclMat &result, int cn);
92 void matchTemplateNaive_CCORR(
93 const oclMat &image, const oclMat &templ, oclMat &result, int cn);
// Helper called after the multi-channel convolve() to fill the final result.
95 void extractFirstChannel_32F(
96 const oclMat &image, oclMat &result);
98 // Evaluates optimal template's area threshold. If
99 // template's area is less than the threshold, we use naive match
100 // template version, otherwise FFT-based (if available)
// Parameters: method - TM_* comparison method; depth - image depth (CV_8U and
// CV_32F are the values tested here); size - template size.
// NOTE(review): the return statements of the first branch, any fallback
// branch, and the conditional-compilation directives that presumably separate
// the two halves of this body are not visible in this listing -- confirm
// against the full file before relying on this summary.
101 static bool useNaive(int method, int depth, Size size)
// TM_SQDIFF on float input, or on a context that does not report CL_DOUBLE
// support.
104 if (method == TM_SQDIFF && (depth == CV_32F || !Context::getContext()->supportsFeature(Context::CL_DOUBLE)))
108 else if(method == TM_CCORR || (method == TM_SQDIFF && depth == CV_8U))
// Here the answer is true only when the template is smaller than 18 in both
// dimensions.
110 return size.height < 18 && size.width < 18;
// UNUSED only casts its argument to void; it is applied to all three
// parameters, presumably to silence unused-parameter warnings in a build
// where the logic above is compiled out -- TODO confirm.
115 #define UNUSED(x) (void)(x);
116 UNUSED(method) UNUSED(depth) UNUSED(size)
122 //////////////////////////////////////////////////////////////////////
// TM_SQDIFF implementation.
// Allocates result as CV_32F with (image.rows - templ.rows + 1) rows and
// (image.cols - templ.cols + 1) columns, then either runs the naive kernel
// or the prepared path (CPU squared integral + CCORR + a follow-up kernel).
// NOTE(review): braces, the else/return lines and the declaration of
// sums/sqsums are not visible in this listing.
124 void matchTemplate_SQDIFF(
125 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf & buf)
127 result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
128 if (useNaive(TM_SQDIFF, image.depth(), templ.size()))
130 matchTemplateNaive_SQDIFF(image, templ, result, image.oclchannels());
// Prepared path: a single squared-integral buffer is used because the image
// is processed through its reshape(1) view.
135 buf.image_sqsums.resize(1);
137 // TODO, add double support for ocl::integral
138 // use CPU integral temporarily
// The image is converted to a host Mat, integrated with cv::integral, and the
// squared integral is assigned back into the buffer slot.
140 cv::integral(Mat(image.reshape(1)), sums, sqsums);
141 buf.image_sqsums[0] = sqsums;
// Squared sum of the template (reshape(1) view), truncated to an unsigned
// 64-bit integer; it is passed to the kernel as a cl_ulong below.
143 unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
// Writes the cross-correlation into result; the kernel launched below also
// receives result together with the squared integral (presumably to combine
// them in place -- the kernel body is not visible here).
144 matchTemplate_CCORR(image, templ, result, buf);
146 //port CUDA's matchTemplatePrepared_SQDIFF_8U
147 Context *clCxt = image.clCxt;
148 String kernelName = "matchTemplate_Prepared_SQDIFF";
// Kernel arguments in push order: squared-integral buffer, result buffer,
// template squared sum, result rows/cols, template rows/cols, then
// offset/step of the squared integral and of the result.
149 std::vector< std::pair<size_t, const void *> > args;
151 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
152 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
153 args.push_back( std::make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
154 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
155 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
156 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
157 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
158 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
159 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
160 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
161 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
// Global size equals the result dimensions; local size is 16x16.
163 size_t globalThreads[3] = {result.cols, result.rows, 1};
164 size_t localThreads[3] = {16, 16, 1};
// 4-channel images pass the build option -D CN4; all others pass none.
166 const char * build_opt = image.oclchannels() == 4 ? "-D CN4" : "";
167 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U, build_opt);
// TM_SQDIFF_NORMED implementation.
// Runs matchTemplate_CCORR (which allocates and fills result), computes an
// integral of the reshape(1) image view on the device, and launches the
// "matchTemplate_Prepared_SQDIFF_NORMED" kernel on result.
171 void matchTemplate_SQDIFF_NORMED(
172 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
174 matchTemplate_CCORR(image, templ, result, buf);
175 buf.image_sums.resize(1);
// Two-argument integral(): only buf.image_sums[0] is produced here, and that
// buffer is what the kernel receives as its first argument.
177 integral(image.reshape(1), buf.image_sums[0]);
// Squared sum of the template (reshape(1) view), truncated to an unsigned
// 64-bit integer and passed as a cl_ulong.
179 unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
181 Context *clCxt = image.clCxt;
182 String kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
// Kernel arguments in push order: integral buffer, result buffer, template
// squared sum, result rows/cols, template rows/cols, then offset/step of the
// integral and of the result.
183 std::vector< std::pair<size_t, const void *> > args;
185 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
186 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
187 args.push_back( std::make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
188 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
189 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
190 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
191 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
192 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
193 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
194 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
195 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
// Global size equals the result dimensions; local size is 16x16. Unlike
// matchTemplate_SQDIFF, no build option is passed for 4-channel input.
197 size_t globalThreads[3] = {result.cols, result.rows, 1};
198 size_t localThreads[3] = {16, 16, 1};
199 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
// Launches the "matchTemplate_Naive_SQDIFF" kernel directly on image and
// templ, writing into an already-allocated result. The trailing int
// parameter is unnamed and unused.
202 void matchTemplateNaive_SQDIFF(
203 const oclMat &image, const oclMat &templ, oclMat &result, int)
// Accepted inputs: 8-bit image and template, or 32-bit float image and
// template together with a CV_32F result.
// NOTE(review): the closing line of this assertion is not visible in this
// listing.
205 CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
206 || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
// Image and template must have the same channel count (1 or 4); result must
// be single-channel.
208 CV_Assert(image.oclchannels() == templ.oclchannels() && (image.oclchannels() == 1 || image.oclchannels() == 4) && result.oclchannels() == 1);
// result must already have the (image - templ + 1) size in both dimensions.
209 CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
211 Context *clCxt = image.clCxt;
212 String kernelName = "matchTemplate_Naive_SQDIFF";
// Kernel arguments in push order: the three buffers (image, templ, result),
// rows/cols of each, the three offsets, then the three steps.
214 std::vector< std::pair<size_t, const void *> > args;
216 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&image.data));
217 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&templ.data));
218 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
219 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows));
220 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols));
221 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
222 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
223 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
224 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
225 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.offset));
226 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.offset));
227 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
228 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.step));
229 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.step));
230 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
// Global size equals the result dimensions; local size is 16x16. The launch
// is parameterised with the image's channel count and depth.
232 size_t globalThreads[3] = {result.cols, result.rows, 1};
233 size_t localThreads[3] = {16, 16, 1};
234 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
237 //////////////////////////////////////////////////////////////////////
// NOTE(review): the line carrying this function's return type and name is
// missing from this listing. The parameter list matches the convolve_32F(...)
// calls made in matchTemplate_CCORR, so this is presumably convolve_32F --
// confirm against the full file.
240 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
// A local ConvolveBuf is used; only the caller's block-size setting is
// carried over from buf.
242 ConvolveBuf convolve_buf;
243 convolve_buf.user_block_size = buf.user_block_size;
// Single-channel input: convolve straight into result, with the fourth
// argument set to true.
244 if (image.oclchannels() == 1)
245 convolve(image, templ, result, true, convolve_buf);
// Otherwise both inputs are passed as reshape(1) views, the output goes to
// result_ (its declaration is not visible in this listing), and
// extractFirstChannel_32F then fills result from result_.
249 convolve(image.reshape(1), templ.reshape(1), result_, true, convolve_buf);
250 extractFirstChannel_32F(result_, result);
// TM_CCORR implementation.
// Allocates result as CV_32F with (image - templ + 1) size in both
// dimensions, then uses either the naive kernel or convolve_32F.
// NOTE(review): braces and else/return lines are not visible in this listing.
254 void matchTemplate_CCORR(
255 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
257 result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
258 if (useNaive(TM_CCORR, image.depth(), templ.size()))
260 matchTemplateNaive_CCORR(image, templ, result, image.oclchannels());
// Convolution path: 8-bit inputs are first converted to CV_32F copies held in
// buf.imagef / buf.templf.
265 if(image.depth() == CV_8U && templ.depth() == CV_8U)
267 image.convertTo(buf.imagef, CV_32F);
268 templ.convertTo(buf.templf, CV_32F);
269 convolve_32F(buf.imagef, buf.templf, result, buf);
// Any other depth combination is passed to convolve_32F unchanged.
273 convolve_32F(image, templ, result, buf);
// TM_CCORR_NORMED implementation.
// Runs matchTemplate_CCORR (which allocates and fills result), computes the
// integral and squared integral of the reshape(1) image view, and launches
// "normalizeKernel" on result.
278 void matchTemplate_CCORR_NORMED(
279 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
281 matchTemplate_CCORR(image, templ, result, buf);
282 buf.image_sums.resize(1);
283 buf.image_sqsums.resize(1);
// Both integrals are computed, but only the squared integral is passed to the
// kernel below.
285 integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
// Squared sum of the template (reshape(1) view), truncated to an unsigned
// 64-bit integer and passed as a cl_ulong.
287 unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
289 Context *clCxt = image.clCxt;
290 String kernelName = "normalizeKernel";
// Kernel arguments in push order: squared-integral buffer, result buffer,
// template squared sum, result rows/cols, template rows/cols, then
// offset/step of the squared integral and of the result.
291 std::vector< std::pair<size_t, const void *> > args;
293 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
294 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
295 args.push_back( std::make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
296 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
297 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
298 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
299 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
300 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
301 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
302 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
303 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
// Global size equals the result dimensions; local size is 16x16.
305 size_t globalThreads[3] = {result.cols, result.rows, 1};
306 size_t localThreads[3] = {16, 16, 1};
307 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
// Launches the "matchTemplate_Naive_CCORR" kernel directly on image and
// templ, writing into an already-allocated result. The trailing int
// parameter is unnamed and unused.
310 void matchTemplateNaive_CCORR(
311 const oclMat &image, const oclMat &templ, oclMat &result, int)
// Accepted inputs: 8-bit image and template, or 32-bit float image and
// template together with a CV_32F result.
// NOTE(review): the closing line of this assertion is not visible in this
// listing.
313 CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
314 || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
// Image and template must have the same channel count (1 or 4); result must
// be single-channel.
316 CV_Assert(image.oclchannels() == templ.oclchannels() && (image.oclchannels() == 1 || image.oclchannels() == 4) && result.oclchannels() == 1);
// result must already have the (image - templ + 1) size in both dimensions.
317 CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
319 Context *clCxt = image.clCxt;
320 String kernelName = "matchTemplate_Naive_CCORR";
// Kernel arguments in push order: the three buffers (image, templ, result),
// rows/cols of each, the three offsets, then the three steps.
322 std::vector< std::pair<size_t, const void *> > args;
324 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&image.data));
325 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&templ.data));
326 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
327 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows));
328 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols));
329 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
330 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
331 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
332 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
333 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.offset));
334 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.offset));
335 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
336 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.step));
337 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.step));
338 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
// Global size equals the result dimensions; local size is 16x16. The launch
// is parameterised with the image's channel count and depth.
340 size_t globalThreads[3] = {result.cols, result.rows, 1};
341 size_t localThreads[3] = {16, 16, 1};
342 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
344 //////////////////////////////////////////////////////////////////////
// TM_CCOFF implementation; 8-bit image and template only (asserted below).
// Runs matchTemplate_CCORR, then launches "matchTemplate_Prepared_CCOFF" with
// per-channel image integrals and the per-channel template mean.
// NOTE(review): braces, else, case/default labels and the declaration of
// kernelName are not visible in this listing.
346 void matchTemplate_CCOFF(
347 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
349 CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U);
351 matchTemplate_CCORR(image, templ, result, buf);
353 Context *clCxt = image.clCxt;
356 kernelName = "matchTemplate_Prepared_CCOFF";
// Global size equals the result dimensions; local size is 16x16.
357 size_t globalThreads[3] = {result.cols, result.rows, 1};
358 size_t localThreads[3] = {16, 16, 1};
// Arguments common to both channel layouts, in push order: result buffer,
// image rows/cols, template rows/cols, result rows/cols, result offset/step.
360 std::vector< std::pair<size_t, const void *> > args;
361 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data) );
362 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows) );
363 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols) );
364 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows) );
365 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols) );
366 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows) );
367 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols) );
368 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
369 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
370 Vec4f templ_sum = Vec4f::all(0);
371 // to be continued in the following section
// Single-channel image: one integral buffer, and templ_sum[0] holds the
// template sum divided by the template area.
372 if(image.oclchannels() == 1)
374 buf.image_sums.resize(1);
375 integral(image, buf.image_sums[0]);
377 templ_sum[0] = (float)sum(templ)[0] / templ.size().area();
378 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
379 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
380 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
381 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
// Multi-channel image: split into planes, take the per-channel template sum
// divided by the template area, and integrate each plane separately.
386 split(image, buf.images);
387 templ_sum = sum(templ) / templ.size().area();
388 buf.image_sums.resize(buf.images.size());
391 for(int i = 0; i < image.oclchannels(); i ++)
393 integral(buf.images[i], buf.image_sums[i]);
395 switch(image.oclchannels())
// The visible case pushes four integral buffers but only the offset/step of
// image_sums[0], so the kernel presumably assumes all four share one layout
// -- TODO confirm. It then pushes the four template means.
398 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
399 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
400 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
401 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
402 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
403 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
404 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
405 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
406 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
407 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
// Channel counts not handled by the switch raise StsBadArg.
410 CV_Error(Error::StsBadArg, "matchTemplate: unsupported number of channels");
414 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
// TM_CCOFF_NORMED implementation.
// Converts image and template to CV_32F copies, runs matchTemplate_CCORR on
// them, then launches "matchTemplate_Prepared_CCOFF_NORMED" with per-channel
// image integrals / squared integrals and template statistics.
// NOTE(review): braces, else, case/default labels and the declaration of
// kernelName are not visible in this listing.
417 void matchTemplate_CCOFF_NORMED(
418 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
420 image.convertTo(buf.imagef, CV_32F);
421 templ.convertTo(buf.templf, CV_32F);
423 matchTemplate_CCORR(buf.imagef, buf.templf, result, buf);
// Reciprocal of the template area; also passed to the kernel as a cl_float.
424 float scale = 1.f / templ.size().area();
426 Context *clCxt = image.clCxt;
429 kernelName = "matchTemplate_Prepared_CCOFF_NORMED";
// Global size equals the result dimensions; local size is 16x16.
430 size_t globalThreads[3] = {result.cols, result.rows, 1};
431 size_t localThreads[3] = {16, 16, 1};
// Arguments common to both channel layouts, in push order: result buffer,
// image rows/cols, template rows/cols, result rows/cols, result offset/step,
// scale.
433 std::vector< std::pair<size_t, const void *> > args;
434 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data) );
435 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows) );
436 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols) );
437 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows) );
438 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols) );
439 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows) );
440 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols) );
441 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
442 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
443 args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale) );
445 Vec4f templ_sum = Vec4f::all(0);
446 Vec4f templ_sqsum = Vec4f::all(0);
447 // to be continued in the following section
// Single-channel image: integrals are taken from the original image, not the
// float copy.
448 if(image.oclchannels() == 1)
450 buf.image_sums.resize(1);
451 buf.image_sqsums.resize(1);
452 integral(image, buf.image_sums[0], buf.image_sqsums[0]);
454 templ_sum[0] = (float)sum(templ)[0];
456 templ_sqsum[0] = sqrSum(templ)[0];
// templ_sqsum[0] becomes (sum of squares) - (sum)^2 / area; templ_sum[0] is
// then scaled down to sum / area.
458 templ_sqsum[0] -= scale * templ_sum[0] * templ_sum[0];
459 templ_sum[0] *= scale;
// Push order: integral buffer with offset/step, squared-integral buffer with
// offset/step, then the two template statistics.
461 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
462 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
463 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
464 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
465 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
466 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
467 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
468 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sqsum[0]) );
// Multi-channel image: split into planes and compute per-channel template
// sums and squared sums.
473 split(image, buf.images);
474 templ_sum = sum(templ);
476 templ_sqsum = sqrSum(templ);
// NOTE(review): the next statement already subtracts
// scale * templ_sum * templ_sum from templ_sqsum, and the loop below
// subtracts scale * templ_sum[i] * templ_sum[i] per channel again, which
// looks like a double subtraction compared with the single-channel branch.
// Also, if operator* on two Vec4f values is not element-wise, the vector
// expression is not a per-channel square. Verify the intended math against
// the kernel before changing anything.
478 templ_sqsum -= scale * templ_sum * templ_sum;
// Accumulates one scalar over the image's channels; it is the last kernel
// argument pushed in the switch below.
480 float templ_sqsum_sum = 0;
481 for(int i = 0; i < image.oclchannels(); i ++)
483 templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
486 buf.image_sums.resize(buf.images.size());
487 buf.image_sqsums.resize(buf.images.size());
// One integral and one squared integral per plane.
489 for(int i = 0; i < image.oclchannels(); i ++)
491 integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]);
494 switch(image.oclchannels())
// The visible case pushes four integral buffers and four squared-integral
// buffers, but only the offset/step of index 0 for each group, so the kernel
// presumably assumes a shared layout -- TODO confirm. It then pushes the four
// template sums and the accumulated scalar.
497 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
498 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
499 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
500 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
501 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
502 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
503 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
504 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) );
505 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) );
506 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) );
507 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
508 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
509 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
510 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
511 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
512 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
513 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sqsum_sum) );
// Channel counts not handled by the switch raise StsBadArg.
516 CV_Error(Error::StsBadArg, "matchTemplate: unsupported number of channels");
// The launch is parameterised with the original image's channel count and
// depth, not those of the float copy.
520 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
// Launches the "extractFirstChannel" kernel with image as input and result as
// output. result is not allocated here; its rows/cols define the launch size.
// NOTE(review): the declaration of kernelName is not visible in this listing.
522 void extractFirstChannel_32F(const oclMat &image, oclMat &result)
524 Context *clCxt = image.clCxt;
527 kernelName = "extractFirstChannel";
// Global size equals the result dimensions; local size is 16x16.
528 size_t globalThreads[3] = {result.cols, result.rows, 1};
529 size_t localThreads[3] = {16, 16, 1};
// Kernel arguments in push order: image buffer, result buffer, result
// rows/cols, image/result offsets, image/result steps.
531 std::vector< std::pair<size_t, const void *> > args;
532 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&image.data) );
533 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data) );
534 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows) );
535 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols) );
536 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.offset));
537 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
538 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.step));
539 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
// Both trailing launch parameters are -1 here, where other launches in this
// file pass a channel count and a depth.
541 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, -1, -1);
// Convenience overload: creates a local MatchTemplateBuf and forwards to the
// five-argument overload, so the buffer is not reused across calls.
546 void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method)
548 MatchTemplateBuf buf;
549 matchTemplate(image, templ, result, method, buf);
551 void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf)
553 CV_Assert(image.type() == templ.type());
554 CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows);
556 typedef void (*Caller)(const oclMat &, const oclMat &, oclMat &, MatchTemplateBuf &);
558 const Caller callers[] =
560 ::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED,
561 ::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED,
562 ::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED
565 Caller caller = callers[method];
567 caller(image, templ, result, buf);