1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
18 // Peng Xiao, pengxiao@multicorewareinc.com
20 // Redistribution and use in source and binary forms, with or without modification,
21 // are permitted provided that the following conditions are met:
23 // * Redistribution's of source code must retain the above copyright notice,
24 // this list of conditions and the following disclaimer.
26 // * Redistribution's in binary form must reproduce the above copyright notice,
27 // this list of conditions and the following disclaimer in the documentation
28 // and/or other materials provided with the distribution.
30 // * The name of the copyright holders may not be used to endorse or promote products
31 // derived from this software without specific prior written permission.
33 // This software is provided by the copyright holders and contributors as is and
34 // any express or implied warranties, including, but not limited to, the implied
35 // warranties of merchantability and fitness for a particular purpose are disclaimed.
36 // In no event shall the Intel Corporation or contributors be liable for any direct,
37 // indirect, incidental, special, exemplary, or consequential damages
38 // (including, but not limited to, procurement of substitute goods or services;
39 // loss of use, data, or profits; or business interruption) however caused
40 // and on any theory of liability, whether in contract, strict liability,
41 // or tort (including negligence or otherwise) arising in any way out of
42 // the use of this software, even if advised of the possibility of such damage.
47 #include "precomp.hpp"
48 #include "opencl_kernels.hpp"
51 using namespace cv::ocl;
// Forward declarations of the per-method implementations defined further
// down. The matchTemplate_* functions all take (image, templ, result, buf),
// which is the signature of the function-pointer table used by
// cv::ocl::matchTemplate to dispatch on the method.
57 void matchTemplate_SQDIFF(
58 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
60 void matchTemplate_SQDIFF_NORMED(
61 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
// NOTE(review): the line naming the next declaration is not present here. Its
// parameter list matches the convolve_32F calls made by matchTemplate_CCORR --
// confirm against the full file.
64 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
66 void matchTemplate_CCORR(
67 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
69 void matchTemplate_CCORR_NORMED(
70 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
72 void matchTemplate_CCOFF(
73 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
75 void matchTemplate_CCOFF_NORMED(
76 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
// Naive variants, selected through useNaive(). The trailing int is declared
// as `cn`; callers pass image.oclchannels() and the definitions leave the
// parameter unnamed.
79 void matchTemplateNaive_SQDIFF(
80 const oclMat &image, const oclMat &templ, oclMat &result, int cn);
82 void matchTemplateNaive_CCORR(
83 const oclMat &image, const oclMat &templ, oclMat &result, int cn);
// Helper that launches the "extractFirstChannel" kernel (defined below).
85 void extractFirstChannel_32F(
86 const oclMat &image, oclMat &result);
88 // Evaluates optimal template's area threshold. If
89 // template's area is less than the threshold, we use naive match
90 // template version, otherwise FFT-based (if available)
// method - matching method; only TM_SQDIFF and TM_CCORR are examined here
// depth  - element depth (callers pass image.depth())
// size   - template size (callers pass templ.size())
//
// NOTE(review): the preprocessor conditional that separates the two halves of
// this body, and the return statements of the first and the fallback branch,
// are not visible in this excerpt -- confirm them before relying on the notes
// below.
91 static bool useNaive(int method, int depth, Size size)
// Case 1: TM_SQDIFF on CV_32F data, or on a device that does not report
// FEATURE_CL_DOUBLE.
94 if (method == TM_SQDIFF && (depth == CV_32F || !Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)))
// Case 2: TM_CCORR, or TM_SQDIFF on CV_8U data -- naive only when the template
// is less than 18 in both height and width.
98 else if(method == TM_CCORR || (method == TM_SQDIFF && depth == CV_8U))
100 return size.height < 18 && size.width < 18;
// Evaluates each parameter as a discarded void expression; presumably this
// silences unused-parameter warnings in the build configuration where the
// checks above are compiled out -- confirm. Note that the macro already ends
// in ';', which is why the uses below carry no separator.
105 #define UNUSED(x) (void)(x);
106 UNUSED(method) UNUSED(depth) UNUSED(size)
112 //////////////////////////////////////////////////////////////////////
// Squared-difference template matching.
//
// Allocates `result` as a CV_32F matrix with (image.rows - templ.rows + 1)
// rows and (image.cols - templ.cols + 1) columns, then either calls the naive
// kernel wrapper or combines matchTemplate_CCORR with a squared-sum table and
// the "matchTemplate_Prepared_SQDIFF" kernel.
//
// image  - source image
// templ  - template to search for
// result - output matrix, (re)created here
// buf    - scratch buffers; image_sqsums is resized to one entry and
//          overwritten on the non-naive path
//
// NOTE(review): the lines between the naive call and the code that follows it,
// and the declaration of `sums` / `sqsums`, are not visible in this excerpt, so
// how the two paths are separated cannot be confirmed from here.
114 void matchTemplate_SQDIFF(
115 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf & buf)
117 result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
// useNaive() decides whether the naive kernel is used for this depth and
// template size.
118 if (useNaive(TM_SQDIFF, image.depth(), templ.size()))
120 matchTemplateNaive_SQDIFF(image, templ, result, image.oclchannels());
125 buf.image_sqsums.resize(1);
127 // TODO, add double support for ocl::integral
128 // use CPU integral temporarily
// The image is viewed as single-channel and wrapped in a cv::Mat so that the
// CPU cv::integral can be used; only the squared-sum output is kept.
130 cv::integral(Mat(image.reshape(1)), sums, sqsums);
131 buf.image_sqsums[0] = sqsums;
// Sum of squared template elements over all channels, truncated to an integer
// because the kernel argument is passed as cl_ulong.
133 unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
// `result` receives the cross-correlation; it is then handed to the kernel
// below together with the squared-sum table.
134 matchTemplate_CCORR(image, templ, result, buf);
136 //port CUDA's matchTemplatePrepared_SQDIFF_8U
137 Context *clCxt = image.clCxt;
138 String kernelName = "matchTemplate_Prepared_SQDIFF";
// Kernel arguments as (size, pointer) pairs, in the order they are pushed:
// squared-sum table, result, template square sum, result size, template size,
// then offset/step of the table and of the result.
139 std::vector< std::pair<size_t, const void *> > args;
141 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
142 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
143 args.push_back( std::make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
144 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
145 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
146 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
147 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
148 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
149 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
150 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
151 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
// Global range is the result size (columns, rows); local size is 16x16.
153 size_t globalThreads[3] = {result.cols, result.rows, 1};
154 size_t localThreads[3] = {16, 16, 1};
// 4-channel images build the kernel with CN4 defined; the launch itself is
// requested with channels = 1 and depth = CV_8U.
156 const char * build_opt = image.oclchannels() == 4 ? "-D CN4" : "";
157 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U, build_opt);
161 void matchTemplate_SQDIFF_NORMED(
162 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
164 matchTemplate_CCORR(image, templ, result, buf);
165 buf.image_sums.resize(1);
167 integral(image.reshape(1), buf.image_sums[0]);
169 unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
171 Context *clCxt = image.clCxt;
172 String kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
173 std::vector< std::pair<size_t, const void *> > args;
175 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
176 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
177 args.push_back( std::make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
178 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
179 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
180 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
181 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
182 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
183 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
184 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
185 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
187 size_t globalThreads[3] = {result.cols, result.rows, 1};
188 size_t localThreads[3] = {16, 16, 1};
189 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
192 void matchTemplateNaive_SQDIFF(
193 const oclMat &image, const oclMat &templ, oclMat &result, int)
195 CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
196 || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
198 CV_Assert(image.oclchannels() == templ.oclchannels() && (image.oclchannels() == 1 || image.oclchannels() == 4) && result.oclchannels() == 1);
199 CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
201 Context *clCxt = image.clCxt;
202 String kernelName = "matchTemplate_Naive_SQDIFF";
204 std::vector< std::pair<size_t, const void *> > args;
206 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&image.data));
207 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&templ.data));
208 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
209 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows));
210 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols));
211 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
212 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
213 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
214 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
215 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.offset));
216 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.offset));
217 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
218 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.step));
219 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.step));
220 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
222 size_t globalThreads[3] = {result.cols, result.rows, 1};
223 size_t localThreads[3] = {16, 16, 1};
224 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
227 //////////////////////////////////////////////////////////////////////
// NOTE(review): the line naming this function is not present in this excerpt;
// the calls made by matchTemplate_CCORR suggest it is convolve_32F -- confirm.
//
// Runs convolve() on `image` and `templ` with its boolean argument set to
// true, forwarding the caller's buf.user_block_size through a local
// ConvolveBuf. Single-channel input is convolved straight into `result`;
// otherwise both inputs are viewed as single-channel with reshape(1),
// convolved into `result_`, and extractFirstChannel_32F copies from `result_`
// into `result`.
// NOTE(review): the declaration of `result_` and the branch separator are not
// visible here.
230 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
232 ConvolveBuf convolve_buf;
233 convolve_buf.user_block_size = buf.user_block_size;
// Single-channel input: no channel extraction needed.
234 if (image.oclchannels() == 1)
235 convolve(image, templ, result, true, convolve_buf);
// Multi-channel input: convolve the interleaved single-channel views, then
// extract from the intermediate.
239 convolve(image.reshape(1), templ.reshape(1), result_, true, convolve_buf);
240 extractFirstChannel_32F(result_, result);
// Cross-correlation template matching.
//
// Allocates `result` as a CV_32F matrix with (image.rows - templ.rows + 1)
// rows and (image.cols - templ.cols + 1) columns, then either calls the naive
// kernel wrapper or goes through convolve_32F.
//
// image  - source image
// templ  - template to search for
// result - output matrix, (re)created here
// buf    - scratch buffers; imagef / templf receive CV_32F copies of 8-bit
//          inputs
//
// NOTE(review): the lines that separate the branches below are not visible in
// this excerpt.
244 void matchTemplate_CCORR(
245 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
247 result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
// useNaive() decides whether the naive kernel is used for this depth and
// template size.
248 if (useNaive(TM_CCORR, image.depth(), templ.size()))
250 matchTemplateNaive_CCORR(image, templ, result, image.oclchannels());
// 8-bit inputs are converted to CV_32F before being handed to convolve_32F.
255 if(image.depth() == CV_8U && templ.depth() == CV_8U)
257 image.convertTo(buf.imagef, CV_32F);
258 templ.convertTo(buf.templf, CV_32F);
259 convolve_32F(buf.imagef, buf.templf, result, buf);
// Inputs are handed to convolve_32F unchanged.
263 convolve_32F(image, templ, result, buf);
// Normalised cross-correlation template matching.
//
// Computes the plain cross-correlation into `result` (matchTemplate_CCORR also
// allocates it), builds the integral tables of the single-channel view of
// `image`, and launches "normalizeKernel" with the squared-sum table, the
// result and the template's sum of squares.
//
// image  - source image
// templ  - template to search for
// result - output matrix, created by matchTemplate_CCORR
// buf    - scratch buffers; image_sums and image_sqsums are each resized to
//          one entry and overwritten
268 void matchTemplate_CCORR_NORMED(
269 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
271 cv::ocl::oclMat temp;
272 matchTemplate_CCORR(image, templ, result, buf);
273 buf.image_sums.resize(1);
274 buf.image_sqsums.resize(1);
// `temp` receives integral()'s third output, which is stored below as the
// squared-sum table.
275 integral(image.reshape(1), buf.image_sums[0], temp);
// A CV_64F table is converted to CV_32FC1 before being given to the kernel.
276 if(temp.depth() == CV_64F)
277 temp.convertTo(buf.image_sqsums[0], CV_32FC1);
// NOTE(review): no `else` is visible between the conversion above and this
// assignment in this excerpt; as shown, the assignment would replace the
// converted table -- confirm against the full file.
279 buf.image_sqsums[0] = temp;
// Sum of squared template elements over all channels, truncated to an integer
// because the kernel argument is passed as cl_ulong.
280 unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
282 Context *clCxt = image.clCxt;
283 String kernelName = "normalizeKernel";
// Argument order: squared-sum table, result, template square sum, result size,
// template size, then offset/step of the table and of the result.
284 std::vector< std::pair<size_t, const void *> > args;
286 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
287 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
288 args.push_back( std::make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
289 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
290 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
291 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
292 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
293 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
294 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
295 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
296 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
// Global range is the result size (columns, rows); local size is 16x16. The
// launch is requested with channels = 1 and depth = CV_8U.
298 size_t globalThreads[3] = {result.cols, result.rows, 1};
299 size_t localThreads[3] = {16, 16, 1};
300 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
303 void matchTemplateNaive_CCORR(
304 const oclMat &image, const oclMat &templ, oclMat &result, int)
306 CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
307 || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
309 CV_Assert(image.oclchannels() == templ.oclchannels() && (image.oclchannels() == 1 || image.oclchannels() == 4) && result.oclchannels() == 1);
310 CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
312 Context *clCxt = image.clCxt;
313 String kernelName = "matchTemplate_Naive_CCORR";
315 std::vector< std::pair<size_t, const void *> > args;
317 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&image.data));
318 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&templ.data));
319 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
320 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows));
321 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols));
322 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
323 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
324 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
325 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
326 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.offset));
327 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.offset));
328 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
329 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.step));
330 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.step));
331 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
333 size_t globalThreads[3] = {result.cols, result.rows, 1};
334 size_t localThreads[3] = {16, 16, 1};
335 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
337 //////////////////////////////////////////////////////////////////////
// Correlation-coefficient template matching for 8-bit images.
//
// Asserts that both inputs are CV_8U, computes the plain cross-correlation
// into `result` (matchTemplate_CCORR also allocates it), then launches
// "matchTemplate_Prepared_CCOFF" with the image integral table(s) and the
// template's per-channel mean.
//
// image  - source image, CV_8U
// templ  - template, CV_8U
// result - output matrix, created by matchTemplate_CCORR
// buf    - scratch buffers; image_sums (and images, for multi-channel input)
//          are resized and overwritten
//
// NOTE(review): the declaration of `kernelName`, the separator between the
// single- and multi-channel branches, and the case/default labels of the
// switch are not visible in this excerpt.
339 void matchTemplate_CCOFF(
340 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
342 CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U);
344 matchTemplate_CCORR(image, templ, result, buf);
346 Context *clCxt = image.clCxt;
349 kernelName = "matchTemplate_Prepared_CCOFF";
// Global range is the result size (columns, rows); local size is 16x16.
350 size_t globalThreads[3] = {result.cols, result.rows, 1};
351 size_t localThreads[3] = {16, 16, 1};
// Arguments common to every channel count: result buffer, image size,
// template size, result size, result offset/step.
353 std::vector< std::pair<size_t, const void *> > args;
354 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data) );
355 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows) );
356 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols) );
357 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows) );
358 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols) );
359 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows) );
360 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols) );
361 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
362 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
// Declared before the branches so that the element addresses pushed into
// `args` are still valid when the kernel is launched at the end.
363 Vec4f templ_sum = Vec4f::all(0);
364 // to be continued in the following section
// Single-channel input: one integral table and one template mean.
365 if(image.oclchannels() == 1)
367 buf.image_sums.resize(1);
368 integral(image, buf.image_sums[0]);
// Template mean: sum of elements divided by the template area.
370 templ_sum[0] = (float)sum(templ)[0] / templ.size().area();
371 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
372 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
373 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
374 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
// Multi-channel input: split into planes, take the per-channel template mean
// and build one integral table per plane.
379 split(image, buf.images);
380 templ_sum = sum(templ) / templ.size().area();
381 buf.image_sums.resize(buf.images.size());
384 for(int i = 0; i < image.oclchannels(); i ++)
386 integral(buf.images[i], buf.image_sums[i]);
388 switch(image.oclchannels())
// Four integral tables are passed, but only the first table's offset and step;
// this presumably relies on all tables sharing the same layout -- confirm.
391 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
392 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
393 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
394 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
395 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
396 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
397 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
398 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
399 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
400 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
// Raised for channel counts the switch does not handle.
403 CV_Error(Error::StsBadArg, "matchTemplate: unsupported number of channels");
// The launch is requested with the image's own channel count and depth.
407 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
// Normalised correlation-coefficient template matching.
//
// Converts both inputs to CV_32F copies, computes their cross-correlation into
// `result` (matchTemplate_CCORR also allocates it), then launches
// "matchTemplate_Prepared_CCOFF_NORMED" with the image integral and
// squared-integral table(s) plus statistics of the template.
//
// image  - source image
// templ  - template to search for
// result - output matrix, created by matchTemplate_CCORR
// buf    - scratch buffers; imagef, templf, image_sums, image_sqsums (and
//          images, for multi-channel input) are overwritten
//
// NOTE(review): the declaration of `kernelName`, the `else` keywords, the
// separator between the single- and multi-channel branches and the
// case/default labels of the switch are not visible in this excerpt.
410 void matchTemplate_CCOFF_NORMED(
411 const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
413 image.convertTo(buf.imagef, CV_32F);
414 templ.convertTo(buf.templf, CV_32F);
416 matchTemplate_CCORR(buf.imagef, buf.templf, result, buf);
// Reciprocal of the template area; used to turn sums into means.
417 float scale = 1.f / templ.size().area();
419 Context *clCxt = image.clCxt;
422 kernelName = "matchTemplate_Prepared_CCOFF_NORMED";
// Global range is the result size (columns, rows); local size is 16x16.
423 size_t globalThreads[3] = {result.cols, result.rows, 1};
424 size_t localThreads[3] = {16, 16, 1};
// Arguments common to every channel count: result buffer, image size,
// template size, result size, result offset/step and the area reciprocal.
426 std::vector< std::pair<size_t, const void *> > args;
427 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data) );
428 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows) );
429 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols) );
430 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows) );
431 args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols) );
432 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows) );
433 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols) );
434 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
435 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
436 args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale) );
// Declared before the branches so that the element addresses pushed into
// `args` are still valid when the kernel is launched at the end.
438 Vec4f templ_sum = Vec4f::all(0);
439 Vec4f templ_sqsum = Vec4f::all(0);
440 // to be continued in the following section
// Single-channel input.
441 if(image.oclchannels() == 1)
443 buf.image_sums.resize(1);
444 buf.image_sqsums.resize(1);
445 cv::ocl::oclMat temp;
// `temp` receives integral()'s third output, stored below as the squared-sum
// table (converted to CV_32FC1 when it comes back as CV_64F).
446 integral(image, buf.image_sums[0], temp);
447 if(temp.depth() == CV_64F)
448 temp.convertTo(buf.image_sqsums[0], CV_32FC1);
// NOTE(review): no `else` is visible before this assignment in this excerpt --
// confirm against the full file.
450 buf.image_sqsums[0] = temp;
452 templ_sum[0] = (float)sum(templ)[0];
454 templ_sqsum[0] = sqrSum(templ)[0];
// sqsum - sum^2 / area, then the sum itself is turned into the mean.
456 templ_sqsum[0] -= scale * templ_sum[0] * templ_sum[0];
457 templ_sum[0] *= scale;
459 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
460 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
461 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
462 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
463 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
464 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
465 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
466 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sqsum[0]) );
// Multi-channel input: split into planes and gather per-channel template sums.
471 split(image, buf.images);
472 templ_sum = sum(templ);
474 templ_sqsum = sqrSum(templ);
// NOTE(review): scale * templ_sum * templ_sum is subtracted from templ_sqsum
// here, and scale * templ_sum[i] * templ_sum[i] is subtracted again for every
// channel in the loop below, whereas the single-channel branch subtracts it
// once. Also, the product of two 4-element Vecs may not be element-wise in
// OpenCV. Confirm both against the kernel and the Vec operator definitions.
476 templ_sqsum -= scale * templ_sum * templ_sum;
// Accumulates one scalar over all channels for the kernel.
478 float templ_sqsum_sum = 0;
479 for(int i = 0; i < image.oclchannels(); i ++)
481 templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
484 buf.image_sums.resize(buf.images.size());
485 buf.image_sqsums.resize(buf.images.size());
486 cv::ocl::oclMat temp;
// One integral and one squared-integral table per plane; CV_64F squared sums
// are converted to CV_32FC1.
487 for(int i = 0; i < image.oclchannels(); i ++)
489 integral(buf.images[i], buf.image_sums[i], temp);
490 if(temp.depth() == CV_64F)
491 temp.convertTo(buf.image_sqsums[i], CV_32FC1);
// NOTE(review): no `else` is visible before this assignment in this excerpt --
// confirm against the full file.
493 buf.image_sqsums[i] = temp;
496 switch(image.oclchannels())
// Four tables of each kind are passed, but only the first table's offset and
// step; this presumably relies on all tables sharing the same layout --
// confirm.
499 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
500 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
501 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
502 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
503 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
504 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
505 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
506 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) );
507 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) );
508 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) );
509 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
510 args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
511 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
512 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
513 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
514 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
515 args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sqsum_sum) );
// Raised for channel counts the switch does not handle.
518 CV_Error(Error::StsBadArg, "matchTemplate: unsupported number of channels");
// The launch is requested with the original image's channel count and depth,
// not those of the float copy.
522 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
524 void extractFirstChannel_32F(const oclMat &image, oclMat &result)
526 Context *clCxt = image.clCxt;
529 kernelName = "extractFirstChannel";
530 size_t globalThreads[3] = {result.cols, result.rows, 1};
531 size_t localThreads[3] = {16, 16, 1};
533 std::vector< std::pair<size_t, const void *> > args;
534 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&image.data) );
535 args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data) );
536 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows) );
537 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols) );
538 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.offset));
539 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
540 args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.step));
541 args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
543 openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, -1, -1);
548 void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method)
550 MatchTemplateBuf buf;
551 matchTemplate(image, templ, result, method, buf);
553 void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf)
555 CV_Assert(image.type() == templ.type());
556 CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows);
558 typedef void (*Caller)(const oclMat &, const oclMat &, oclMat &, MatchTemplateBuf &);
560 const Caller callers[] =
562 ::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED,
563 ::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED,
564 ::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED
567 Caller caller = callers[method];
569 caller(image, templ, result, buf);