modules/ocl/src/match_template.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
  14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // @Authors
  18 //    Peng Xiao, pengxiao@multicorewareinc.com
  19 //
  20 // Redistribution and use in source and binary forms, with or without modification,
  21 // are permitted provided that the following conditions are met:
  22 //
  23 //   * Redistribution's of source code must retain the above copyright notice,
  24 //     this list of conditions and the following disclaimer.
  25 //
  26 //   * Redistribution's in binary form must reproduce the above copyright notice,
  27 //     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
  29 //
  30 //   * The name of the copyright holders may not be used to endorse or promote products
  31 //     derived from this software without specific prior written permission.
  32 //
  33 // This software is provided by the copyright holders and contributors as is and
  34 // any express or implied warranties, including, but not limited to, the implied
  35 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  36 // In no event shall the Intel Corporation or contributors be liable for any direct,
  37 // indirect, incidental, special, exemplary, or consequential damages
  38 // (including, but not limited to, procurement of substitute goods or services;
  39 // loss of use, data, or profits; or business interruption) however caused
  40 // and on any theory of liability, whether in contract, strict liability,
  41 // or tort (including negligence or otherwise) arising in any way out of
  42 // the use of this software, even if advised of the possibility of such damage.
  43 //
  44 //M*/
  45
  46
  47 #include <iomanip>
  48 #include "precomp.hpp"
  49
  50 using namespace cv;
  51 using namespace cv::ocl;
  52 using namespace std;
  53
  54 #if !defined (HAVE_OPENCL)
  55 void cv::ocl::matchTemplate(const oclMat &, const oclMat &, oclMat &)
  56 {
  57     throw_nogpu();
  58 }
  59 #else
  60 //helper routines
  61 namespace cv
  62 {
  63     namespace ocl
  64     {
  65         ///////////////////////////OpenCL kernel strings///////////////////////////
  66         extern const char *match_template;
  67     }
  68 }
  69
  70 namespace cv
  71 {
  72     namespace ocl
  73     {
  74         void matchTemplate_SQDIFF(
  75             const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
  76
  77         void matchTemplate_SQDIFF_NORMED(
  78             const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
  79
  80         void matchTemplate_CCORR(
  81             const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
  82
  83         void matchTemplate_CCORR_NORMED(
  84             const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
  85
  86         void matchTemplate_CCOFF(
  87             const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
  88
  89         void matchTemplate_CCOFF_NORMED(
  90             const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
  91
  92
  93         void matchTemplateNaive_SQDIFF(
  94             const oclMat &image, const oclMat &templ, oclMat &result, int cn);
  95
  96         void matchTemplateNaive_CCORR(
  97             const oclMat &image, const oclMat &templ, oclMat &result, int cn);
  98
  99         // Evaluates optimal template's area threshold. If
 100         // template's area is less  than the threshold, we use naive match
 101         // template version, otherwise FFT-based (if available)
 102         static int getTemplateThreshold(int method, int depth)
 103         {
 104             switch (method)
 105             {
 106             case CV_TM_CCORR:
 107                 if (depth == CV_32F) return 250;
 108                 if (depth == CV_8U) return 300;
 109                 break;
 110             case CV_TM_SQDIFF:
 111                 if (depth == CV_32F) return 0x7fffffff; // do naive SQDIFF for CV_32F
 112                 if (depth == CV_8U) return 300;
 113                 break;
 114             }
 115             CV_Error(CV_StsBadArg, "getTemplateThreshold: unsupported match template mode");
 116             return 0;
 117         }
 118
 119         //////////////////////////////////////////////////////////////////////
 120         // SQDIFF
 121         void matchTemplate_SQDIFF(
 122             const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &)
 123         {
 124             result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
 125             if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
 126             {
 127                 matchTemplateNaive_SQDIFF(image, templ, result, image.oclchannels());
 128                 return;
 129             }
 130             else
 131             {
 132                 // TODO
 133                 CV_Error(CV_StsBadArg, "Not supported yet for this size template");
 134             }
 135         }
 136
 137         void matchTemplate_SQDIFF_NORMED(
 138             const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
 139         {
 140             matchTemplate_CCORR(image, templ, result, buf);
 141             buf.image_sums.resize(1);
 142
 143
 144             integral(image.reshape(1), buf.image_sums[0]);
 145
 146             unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
 147
 148             Context *clCxt = image.clCxt;
 149             string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
 150             vector< pair<size_t, const void *> > args;
 151
 152             args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
 153             args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
 154             args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
 155             args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
 156             args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
 157             args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
 158             args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
 159             args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
 160             args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
 161             args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
 162             args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
 163
 164             size_t globalThreads[3] = {result.cols, result.rows, 1};
 165             size_t localThreads[3]  = {32, 8, 1};
 166             openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
 167         }
 168
 169         void matchTemplateNaive_SQDIFF(
 170             const oclMat &image, const oclMat &templ, oclMat &result, int)
 171         {
 172             CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
 173                       || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
 174                      );
 175             CV_Assert(image.oclchannels() == templ.oclchannels() && (image.oclchannels() == 1 || image.oclchannels() == 4) && result.oclchannels() == 1);
 176             CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
 177
 178             Context *clCxt = image.clCxt;
 179             string kernelName = "matchTemplate_Naive_SQDIFF";
 180
 181             vector< pair<size_t, const void *> > args;
 182
 183             args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
 184             args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
 185             args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
 186             args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
 187             args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
 188             args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
 189             args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
 190             args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
 191             args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
 192             args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
 193             args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
 194             args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
 195             args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
 196             args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
 197             args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
 198
 199             size_t globalThreads[3] = {result.cols, result.rows, 1};
 200             size_t localThreads[3]  = {32, 8, 1};
 201             openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
 202         }
 203
 204         //////////////////////////////////////////////////////////////////////
 205         // CCORR
 206         void matchTemplate_CCORR(
 207             const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
 208         {
 209             result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
 210             if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
 211             {
 212                 matchTemplateNaive_CCORR(image, templ, result, image.oclchannels());
 213                 return;
 214             }
 215             else
 216             {
 217                 CV_Error(CV_StsBadArg, "Not supported yet for this size template");
 218                 if(image.depth() == CV_8U && templ.depth() == CV_8U)
 219                 {
 220                     image.convertTo(buf.imagef, CV_32F);
 221                     templ.convertTo(buf.templf, CV_32F);
 222                 }
 223                 CV_Assert(image.oclchannels() == 1);
 224                 oclMat o_result(image.size(), CV_MAKETYPE(CV_32F, image.oclchannels()));
 225                 filter2D(buf.imagef, o_result, CV_32F, buf.templf, Point(0, 0));
 226                 result = o_result(Rect(0, 0, image.rows - templ.rows + 1, image.cols - templ.cols + 1));
 227             }
 228         }
 229
 230         void matchTemplate_CCORR_NORMED(
 231             const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
 232         {
 233             matchTemplate_CCORR(image, templ, result, buf);
 234             buf.image_sums.resize(1);
 235             buf.image_sqsums.resize(1);
 236
 237             integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
 238
 239             unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
 240
 241             Context *clCxt = image.clCxt;
 242             string kernelName = "normalizeKernel";
 243             vector< pair<size_t, const void *> > args;
 244
 245             args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
 246             args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
 247             args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
 248             args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
 249             args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
 250             args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
 251             args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
 252             args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
 253             args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
 254             args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
 255             args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
 256
 257             size_t globalThreads[3] = {result.cols, result.rows, 1};
 258             size_t localThreads[3]  = {32, 8, 1};
 259             openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
 260         }
 261
 262         void matchTemplateNaive_CCORR(
 263             const oclMat &image, const oclMat &templ, oclMat &result, int)
 264         {
 265             CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
 266                       || ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
 267                      );
 268             CV_Assert(image.oclchannels() == templ.oclchannels() && (image.oclchannels() == 1 || image.oclchannels() == 4) && result.oclchannels() == 1);
 269             CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
 270
 271             Context *clCxt = image.clCxt;
 272             string kernelName = "matchTemplate_Naive_CCORR";
 273
 274             vector< pair<size_t, const void *> > args;
 275
 276             args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
 277             args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
 278             args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
 279             args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
 280             args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
 281             args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
 282             args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
 283             args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
 284             args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
 285             args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
 286             args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
 287             args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
 288             args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
 289             args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
 290             args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
 291
 292             size_t globalThreads[3] = {result.cols, result.rows, 1};
 293             size_t localThreads[3]  = {32, 8, 1};
 294             openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
 295         }
 296         //////////////////////////////////////////////////////////////////////
 297         // CCOFF
 298         void matchTemplate_CCOFF(
 299             const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
 300         {
 301             CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U);
 302
 303             matchTemplate_CCORR(image, templ, result, buf);
 304
 305             Context *clCxt = image.clCxt;
 306             string kernelName;
 307
 308             kernelName = "matchTemplate_Prepared_CCOFF";
 309             size_t globalThreads[3] = {result.cols, result.rows, 1};
 310             size_t localThreads[3]  = {32, 8, 1};
 311
 312             vector< pair<size_t, const void *> > args;
 313             args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
 314             args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
 315             args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
 316             args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
 317             args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
 318             args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
 319             args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
 320             args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
 321             args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
 322             // to be continued in the following section
 323             if(image.oclchannels() == 1)
 324             {
 325                 buf.image_sums.resize(1);
 326                 integral(image, buf.image_sums[0]);
 327
 328                 float templ_sum = 0;
 329                 templ_sum = (float)sum(templ)[0] / templ.size().area();
 330                 args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[0].data) );
 331                 args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].offset) );
 332                 args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].step) );
 333                 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum) );
 334             }
 335             else
 336             {
 337                 Vec4f templ_sum = Vec4f::all(0);
 338                 split(image, buf.images);
 339                 templ_sum = sum(templ) / templ.size().area();
 340                 buf.image_sums.resize(buf.images.size());
 341
 342
 343                 for(int i = 0; i < image.oclchannels(); i ++)
 344                 {
 345                     integral(buf.images[i], buf.image_sums[i]);
 346                 }
 347                 switch(image.oclchannels())
 348                 {
 349                 case 4:
 350                     args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[0].data) );
 351                     args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[1].data) );
 352                     args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[2].data) );
 353                     args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[3].data) );
 354                     args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].offset) );
 355                     args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].step) );
 356                     args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
 357                     args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
 358                     args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
 359                     args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
 360                     break;
 361                 default:
 362                     CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
 363                     break;
 364                 }
 365             }
 366             openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
 367         }
 368
 369         void matchTemplate_CCOFF_NORMED(
 370             const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
 371         {
 372             image.convertTo(buf.imagef, CV_32F);
 373             templ.convertTo(buf.templf, CV_32F);
 374
 375             matchTemplate_CCORR(buf.imagef, buf.templf, result, buf);
 376             float scale = 1.f / templ.size().area();
 377
 378             Context *clCxt = image.clCxt;
 379             string kernelName;
 380
 381             kernelName = "matchTemplate_Prepared_CCOFF_NORMED";
 382             size_t globalThreads[3] = {result.cols, result.rows, 1};
 383             size_t localThreads[3]  = {32, 8, 1};
 384
 385             vector< pair<size_t, const void *> > args;
 386             args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
 387             args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
 388             args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
 389             args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
 390             args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
 391             args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
 392             args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
 393             args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
 394             args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
 395             args.push_back( make_pair( sizeof(cl_float), (void *)&scale) );
 396             // to be continued in the following section
 397             if(image.oclchannels() == 1)
 398             {
 399                 buf.image_sums.resize(1);
 400                 buf.image_sqsums.resize(1);
 401                 integral(image, buf.image_sums[0], buf.image_sqsums[0]);
 402                 float templ_sum = 0;
 403                 float templ_sqsum = 0;
 404                 templ_sum   = (float)sum(templ)[0];
 405
 406                 templ_sqsum = sqrSum(templ)[0];
 407
 408                 templ_sqsum -= scale * templ_sum * templ_sum;
 409                 templ_sum   *= scale;
 410
 411                 args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[0].data) );
 412                 args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].offset) );
 413                 args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].step) );
 414                 args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[0].data) );
 415                 args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sqsums[0].offset) );
 416                 args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sqsums[0].step) );
 417                 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum) );
 418                 args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sqsum) );
 419             }
 420             else
 421             {
 422                 Vec4f templ_sum   = Vec4f::all(0);
 423                 Vec4f templ_sqsum = Vec4f::all(0);
 424
 425                 split(image, buf.images);
 426                 templ_sum   = sum(templ);
 427
 428                 templ_sqsum = sqrSum(templ);
 429
 430                 templ_sqsum -= scale * templ_sum * templ_sum;
 431
 432                 float templ_sqsum_sum = 0;
 433                 for(int i = 0; i < image.oclchannels(); i ++)
 434                 {
 435                     templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
 436                 }
 437                 templ_sum   *= scale;
 438                 buf.image_sums.resize(buf.images.size());
 439                 buf.image_sqsums.resize(buf.images.size());
 440
 441                 for(int i = 0; i < image.oclchannels(); i ++)
 442                 {
 443                     integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]);
 444                 }
 445
 446                 switch(image.oclchannels())
 447                 {
 448                 case 4:
 449                     args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[0].data) );
 450                     args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[1].data) );
 451                     args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[2].data) );
 452                     args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[3].data) );
 453                     args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].offset) );
 454                     args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].step) );
 455                     args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[0].data) );
 456                     args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[1].data) );
 457                     args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[2].data) );
 458                     args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[3].data) );
 459                     args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sqsums[0].offset) );
 460                     args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sqsums[0].step) );
 461                     args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
 462                     args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
 463                     args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
 464                     args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
 465                     args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sqsum_sum) );
 466                     break;
 467                 default:
 468                     CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
 469                     break;
 470                 }
 471             }
 472             openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
 473         }
 474
 475     }/*ocl*/
 476 } /*cv*/
 477
 478 void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method)
 479 {
 480     MatchTemplateBuf buf;
 481     matchTemplate(image, templ, result, method, buf);
 482 }
 483 void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf)
 484 {
 485     CV_Assert(image.type() == templ.type());
 486     CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows);
 487
 488     typedef void (*Caller)(const oclMat &, const oclMat &, oclMat &, MatchTemplateBuf &);
 489
 490     const Caller callers[] =
 491     {
 492         ::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED,
 493         ::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED,
 494         ::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED
 495     };
 496
 497     Caller caller = callers[method];
 498     CV_Assert(caller);
 499     caller(image, templ, result, buf);
 500 }
 501 #endif //