modules/ocl/src/match_template.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
  14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // @Authors
  18 //    Peng Xiao, pengxiao@multicorewareinc.com
  19 //
  20 // Redistribution and use in source and binary forms, with or without modification,
  21 // are permitted provided that the following conditions are met:
  22 //
  23 //   * Redistribution's of source code must retain the above copyright notice,
  24 //     this list of conditions and the following disclaimer.
  25 //
  26 //   * Redistribution's in binary form must reproduce the above copyright notice,
  27 //     this list of conditions and the following disclaimer in the documentation
  28 //     and/or other materials provided with the distribution.
  29 //
  30 //   * The name of the copyright holders may not be used to endorse or promote products
  31 //     derived from this software without specific prior written permission.
  32 //
  33 // This software is provided by the copyright holders and contributors as is and
  34 // any express or implied warranties, including, but not limited to, the implied
  35 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  36 // In no event shall the Intel Corporation or contributors be liable for any direct,
  37 // indirect, incidental, special, exemplary, or consequential damages
  38 // (including, but not limited to, procurement of substitute goods or services;
  39 // loss of use, data, or profits; or business interruption) however caused
  40 // and on any theory of liability, whether in contract, strict liability,
  41 // or tort (including negligence or otherwise) arising in any way out of
  42 // the use of this software, even if advised of the possibility of such damage.
  43 //
  44 //M*/
  45
  46
  47 #include <iomanip>
  48 #include "precomp.hpp"
  49
  50 using namespace cv;
  51 using namespace cv::ocl;
  52 using namespace std;
  53
  54 #if !defined (HAVE_OPENCL)
  55 void cv::ocl::matchTemplate(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); }
  56 #else
  57 //helper routines
  58 namespace cv
  59 {
  60     namespace ocl
  61     {
  62         ///////////////////////////OpenCL kernel strings///////////////////////////
  63         extern const char *match_template;
  64     }
  65 }
  66
  67 namespace cv { namespace ocl
  68 {
  69     void matchTemplate_SQDIFF(
  70         const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
  71
  72     void matchTemplate_SQDIFF_NORMED(
  73         const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
  74
  75     void matchTemplate_CCORR(
  76         const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
  77
  78     void matchTemplate_CCORR_NORMED(
  79         const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
  80
  81     void matchTemplate_CCOFF(
  82         const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
  83
  84     void matchTemplate_CCOFF_NORMED(
  85         const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf);
  86
  87
  88     void matchTemplateNaive_SQDIFF(
  89         const oclMat& image, const oclMat& templ, oclMat& result, int cn);
  90
  91     void matchTemplateNaive_CCORR(
  92         const oclMat& image, const oclMat& templ, oclMat& result, int cn);
  93
  94     // Evaluates optimal template's area threshold. If
  95     // template's area is less  than the threshold, we use naive match
  96     // template version, otherwise FFT-based (if available)
  97     int getTemplateThreshold(int method, int depth)
  98     {
  99         switch (method)
 100         {
 101         case CV_TM_CCORR:
 102             if (depth == CV_32F) return 250;
 103             if (depth == CV_8U) return 300;
 104             break;
 105         case CV_TM_SQDIFF:
 106             if (depth == CV_32F) return 0x7fffffff; // do naive SQDIFF for CV_32F
 107             if (depth == CV_8U) return 300;
 108             break;
 109         }
 110         CV_Error(CV_StsBadArg, "getTemplateThreshold: unsupported match template mode");
 111         return 0;
 112     }
 113
 114     //////////////////////////////////////////////////////////////////////
 115     // SQDIFF
 116     void matchTemplate_SQDIFF(
 117         const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
 118     {
 119         result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
 120         if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
 121         {
 122             matchTemplateNaive_SQDIFF(image, templ, result, image.channels());
 123             return;
 124         }
 125         else
 126         {
 127             // TODO
 128             CV_Error(CV_StsBadArg, "Not supported yet for this size template");
 129         }
 130     }
 131
 132     void matchTemplate_SQDIFF_NORMED(
 133         const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
 134     {
 135         matchTemplate_CCORR(image,templ,result,buf);
 136         buf.image_sums.resize(1);
 137
 138
 139         integral(image.reshape(1), buf.image_sums[0]);
 140
 141 #if SQRSUM_FIXED
 142         unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
 143 #else
 144         Mat sqr_mat = templ.reshape(1);
 145         unsigned long long templ_sqsum = (unsigned long long)sum(sqr_mat.mul(sqr_mat))[0];
 146 #endif
 147
 148         Context *clCxt = image.clCxt;
 149         string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
 150         vector< pair<size_t, const void *> > args;
 151
 152         args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
 153         args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
 154         args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
 155         args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
 156         args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
 157         args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
 158         args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
 159         args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
 160         args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
 161         args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
 162         args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
 163
 164         size_t globalThreads[3] = {result.cols, result.rows, 1};
 165         size_t localThreads[3]  = {32, 8, 1};
 166         openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
 167     }
 168
 169     void matchTemplateNaive_SQDIFF(
 170         const oclMat& image, const oclMat& templ, oclMat& result, int cn)
 171     {
 172         CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
 173             || (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F);
 174         CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
 175         CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
 176
 177         Context *clCxt = image.clCxt;
 178         string kernelName = "matchTemplate_Naive_SQDIFF";
 179
 180         vector< pair<size_t, const void *> > args;
 181
 182         args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
 183         args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
 184         args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
 185         args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
 186         args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
 187         args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
 188         args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
 189         args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
 190         args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
 191         args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
 192         args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
 193         args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
 194         args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
 195         args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
 196         args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
 197
 198         size_t globalThreads[3] = {result.cols, result.rows, 1};
 199         size_t localThreads[3]  = {32, 8, 1};
 200         openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
 201     }
 202
 203     //////////////////////////////////////////////////////////////////////
 204     // CCORR
 205     void matchTemplate_CCORR(
 206         const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
 207     {
 208         result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
 209         if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, image.depth()))
 210         {
 211             matchTemplateNaive_CCORR(image, templ, result, image.channels());
 212             return;
 213         }
 214         else
 215         {
 216             CV_Error(CV_StsBadArg, "Not supported yet for this size template");
 217             if(image.depth() == CV_8U && templ.depth() == CV_8U)
 218             {
 219                 image.convertTo(buf.imagef, CV_32F);
 220                 templ.convertTo(buf.templf, CV_32F);
 221             }
 222             CV_Assert(image.channels() == 1);
 223             oclMat o_result(image.size(), CV_MAKETYPE(CV_32F, image.channels()));
 224             filter2D(buf.imagef,o_result,CV_32F,buf.templf, Point(0,0));
 225             result = o_result(Rect(0,0,image.rows - templ.rows + 1, image.cols - templ.cols + 1));
 226         }
 227     }
 228
 229     void matchTemplate_CCORR_NORMED(
 230         const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
 231     {
 232         matchTemplate_CCORR(image,templ,result,buf);
 233         buf.image_sums.resize(1);
 234         buf.image_sqsums.resize(1);
 235
 236         integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
 237 #if SQRSUM_FIXED
 238         unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
 239 #else
 240         oclMat templ_c1 = templ.reshape(1);
 241         multiply(templ_c1, templ_c1, templ_c1);
 242         unsigned long long templ_sqsum = (unsigned long long)sum(templ_c1)[0];
 243 #endif
 244         Context *clCxt = image.clCxt;
 245         string kernelName = "normalizeKernel";
 246         vector< pair<size_t, const void *> > args;
 247
 248         args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
 249         args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
 250         args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
 251         args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
 252         args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
 253         args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
 254         args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
 255         args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
 256         args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
 257         args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
 258         args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
 259
 260         size_t globalThreads[3] = {result.cols, result.rows, 1};
 261         size_t localThreads[3]  = {32, 8, 1};
 262         openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
 263     }
 264
 265     void matchTemplateNaive_CCORR(
 266         const oclMat& image, const oclMat& templ, oclMat& result, int cn)
 267     {
 268         CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
 269             || (image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F);
 270         CV_Assert(image.channels() == templ.channels() && (image.channels() == 1 || image.channels() == 4) && result.channels() == 1);
 271         CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
 272
 273         Context *clCxt = image.clCxt;
 274         string kernelName = "matchTemplate_Naive_CCORR";
 275
 276         vector< pair<size_t, const void *> > args;
 277
 278         args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
 279         args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
 280         args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
 281         args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
 282         args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
 283         args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
 284         args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
 285         args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
 286         args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
 287         args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
 288         args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
 289         args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
 290         args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
 291         args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
 292         args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
 293
 294         size_t globalThreads[3] = {result.cols, result.rows, 1};
 295         size_t localThreads[3]  = {32, 8, 1};
 296         openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
 297     }
 298     //////////////////////////////////////////////////////////////////////
 299     // CCOFF
 300     void matchTemplate_CCOFF(
 301         const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
 302     {
 303         CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U);
 304
 305         matchTemplate_CCORR(image,templ,result,buf);
 306
 307         Context *clCxt = image.clCxt;
 308         string kernelName;
 309
 310         kernelName = "matchTemplate_Prepared_CCOFF";
 311         size_t globalThreads[3] = {result.cols, result.rows, 1};
 312         size_t localThreads[3]  = {32, 8, 1};
 313
 314         vector< pair<size_t, const void *> > args;
 315         args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
 316         args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
 317         args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
 318         args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
 319         args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
 320         args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
 321         args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
 322         args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
 323         args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
 324         // to be continued in the following section
 325         if(image.channels() == 1)
 326         {
 327             buf.image_sums.resize(1);
 328             integral(image, buf.image_sums[0]);
 329
 330             float templ_sum = 0;
 331             templ_sum = (float)sum(templ)[0] / templ.size().area();
 332             args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[0].data) );
 333             args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].offset) );
 334             args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].step) );
 335             args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) );
 336         }
 337         else
 338         {
 339             Vec4f templ_sum = Vec4f::all(0);
 340             split(image,buf.images);
 341             templ_sum = sum(templ) / templ.size().area();
 342             buf.image_sums.resize(buf.images.size());
 343
 344             for(int i = 0; i < image.channels(); i ++)
 345             {
 346                 integral(buf.images[i], buf.image_sums[i]);
 347             }
 348             switch(image.channels())
 349             {
 350             case 4:
 351                 args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[0].data) );
 352                 args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[1].data) );
 353                 args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[2].data) );
 354                 args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[3].data) );
 355                 args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].offset) );
 356                 args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].step) );
 357                 args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) );
 358                 args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) );
 359                 args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) );
 360                 args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) );
 361                 break;
 362             default:
 363                 CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
 364                 break;
 365             }
 366         }
 367         openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
 368     }
 369
 370     void matchTemplate_CCOFF_NORMED(
 371         const oclMat& image, const oclMat& templ, oclMat& result, MatchTemplateBuf &buf)
 372     {
 373         image.convertTo(buf.imagef, CV_32F);
 374         templ.convertTo(buf.templf, CV_32F);
 375
 376         matchTemplate_CCORR(buf.imagef, buf.templf, result, buf);
 377         float scale = 1.f/templ.size().area();
 378
 379         Context *clCxt = image.clCxt;
 380         string kernelName;
 381
 382         kernelName = "matchTemplate_Prepared_CCOFF_NORMED";
 383         size_t globalThreads[3] = {result.cols, result.rows, 1};
 384         size_t localThreads[3]  = {32, 8, 1};
 385
 386         vector< pair<size_t, const void *> > args;
 387         args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
 388         args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
 389         args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
 390         args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
 391         args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
 392         args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
 393         args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
 394         args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
 395         args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
 396         args.push_back( make_pair( sizeof(cl_float),(void *)&scale) );
 397         // to be continued in the following section
 398         if(image.channels() == 1)
 399         {
 400             buf.image_sums.resize(1);
 401             buf.image_sqsums.resize(1);
 402             integral(image, buf.image_sums[0], buf.image_sqsums[0]);
 403             float templ_sum = 0;
 404             float templ_sqsum = 0;
 405             templ_sum   = (float)sum(templ)[0];
 406 #if SQRSUM_FIXED
 407             templ_sqsum = sqrSum(templ)[0];
 408 #else
 409             oclMat templ_sqr = templ;
 410             multiply(templ,templ, templ_sqr);
 411             templ_sqsum  = sum(templ_sqr)[0];
 412 #endif //SQRSUM_FIXED
 413             templ_sqsum -= scale * templ_sum * templ_sum;
 414             templ_sum   *= scale;
 415
 416             args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[0].data) );
 417             args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].offset) );
 418             args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].step) );
 419             args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[0].data) );
 420             args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sqsums[0].offset) );
 421             args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sqsums[0].step) );
 422             args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum) );
 423             args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum) );
 424         }
 425         else
 426         {
 427             Vec4f templ_sum   = Vec4f::all(0);
 428             Vec4f templ_sqsum = Vec4f::all(0);
 429
 430             split(image,buf.images);
 431             templ_sum   = sum(templ);
 432 #if SQRSUM_FIXED
 433             templ_sqsum = sqrSum(templ);
 434 #else
 435             oclMat templ_sqr = templ;
 436             multiply(templ,templ, templ_sqr);
 437             templ_sqsum  = sum(templ_sqr);
 438 #endif //SQRSUM_FIXED
 439             templ_sqsum -= scale * templ_sum * templ_sum;
 440
 441             float templ_sqsum_sum = 0;
 442             for(int i = 0; i < image.channels(); i ++)
 443             {
 444                 templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
 445             }
 446             templ_sum   *= scale;
 447             buf.image_sums.resize(buf.images.size());
 448             buf.image_sqsums.resize(buf.images.size());
 449
 450             for(int i = 0; i < image.channels(); i ++)
 451             {
 452                 integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]);
 453             }
 454
 455             switch(image.channels())
 456             {
 457             case 4:
 458                 args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[0].data) );
 459                 args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[1].data) );
 460                 args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[2].data) );
 461                 args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[3].data) );
 462                 args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].offset) );
 463                 args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].step) );
 464                 args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[0].data) );
 465                 args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[1].data) );
 466                 args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[2].data) );
 467                 args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[3].data) );
 468                 args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sqsums[0].offset) );
 469                 args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sqsums[0].step) );
 470                 args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[0]) );
 471                 args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[1]) );
 472                 args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[2]) );
 473                 args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sum[3]) );
 474                 args.push_back( make_pair( sizeof(cl_float),(void *)&templ_sqsum_sum) );
 475                 break;
 476             default:
 477                 CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
 478                 break;
 479             }
 480         }
 481         openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.channels(), image.depth());
 482     }
 483
 484 }/*ocl*/} /*cv*/
 485
 486 void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method)
 487 {
 488     MatchTemplateBuf buf;
 489     matchTemplate(image,templ, result, method,buf);
 490 }
 491 void cv::ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf& buf)
 492 {
 493     CV_Assert(image.type() == templ.type());
 494     CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows);
 495
 496     typedef void (*Caller)(const oclMat&, const oclMat&, oclMat&, MatchTemplateBuf&);
 497
 498     const Caller callers[] = {
 499         ::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED,
 500         ::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED,
 501         ::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED
 502     };
 503
 504     Caller caller = callers[method];
 505     CV_Assert(caller);
 506     caller(image, templ, result, buf);
 507 }
 508 #endif //