modules/imgproc/src/templmatch.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                        Intel License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
  14 // Third party copyrights are property of their respective owners.
  15 //
  16 // Redistribution and use in source and binary forms, with or without modification,
  17 // are permitted provided that the following conditions are met:
  18 //
  19 //   * Redistribution's of source code must retain the above copyright notice,
  20 //     this list of conditions and the following disclaimer.
  21 //
  22 //   * Redistribution's in binary form must reproduce the above copyright notice,
  23 //     this list of conditions and the following disclaimer in the documentation
  24 //     and/or other materials provided with the distribution.
  25 //
  26 //   * The name of Intel Corporation may not be used to endorse or promote products
  27 //     derived from this software without specific prior written permission.
  28 //
  29 // This software is provided by the copyright holders and contributors "as is" and
  30 // any express or implied warranties, including, but not limited to, the implied
  31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  32 // In no event shall the Intel Corporation or contributors be liable for any direct,
  33 // indirect, incidental, special, exemplary, or consequential damages
  34 // (including, but not limited to, procurement of substitute goods or services;
  35 // loss of use, data, or profits; or business interruption) however caused
  36 // and on any theory of liability, whether in contract, strict liability,
  37 // or tort (including negligence or otherwise) arising in any way out of
  38 // the use of this software, even if advised of the possibility of such damage.
  39 //
  40 //M*/
  41
  42 #include "precomp.hpp"
  43 #include "opencl_kernels.hpp"
  44
  45 //////////////////////////////////////////////////matchTemplate//////////////////////////////////////////////////////////
  46 namespace cv
  47 {
  48     static bool matchTemplate_CCORR(const UMat &image, const UMat &templ, UMat &result);
  49     static bool matchTemplate_CCORR_NORMED(const UMat &image, const UMat &templ, UMat &result);
  50
  51     static bool matchTemplate_SQDIFF(const UMat &image, const UMat &templ, UMat &result);
  52     static bool matchTemplate_SQDIFF_NORMED (const UMat &image, const UMat &templ, UMat &result);
  53
  54     static bool matchTemplate_CCOEFF(const UMat &image, const UMat &templ, UMat &result);
  55     static bool matchTemplate_CCOEFF_NORMED(const UMat &image, const UMat &templ, UMat &result);
  56
  57     static bool matchTemplateNaive_CCORR (const UMat &image, const UMat &templ, UMat &result, int cn);
  58     static bool matchTemplateNaive_SQDIFF(const UMat &image, const UMat &templ, UMat &result, int cn);
  59
  60     static bool useNaive(int method, int depth, Size size)
  61     {
  62 #ifdef HAVE_CLAMDFFT
  63         if (method == TM_SQDIFF && depth == CV_32F)
  64             return true;
  65         else if(method == TM_CCORR || (method == TM_SQDIFF && depth == CV_8U))
  66             return size.height < 18 && size.width < 18;
  67         else
  68             return false;
  69 #else
  70 #define UNUSED(x) (void)(x);
  71             UNUSED(method) UNUSED(depth) UNUSED(size)
  72 #undef  UNUSED
  73             return true;
  74 #endif
  75     }
  76
  77 ///////////////////////////////////////////////////CCORR//////////////////////////////////////////////////////////////
  78
  79     static bool matchTemplate_CCORR(const UMat &image, const UMat &templ, UMat &result)
  80         {
  81             if (useNaive(TM_CCORR, image.depth(), templ.size())  )
  82                 return matchTemplateNaive_CCORR(image, templ, result, image.channels());
  83             else
  84                 return false;
  85         }
  86
  87     static bool matchTemplateNaive_CCORR (const UMat &image, const UMat &templ, UMat &result, int cn)
  88     {
  89         int type = image.type();
  90         int depth = CV_MAT_DEPTH(type);
  91
  92         CV_Assert(result.channels() == 1);
  93
  94         const char * kernelName = "matchTemplate_Naive_CCORR";
  95
  96         ocl::Kernel k (kernelName, ocl::imgproc::match_template_oclsrc, format("-D type=%s -D elem_type=%s -D cn=%d",ocl::typeToStr(type), ocl::typeToStr(depth), cn));
  97         if (k.empty())
  98             return false;
  99
 100         size_t globalsize[2] = {result.cols, result.rows};
 101         size_t localsize[2]  = {16, 16};
 102
 103         return k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ), ocl::KernelArg::WriteOnly(result)).run(2,globalsize,localsize,true);
 104     }
 105
 106     static bool matchTemplate_CCORR_NORMED(const UMat &image, const UMat &templ, UMat &result)
 107     {
 108         if (!matchTemplate_CCORR(image, templ, result))
 109             return false;
 110
 111         int type = image.type();
 112         int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
 113
 114         const char * kernelName = "matchTemplate_CCORR_NORMED";
 115
 116         ocl::Kernel k(kernelName, ocl::imgproc::match_template_oclsrc, format("-D type=%s -D elem_type=%s -D cn=%d",ocl::typeToStr(type), ocl::typeToStr(depth), cn));
 117         if (k.empty())
 118             return false;
 119
 120         UMat temp, image_sums, image_sqsums;
 121         integral(image.reshape(1), image_sums, temp);
 122
 123         if(temp.depth() == CV_64F)
 124             temp.convertTo(image_sqsums, CV_32F);
 125         else
 126             image_sqsums = temp;
 127
 128         UMat templ_resh;
 129         templ.reshape(1).convertTo(templ_resh, CV_32F);
 130
 131         multiply(templ_resh, templ_resh, temp);
 132         unsigned long long templ_sqsum = (unsigned long long)sum(temp)[0];
 133
 134         size_t globalsize[2] = {result.cols, result.rows};
 135         size_t localsize[2]  = {16, 16};
 136
 137         return k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::WriteOnly(result), templ.rows, templ.cols, templ_sqsum).run(2,globalsize,localsize,true);
 138     }
 139
 140 //////////////////////////////////////SQDIFF//////////////////////////////////////////////////////////////
 141
 142     static bool matchTemplate_SQDIFF(const UMat &image, const UMat &templ, UMat &result)
 143     {
 144          if (useNaive(TM_SQDIFF, image.depth(), templ.size()))
 145             {
 146                 return matchTemplateNaive_SQDIFF(image, templ, result, image.channels());;
 147             }
 148             else
 149                 return false;
 150     }
 151
 152     static bool matchTemplateNaive_SQDIFF(const UMat &image, const UMat &templ, UMat &result, int cn)
 153     {
 154         int type = image.type();
 155         int depth = CV_MAT_DEPTH(type);
 156
 157         CV_Assert(result.channels() == 1);
 158
 159         const char * kernelName = "matchTemplate_Naive_SQDIFF";
 160
 161         ocl::Kernel k (kernelName, ocl::imgproc::match_template_oclsrc, format("-D type=%s -D elem_type=%s -D cn=%d",ocl::typeToStr(type), ocl::typeToStr(depth), cn));
 162         if (k.empty())
 163             return false;
 164
 165         size_t globalsize[2] = {result.cols, result.rows};
 166         size_t localsize[2]  = {16, 16};
 167
 168         return k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ), ocl::KernelArg::WriteOnly(result)).run(2,globalsize,localsize,true);
 169     }
 170
 171     static bool matchTemplate_SQDIFF_NORMED (const UMat &image, const UMat &templ, UMat &result)
 172     {
 173         if (!matchTemplate_CCORR(image, templ, result))
 174             return false;
 175
 176         int type = image.type();
 177         int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
 178
 179         const char * kernelName = "matchTemplate_SQDIFF_NORMED";
 180
 181         ocl::Kernel k(kernelName, ocl::imgproc::match_template_oclsrc, format("-D type=%s -D elem_type=%s -D cn=%d",ocl::typeToStr(type), ocl::typeToStr(depth), cn));
 182         if (k.empty())
 183             return false;
 184
 185         UMat temp, image_sums, image_sqsums;
 186         integral(image.reshape(1), image_sums, temp);
 187
 188         if(temp.depth() == CV_64F)
 189             temp.convertTo(image_sqsums, CV_32F);
 190         else
 191             image_sqsums = temp;
 192
 193         UMat templ_resh;
 194         templ.reshape(1).convertTo(templ_resh, CV_32F);
 195
 196         multiply(templ_resh, templ_resh, temp);
 197         unsigned long long templ_sqsum = (unsigned long long)sum(temp)[0];
 198
 199         size_t globalsize[2] = {result.cols, result.rows};
 200         size_t localsize[2]  = {16, 16};
 201
 202         return k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::WriteOnly(result), templ.rows, templ.cols, templ_sqsum).run(2,globalsize,localsize,true);
 203     }
 204
 205 /////////////////////////////////////CCOEFF/////////////////////////////////////////////////////////////////
 206
 207     static bool matchTemplate_CCOEFF(const UMat &image, const UMat &templ, UMat &result)
 208     {
 209         if (!matchTemplate_CCORR(image, templ, result))
 210             return false;
 211
 212         UMat image_sums;
 213         integral(image, image_sums);
 214
 215         int type = image_sums.type();
 216         int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
 217
 218         const char * kernelName;
 219
 220         if (cn==1)
 221             kernelName = "matchTemplate_Prepared_CCOEFF_C1";
 222         else if (cn==2)
 223             kernelName = "matchTemplate_Prepared_CCOEFF_C2";
 224         else
 225             kernelName = "matchTemplate_Prepared_CCOEFF_C4";
 226
 227         ocl::Kernel k(kernelName, ocl::imgproc::match_template_oclsrc, format("-D type=%s -D elem_type=%s -D cn=%d",ocl::typeToStr(type), ocl::typeToStr(depth), cn));
 228         if (k.empty())
 229             return false;
 230
 231         size_t globalsize[2] = {result.cols, result.rows};
 232         size_t localsize[2]  = {16, 16};
 233
 234         if (cn==1)
 235         {
 236             float templ_sum = (float)sum(templ)[0]/ templ.size().area();
 237             return k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::WriteOnly(result), templ.rows, templ.cols, templ_sum).run(2,globalsize,localsize,true);
 238         }
 239         else
 240         {
 241             Vec4f templ_sum = Vec4f::all(0);
 242             templ_sum = sum(templ)/ templ.size().area();
 243             if (cn==2)
 244                 return k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::WriteOnly(result), templ.rows, templ.cols,
 245                 templ_sum[0],templ_sum[1]).run(2,globalsize,localsize,true);
 246
 247             return k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::WriteOnly(result), templ.rows, templ.cols,
 248             templ_sum[0],templ_sum[1],templ_sum[2],templ_sum[3]).run(2,globalsize,localsize,true);
 249         }
 250     }
 251
 252     static bool matchTemplate_CCOEFF_NORMED(const UMat &image, const UMat &templ, UMat &result)
 253     {
 254         UMat imagef, templf;
 255         image.convertTo(imagef, CV_32F);
 256         templ.convertTo(templf, CV_32F);
 257
 258         if(!matchTemplate_CCORR(imagef, templf, result))
 259             return false;
 260
 261         const char * kernelName;
 262
 263         UMat temp, image_sums, image_sqsums;
 264         integral(image,image_sums, temp);
 265
 266         int type = image_sums.type();
 267         int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
 268
 269         if (cn== 1)
 270             kernelName = "matchTemplate_CCOEFF_NORMED_C1";
 271         else if (cn==2)
 272             kernelName = "matchTemplate_CCOEFF_NORMED_C2";
 273         else
 274             kernelName = "matchTemplate_CCOEFF_NORMED_C4";
 275
 276         ocl::Kernel k(kernelName, ocl::imgproc::match_template_oclsrc,
 277             format("-D type=%s -D elem_type=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
 278         if (k.empty())
 279             return false;
 280
 281         if(temp.depth() == CV_64F)
 282             temp.convertTo(image_sqsums, CV_32F);
 283         else
 284             image_sqsums = temp;
 285
 286         size_t globalsize[2] = {result.cols, result.rows};
 287         size_t localsize[2]  = {16, 16};
 288
 289         float scale = 1.f / templ.size().area();
 290
 291         if (cn==1)
 292         {
 293             float templ_sum = (float)sum(templ)[0];
 294
 295             multiply(templf, templf, temp);
 296             float templ_sqsum = (float)sum(temp)[0];
 297
 298             templ_sqsum -= scale * templ_sum * templ_sum;
 299             templ_sum   *= scale;
 300
 301             if (templ_sqsum < DBL_EPSILON)
 302             {
 303                 result = Scalar::all(1);
 304                 return true;
 305             }
 306
 307             return k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums),ocl::KernelArg::ReadOnlyNoSize(image_sqsums),
 308                           ocl::KernelArg::WriteOnly(result), templ.rows, templ.cols, scale, templ_sum, templ_sqsum)
 309                          .run(2,globalsize,localsize,true);
 310         }
 311         else
 312         {
 313             Vec4f templ_sum = Vec4f::all(0);
 314             Vec4f templ_sqsum = Vec4f::all(0);
 315
 316             templ_sum = sum(templ);
 317
 318             multiply(templf, templf, temp);
 319             templ_sqsum = sum(temp);
 320
 321             float templ_sqsum_sum = 0;
 322                 for(int i = 0; i < cn; i ++)
 323                 {
 324                     templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
 325                 }
 326
 327             templ_sum   *= scale;
 328
 329             if (templ_sqsum_sum < DBL_EPSILON)
 330             {
 331                 result = Scalar::all(1);
 332                 return true;
 333             }
 334
 335             if (cn==2)
 336                  return k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums),
 337                                ocl::KernelArg::WriteOnly(result), templ.rows, templ.cols, scale,
 338                                templ_sum[0],templ_sum[1], templ_sqsum_sum)
 339                                .run(2,globalsize,localsize,true);
 340
 341             return k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums),
 342                           ocl::KernelArg::WriteOnly(result), templ.rows, templ.cols, scale,
 343                           templ_sum[0],templ_sum[1],templ_sum[2],templ_sum[3], templ_sqsum_sum)
 344                          .run(2,globalsize,localsize,true);
 345         }
 346
 347     }
 348
 349 ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 350
 351     static bool ocl_matchTemplate( InputArray _img, InputArray _templ, OutputArray _result, int method)
 352     {
 353         int type = _img.type();
 354         int cn = CV_MAT_CN(type);
 355
 356         CV_Assert( cn == _templ.channels() && cn!=3 && cn<=4);
 357
 358         typedef bool (*Caller)(const UMat &, const UMat &, UMat &);
 359
 360         const Caller callers[] =
 361         {
 362             matchTemplate_SQDIFF, matchTemplate_SQDIFF_NORMED, matchTemplate_CCORR,
 363             matchTemplate_CCORR_NORMED, matchTemplate_CCOEFF, matchTemplate_CCOEFF_NORMED
 364         };
 365
 366         Caller caller = callers[method];
 367
 368         UMat image = _img.getUMat();
 369         UMat templ = _templ.getUMat(), result;
 370         _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
 371         result = _result.getUMat();
 372         return caller(image, templ, result);
 373     }
 374 }
 375
 376 namespace cv
 377 {
 378
 379 void crossCorr( const Mat& img, const Mat& _templ, Mat& corr,
 380                 Size corrsize, int ctype,
 381                 Point anchor, double delta, int borderType )
 382 {
 383     const double blockScale = 4.5;
 384     const int minBlockSize = 256;
 385     std::vector<uchar> buf;
 386
 387     Mat templ = _templ;
 388     int depth = img.depth(), cn = img.channels();
 389     int tdepth = templ.depth(), tcn = templ.channels();
 390     int cdepth = CV_MAT_DEPTH(ctype), ccn = CV_MAT_CN(ctype);
 391
 392     CV_Assert( img.dims <= 2 && templ.dims <= 2 && corr.dims <= 2 );
 393
 394     if( depth != tdepth && tdepth != std::max(CV_32F, depth) )
 395     {
 396         _templ.convertTo(templ, std::max(CV_32F, depth));
 397         tdepth = templ.depth();
 398     }
 399
 400     CV_Assert( depth == tdepth || tdepth == CV_32F);
 401     CV_Assert( corrsize.height <= img.rows + templ.rows - 1 &&
 402                corrsize.width <= img.cols + templ.cols - 1 );
 403
 404     CV_Assert( ccn == 1 || delta == 0 );
 405
 406     corr.create(corrsize, ctype);
 407
 408     int maxDepth = depth > CV_8S ? CV_64F : std::max(std::max(CV_32F, tdepth), cdepth);
 409     Size blocksize, dftsize;
 410
 411     blocksize.width = cvRound(templ.cols*blockScale);
 412     blocksize.width = std::max( blocksize.width, minBlockSize - templ.cols + 1 );
 413     blocksize.width = std::min( blocksize.width, corr.cols );
 414     blocksize.height = cvRound(templ.rows*blockScale);
 415     blocksize.height = std::max( blocksize.height, minBlockSize - templ.rows + 1 );
 416     blocksize.height = std::min( blocksize.height, corr.rows );
 417
 418     dftsize.width = std::max(getOptimalDFTSize(blocksize.width + templ.cols - 1), 2);
 419     dftsize.height = getOptimalDFTSize(blocksize.height + templ.rows - 1);
 420     if( dftsize.width <= 0 || dftsize.height <= 0 )
 421         CV_Error( CV_StsOutOfRange, "the input arrays are too big" );
 422
 423     // recompute block size
 424     blocksize.width = dftsize.width - templ.cols + 1;
 425     blocksize.width = MIN( blocksize.width, corr.cols );
 426     blocksize.height = dftsize.height - templ.rows + 1;
 427     blocksize.height = MIN( blocksize.height, corr.rows );
 428
 429     Mat dftTempl( dftsize.height*tcn, dftsize.width, maxDepth );
 430     Mat dftImg( dftsize, maxDepth );
 431
 432     int i, k, bufSize = 0;
 433     if( tcn > 1 && tdepth != maxDepth )
 434         bufSize = templ.cols*templ.rows*CV_ELEM_SIZE(tdepth);
 435
 436     if( cn > 1 && depth != maxDepth )
 437         bufSize = std::max( bufSize, (blocksize.width + templ.cols - 1)*
 438             (blocksize.height + templ.rows - 1)*CV_ELEM_SIZE(depth));
 439
 440     if( (ccn > 1 || cn > 1) && cdepth != maxDepth )
 441         bufSize = std::max( bufSize, blocksize.width*blocksize.height*CV_ELEM_SIZE(cdepth));
 442
 443     buf.resize(bufSize);
 444
 445     // compute DFT of each template plane
 446     for( k = 0; k < tcn; k++ )
 447     {
 448         int yofs = k*dftsize.height;
 449         Mat src = templ;
 450         Mat dst(dftTempl, Rect(0, yofs, dftsize.width, dftsize.height));
 451         Mat dst1(dftTempl, Rect(0, yofs, templ.cols, templ.rows));
 452
 453         if( tcn > 1 )
 454         {
 455             src = tdepth == maxDepth ? dst1 : Mat(templ.size(), tdepth, &buf[0]);
 456             int pairs[] = {k, 0};
 457             mixChannels(&templ, 1, &src, 1, pairs, 1);
 458         }
 459
 460         if( dst1.data != src.data )
 461             src.convertTo(dst1, dst1.depth());
 462
 463         if( dst.cols > templ.cols )
 464         {
 465             Mat part(dst, Range(0, templ.rows), Range(templ.cols, dst.cols));
 466             part = Scalar::all(0);
 467         }
 468         dft(dst, dst, 0, templ.rows);
 469     }
 470
 471     int tileCountX = (corr.cols + blocksize.width - 1)/blocksize.width;
 472     int tileCountY = (corr.rows + blocksize.height - 1)/blocksize.height;
 473     int tileCount = tileCountX * tileCountY;
 474
 475     Size wholeSize = img.size();
 476     Point roiofs(0,0);
 477     Mat img0 = img;
 478
 479     if( !(borderType & BORDER_ISOLATED) )
 480     {
 481         img.locateROI(wholeSize, roiofs);
 482         img0.adjustROI(roiofs.y, wholeSize.height-img.rows-roiofs.y,
 483                        roiofs.x, wholeSize.width-img.cols-roiofs.x);
 484     }
 485     borderType |= BORDER_ISOLATED;
 486
 487     // calculate correlation by blocks
 488     for( i = 0; i < tileCount; i++ )
 489     {
 490         int x = (i%tileCountX)*blocksize.width;
 491         int y = (i/tileCountX)*blocksize.height;
 492
 493         Size bsz(std::min(blocksize.width, corr.cols - x),
 494                  std::min(blocksize.height, corr.rows - y));
 495         Size dsz(bsz.width + templ.cols - 1, bsz.height + templ.rows - 1);
 496         int x0 = x - anchor.x + roiofs.x, y0 = y - anchor.y + roiofs.y;
 497         int x1 = std::max(0, x0), y1 = std::max(0, y0);
 498         int x2 = std::min(img0.cols, x0 + dsz.width);
 499         int y2 = std::min(img0.rows, y0 + dsz.height);
 500         Mat src0(img0, Range(y1, y2), Range(x1, x2));
 501         Mat dst(dftImg, Rect(0, 0, dsz.width, dsz.height));
 502         Mat dst1(dftImg, Rect(x1-x0, y1-y0, x2-x1, y2-y1));
 503         Mat cdst(corr, Rect(x, y, bsz.width, bsz.height));
 504
 505         for( k = 0; k < cn; k++ )
 506         {
 507             Mat src = src0;
 508             dftImg = Scalar::all(0);
 509
 510             if( cn > 1 )
 511             {
 512                 src = depth == maxDepth ? dst1 : Mat(y2-y1, x2-x1, depth, &buf[0]);
 513                 int pairs[] = {k, 0};
 514                 mixChannels(&src0, 1, &src, 1, pairs, 1);
 515             }
 516
 517             if( dst1.data != src.data )
 518                 src.convertTo(dst1, dst1.depth());
 519
 520             if( x2 - x1 < dsz.width || y2 - y1 < dsz.height )
 521                 copyMakeBorder(dst1, dst, y1-y0, dst.rows-dst1.rows-(y1-y0),
 522                                x1-x0, dst.cols-dst1.cols-(x1-x0), borderType);
 523
 524             dft( dftImg, dftImg, 0, dsz.height );
 525             Mat dftTempl1(dftTempl, Rect(0, tcn > 1 ? k*dftsize.height : 0,
 526                                          dftsize.width, dftsize.height));
 527             mulSpectrums(dftImg, dftTempl1, dftImg, 0, true);
 528             dft( dftImg, dftImg, DFT_INVERSE + DFT_SCALE, bsz.height );
 529
 530             src = dftImg(Rect(0, 0, bsz.width, bsz.height));
 531
 532             if( ccn > 1 )
 533             {
 534                 if( cdepth != maxDepth )
 535                 {
 536                     Mat plane(bsz, cdepth, &buf[0]);
 537                     src.convertTo(plane, cdepth, 1, delta);
 538                     src = plane;
 539                 }
 540                 int pairs[] = {0, k};
 541                 mixChannels(&src, 1, &cdst, 1, pairs, 1);
 542             }
 543             else
 544             {
 545                 if( k == 0 )
 546                     src.convertTo(cdst, cdepth, 1, delta);
 547                 else
 548                 {
 549                     if( maxDepth != cdepth )
 550                     {
 551                         Mat plane(bsz, cdepth, &buf[0]);
 552                         src.convertTo(plane, cdepth);
 553                         src = plane;
 554                     }
 555                     add(src, cdst, cdst);
 556                 }
 557             }
 558         }
 559     }
 560 }
 561 }
 562
 563 ////////////////////////////////////////////////////////////////////////////////////////////////////////
 564
 565 void cv::matchTemplate( InputArray _img, InputArray _templ, OutputArray _result, int method )
 566 {
 567     CV_Assert( CV_TM_SQDIFF <= method && method <= CV_TM_CCOEFF_NORMED );
 568
 569     CV_Assert( (_img.depth() == CV_8U || _img.depth() == CV_32F) && _img.type() == _templ.type() );
 570
 571     CV_Assert(_img.size().height >= _templ.size().height && _img.size().width >= _templ.size().width);
 572
 573     CV_Assert(_img.dims() <= 2);
 574
 575     bool use_opencl = ocl::useOpenCL() && _result.isUMat();
 576     if ( use_opencl && ocl_matchTemplate(_img,_templ,_result,method))
 577             return;
 578
 579     int numType = method == CV_TM_CCORR || method == CV_TM_CCORR_NORMED ? 0 :
 580                   method == CV_TM_CCOEFF || method == CV_TM_CCOEFF_NORMED ? 1 : 2;
 581     bool isNormed = method == CV_TM_CCORR_NORMED ||
 582                     method == CV_TM_SQDIFF_NORMED ||
 583                     method == CV_TM_CCOEFF_NORMED;
 584
 585     Mat img = _img.getMat(), templ = _templ.getMat();
 586     if( img.rows < templ.rows || img.cols < templ.cols )
 587         std::swap(img, templ);
 588
 589     Size corrSize(img.cols - templ.cols + 1, img.rows - templ.rows + 1);
 590     _result.create(corrSize, CV_32F);
 591     Mat result = _result.getMat();
 592
 593 #ifdef HAVE_TEGRA_OPTIMIZATION
 594     if (tegra::matchTemplate(img, templ, result, method))
 595         return;
 596 #endif
 597
 598     int cn = img.channels();
 599     crossCorr( img, templ, result, result.size(), result.type(), Point(0,0), 0, 0);
 600
 601     if( method == CV_TM_CCORR )
 602         return;
 603
 604     double invArea = 1./((double)templ.rows * templ.cols);
 605
 606     Mat sum, sqsum;
 607     Scalar templMean, templSdv;
 608     double *q0 = 0, *q1 = 0, *q2 = 0, *q3 = 0;
 609     double templNorm = 0, templSum2 = 0;
 610
 611     if( method == CV_TM_CCOEFF )
 612     {
 613         integral(img, sum, CV_64F);
 614         templMean = mean(templ);
 615     }
 616     else
 617     {
 618         integral(img, sum, sqsum, CV_64F);
 619         meanStdDev( templ, templMean, templSdv );
 620
 621         templNorm = templSdv[0]*templSdv[0] + templSdv[1]*templSdv[1] + templSdv[2]*templSdv[2] + templSdv[3]*templSdv[3];
 622
 623         if( templNorm < DBL_EPSILON && method == CV_TM_CCOEFF_NORMED )
 624         {
 625             result = Scalar::all(1);
 626             return;
 627         }
 628
 629         templSum2 = templNorm + templMean[0]*templMean[0] + templMean[1]*templMean[1] + templMean[2]*templMean[2] + templMean[3]*templMean[3];
 630
 631         if( numType != 1 )
 632         {
 633             templMean = Scalar::all(0);
 634             templNorm = templSum2;
 635         }
 636
 637         templSum2 /= invArea;
 638         templNorm = std::sqrt(templNorm);
 639         templNorm /= std::sqrt(invArea); // care of accuracy here
 640
 641         q0 = (double*)sqsum.data;
 642         q1 = q0 + templ.cols*cn;
 643         q2 = (double*)(sqsum.data + templ.rows*sqsum.step);
 644         q3 = q2 + templ.cols*cn;
 645     }
 646
 647     double* p0 = (double*)sum.data;
 648     double* p1 = p0 + templ.cols*cn;
 649     double* p2 = (double*)(sum.data + templ.rows*sum.step);
 650     double* p3 = p2 + templ.cols*cn;
 651
 652     int sumstep = sum.data ? (int)(sum.step / sizeof(double)) : 0;
 653     int sqstep = sqsum.data ? (int)(sqsum.step / sizeof(double)) : 0;
 654
 655     int i, j, k;
 656
 657     for( i = 0; i < result.rows; i++ )
 658     {
 659         float* rrow = (float*)(result.data + i*result.step);
 660         int idx = i * sumstep;
 661         int idx2 = i * sqstep;
 662
 663         for( j = 0; j < result.cols; j++, idx += cn, idx2 += cn )
 664         {
 665             double num = rrow[j], t;
 666             double wndMean2 = 0, wndSum2 = 0;
 667
 668             if( numType == 1 )
 669             {
 670                 for( k = 0; k < cn; k++ )
 671                 {
 672                     t = p0[idx+k] - p1[idx+k] - p2[idx+k] + p3[idx+k];
 673                     wndMean2 += t*t;
 674                     num -= t*templMean[k];
 675                 }
 676
 677                 wndMean2 *= invArea;
 678             }
 679
 680             if( isNormed || numType == 2 )
 681             {
 682                 for( k = 0; k < cn; k++ )
 683                 {
 684                     t = q0[idx2+k] - q1[idx2+k] - q2[idx2+k] + q3[idx2+k];
 685                     wndSum2 += t;
 686                 }
 687
 688                 if( numType == 2 )
 689                 {
 690                     num = wndSum2 - 2*num + templSum2;
 691                     num = MAX(num, 0.);
 692                 }
 693             }
 694
 695             if( isNormed )
 696             {
 697                 t = std::sqrt(MAX(wndSum2 - wndMean2,0))*templNorm;
 698                 if( fabs(num) < t )
 699                     num /= t;
 700                 else if( fabs(num) < t*1.125 )
 701                     num = num > 0 ? 1 : -1;
 702                 else
 703                     num = method != CV_TM_SQDIFF_NORMED ? 0 : 1;
 704             }
 705
 706             rrow[j] = (float)num;
 707         }
 708     }
 709 }
 710
 711
 712 CV_IMPL void
 713 cvMatchTemplate( const CvArr* _img, const CvArr* _templ, CvArr* _result, int method )
 714 {
 715     cv::Mat img = cv::cvarrToMat(_img), templ = cv::cvarrToMat(_templ),
 716         result = cv::cvarrToMat(_result);
 717     CV_Assert( result.size() == cv::Size(std::abs(img.cols - templ.cols) + 1,
 718                                          std::abs(img.rows - templ.rows) + 1) &&
 719               result.type() == CV_32F );
 720     matchTemplate(img, templ, result, method);
 721 }
 722
 723 /* End of file. */