modules/imgproc/src/templmatch.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                        Intel License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
  14 // Third party copyrights are property of their respective owners.
  15 //
  16 // Redistribution and use in source and binary forms, with or without modification,
  17 // are permitted provided that the following conditions are met:
  18 //
  19 //   * Redistribution's of source code must retain the above copyright notice,
  20 //     this list of conditions and the following disclaimer.
  21 //
  22 //   * Redistribution's in binary form must reproduce the above copyright notice,
  23 //     this list of conditions and the following disclaimer in the documentation
  24 //     and/or other materials provided with the distribution.
  25 //
  26 //   * The name of Intel Corporation may not be used to endorse or promote products
  27 //     derived from this software without specific prior written permission.
  28 //
  29 // This software is provided by the copyright holders and contributors "as is" and
  30 // any express or implied warranties, including, but not limited to, the implied
  31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  32 // In no event shall the Intel Corporation or contributors be liable for any direct,
  33 // indirect, incidental, special, exemplary, or consequential damages
  34 // (including, but not limited to, procurement of substitute goods or services;
  35 // loss of use, data, or profits; or business interruption) however caused
  36 // and on any theory of liability, whether in contract, strict liability,
  37 // or tort (including negligence or otherwise) arising in any way out of
  38 // the use of this software, even if advised of the possibility of such damage.
  39 //
  40 //M*/
  41
  42 #include "precomp.hpp"
  43 #include "opencl_kernels.hpp"
  44
  45 ////////////////////////////////////////////////// matchTemplate //////////////////////////////////////////////////////////
  46
  47 namespace cv
  48 {
  49
  50 #ifdef HAVE_OPENCL
  51
  52 /////////////////////////////////////////////////// CCORR //////////////////////////////////////////////////////////////
  53
  54 enum
  55 {
  56     SUM_1 = 0, SUM_2 = 1
  57 };
  58
  59 static bool sumTemplate(InputArray _src, UMat & result)
  60 {
  61     int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
  62     int wdepth = std::max(CV_32S, depth), wtype = CV_MAKE_TYPE(wdepth, cn);
  63     size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
  64
  65     int wgs2_aligned = 1;
  66     while (wgs2_aligned < (int)wgs)
  67         wgs2_aligned <<= 1;
  68     wgs2_aligned >>= 1;
  69
  70     char cvt[40];
  71     ocl::Kernel k("calcSum", ocl::imgproc::match_template_oclsrc,
  72                   format("-D CALC_SUM -D T=%s -D WT=%s -D cn=%d -D convertToWT=%s -D WGS=%d -D WGS2_ALIGNED=%d -D wdepth=%d",
  73                          ocl::typeToStr(type), ocl::typeToStr(wtype), cn,
  74                          ocl::convertTypeStr(depth, wdepth, cn, cvt),
  75                          (int)wgs, wgs2_aligned, wdepth));
  76     if (k.empty())
  77         return false;
  78
  79     UMat src = _src.getUMat();
  80     result.create(1, 1, CV_32FC1);
  81
  82     ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
  83             resarg = ocl::KernelArg::PtrWriteOnly(result);
  84
  85     k.args(srcarg, src.cols, (int)src.total(), resarg);
  86
  87     size_t globalsize = wgs;
  88     return k.run(1, &globalsize, &wgs, false);
  89 }
  90
  91 static bool matchTemplateNaive_CCORR(InputArray _image, InputArray _templ, OutputArray _result)
  92 {
  93     int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
  94     int wdepth = std::max(depth, CV_32S), wtype = CV_MAKE_TYPE(wdepth, cn);
  95
  96     char cvt[40];
  97     ocl::Kernel k("matchTemplate_Naive_CCORR", ocl::imgproc::match_template_oclsrc,
  98                   format("-D CCORR -D T=%s -D WT=%s -D convertToWT=%s -D cn=%d -D wdepth=%d", ocl::typeToStr(type), ocl::typeToStr(wtype),
  99                          ocl::convertTypeStr(depth, wdepth, cn, cvt), cn, wdepth));
 100     if (k.empty())
 101         return false;
 102
 103     UMat image = _image.getUMat(), templ = _templ.getUMat();
 104     _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
 105     UMat result = _result.getUMat();
 106
 107     k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ),
 108            ocl::KernelArg::WriteOnly(result));
 109
 110     size_t globalsize[2] = { result.cols, result.rows };
 111     return k.run(2, globalsize, NULL, false);
 112 }
 113
 114 static bool matchTemplate_CCORR_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
 115 {
 116     matchTemplate(_image, _templ, _result, CV_TM_CCORR);
 117
 118     int type = _image.type(), cn = CV_MAT_CN(type);
 119
 120     ocl::Kernel k("matchTemplate_CCORR_NORMED", ocl::imgproc::match_template_oclsrc,
 121                   format("-D CCORR_NORMED -D T=%s -D cn=%d", ocl::typeToStr(type), cn));
 122     if (k.empty())
 123         return false;
 124
 125     UMat image = _image.getUMat(), templ = _templ.getUMat();
 126     _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
 127     UMat result = _result.getUMat();
 128
 129     UMat image_sums, image_sqsums;
 130     integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F);
 131
 132     UMat templ_sqsum;
 133     if (!sumTemplate(templ, templ_sqsum))
 134         return false;
 135
 136     k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result),
 137            templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum));
 138
 139     size_t globalsize[2] = { result.cols, result.rows };
 140     return k.run(2, globalsize, NULL, false);
 141 }
 142
 143 ////////////////////////////////////// SQDIFF //////////////////////////////////////////////////////////////
 144
 145 static bool matchTemplateNaive_SQDIFF(InputArray _image, InputArray _templ, OutputArray _result)
 146 {
 147     int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
 148     int wdepth = std::max(depth, CV_32S), wtype = CV_MAKE_TYPE(wdepth, cn);
 149
 150     char cvt[40];
 151     ocl::Kernel k("matchTemplate_Naive_SQDIFF", ocl::imgproc::match_template_oclsrc,
 152                   format("-D SQDIFF -D T=%s -D WT=%s -D convertToWT=%s -D cn=%d -D wdepth=%d", ocl::typeToStr(type),
 153                          ocl::typeToStr(wtype), ocl::convertTypeStr(depth, wdepth, cn, cvt), cn, wdepth));
 154     if (k.empty())
 155         return false;
 156
 157     UMat image = _image.getUMat(), templ = _templ.getUMat();
 158     _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
 159     UMat result = _result.getUMat();
 160
 161     k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ),
 162            ocl::KernelArg::WriteOnly(result));
 163
 164     size_t globalsize[2] = { result.cols, result.rows };
 165     return k.run(2, globalsize, NULL, false);
 166 }
 167
 168 static bool matchTemplate_SQDIFF_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
 169 {
 170     matchTemplate(_image, _templ, _result, CV_TM_CCORR);
 171
 172     int type = _image.type(), cn = CV_MAT_CN(type);
 173
 174     ocl::Kernel k("matchTemplate_SQDIFF_NORMED", ocl::imgproc::match_template_oclsrc,
 175                   format("-D SQDIFF_NORMED -D T=%s -D cn=%d", ocl::typeToStr(type),  cn));
 176     if (k.empty())
 177         return false;
 178
 179     UMat image = _image.getUMat(), templ = _templ.getUMat();
 180     _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
 181     UMat result = _result.getUMat();
 182
 183     UMat image_sums, image_sqsums;
 184     integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F);
 185
 186     UMat templ_sqsum;
 187     if (!sumTemplate(_templ, templ_sqsum))
 188         return false;
 189
 190     k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result),
 191            templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum));
 192
 193     size_t globalsize[2] = { result.cols, result.rows };
 194     return k.run(2, globalsize, NULL, false);
 195 }
 196
 197 ///////////////////////////////////// CCOEFF /////////////////////////////////////////////////////////////////
 198
 199 static bool matchTemplate_CCOEFF(InputArray _image, InputArray _templ, OutputArray _result)
 200 {
 201     matchTemplate(_image, _templ, _result, CV_TM_CCORR);
 202
 203     UMat image_sums, temp;
 204     integral(_image, temp);
 205
 206     if (temp.depth() == CV_64F)
 207         temp.convertTo(image_sums, CV_32F);
 208     else
 209         image_sums = temp;
 210
 211     int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
 212
 213     ocl::Kernel k("matchTemplate_Prepared_CCOEFF", ocl::imgproc::match_template_oclsrc,
 214                   format("-D CCOEFF -D T=%s -D elem_type=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
 215     if (k.empty())
 216         return false;
 217
 218     UMat templ = _templ.getUMat();
 219     Size size = _image.size(), tsize = templ.size();
 220     _result.create(size.height - templ.rows + 1, size.width - templ.cols + 1, CV_32F);
 221     UMat result = _result.getUMat();
 222
 223     if (cn == 1)
 224     {
 225         float templ_sum = static_cast<float>(sum(_templ)[0]) / tsize.area();
 226
 227         k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result),
 228                 templ.rows, templ.cols, templ_sum);
 229     }
 230     else
 231     {
 232         Vec4f templ_sum = Vec4f::all(0);
 233         templ_sum = sum(templ) / tsize.area();
 234
 235         if (cn == 2)
 236             k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols,
 237                    templ_sum[0], templ_sum[1]);
 238         else
 239             k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols,
 240                    templ_sum[0], templ_sum[1], templ_sum[2], templ_sum[3]);
 241     }
 242
 243     size_t globalsize[2] = { result.cols, result.rows };
 244     return k.run(2, globalsize, NULL, false);
 245 }
 246
 247 static bool matchTemplate_CCOEFF_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
 248 {
 249     matchTemplate(_image, _templ, _result, CV_TM_CCORR);
 250
 251     UMat temp, image_sums, image_sqsums;
 252     integral(_image, image_sums, image_sqsums, CV_32F, CV_32F);
 253
 254     int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
 255
 256     ocl::Kernel k("matchTemplate_CCOEFF_NORMED", ocl::imgproc::match_template_oclsrc,
 257         format("-D CCOEFF_NORMED -D type=%s -D elem_type=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
 258     if (k.empty())
 259         return false;
 260
 261     UMat templ = _templ.getUMat();
 262     Size size = _image.size(), tsize = templ.size();
 263     _result.create(size.height - templ.rows + 1, size.width - templ.cols + 1, CV_32F);
 264     UMat result = _result.getUMat();
 265
 266     float scale = 1.f / tsize.area();
 267
 268     if (cn == 1)
 269     {
 270         float templ_sum = (float)sum(templ)[0];
 271
 272         multiply(templ, templ, temp, 1, CV_32F);
 273         float templ_sqsum = (float)sum(temp)[0];
 274
 275         templ_sqsum -= scale * templ_sum * templ_sum;
 276         templ_sum   *= scale;
 277
 278         if (templ_sqsum < DBL_EPSILON)
 279         {
 280             result = Scalar::all(1);
 281             return true;
 282         }
 283
 284         k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums),
 285                       ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale, templ_sum, templ_sqsum);
 286     }
 287     else
 288     {
 289         Vec4f templ_sum = Vec4f::all(0), templ_sqsum = Vec4f::all(0);
 290         templ_sum = sum(templ);
 291
 292         multiply(templ, templ, temp, 1, CV_32F);
 293         templ_sqsum = sum(temp);
 294
 295         float templ_sqsum_sum = 0;
 296         for (int i = 0; i < cn; i ++)
 297             templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
 298
 299         templ_sum *= scale;
 300
 301         if (templ_sqsum_sum < DBL_EPSILON)
 302         {
 303             result = Scalar::all(1);
 304             return true;
 305         }
 306
 307         if (cn == 2)
 308             k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums),
 309                    ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale,
 310                    templ_sum[0], templ_sum[1], templ_sqsum_sum);
 311         else
 312             k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums),
 313                    ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale,
 314                    templ_sum[0], templ_sum[1], templ_sum[2], templ_sum[3], templ_sqsum_sum);
 315     }
 316
 317     size_t globalsize[2] = { result.cols, result.rows };
 318     return k.run(2, globalsize, NULL, false);
 319 }
 320
 321 ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 322
 323 static bool ocl_matchTemplate( InputArray _img, InputArray _templ, OutputArray _result, int method)
 324 {
 325     int cn = _img.channels();
 326
 327     if (cn == 3 || cn > 4)
 328         return false;
 329
 330     typedef bool (*Caller)(InputArray _img, InputArray _templ, OutputArray _result);
 331
 332     static const Caller callers[] =
 333     {
 334         matchTemplateNaive_SQDIFF, matchTemplate_SQDIFF_NORMED, matchTemplateNaive_CCORR,
 335         matchTemplate_CCORR_NORMED, matchTemplate_CCOEFF, matchTemplate_CCOEFF_NORMED
 336     };
 337     const Caller caller = callers[method];
 338
 339     return caller(_img, _templ, _result);
 340 }
 341
 342 #endif
 343
 344 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
 345
 346 typedef IppStatus (CV_STDCALL * ippimatchTemplate)(const void*, int, IppiSize, const void*, int, IppiSize, Ipp32f* , int , IppEnum , Ipp8u*);
 347
 348 static bool ipp_crossCorr(const Mat& src, const Mat& tpl, Mat& dst)
 349 {
 350     if (src.channels()!= 1)
 351         return false;
 352
 353     IppStatus status;
 354
 355     IppiSize srcRoiSize = {src.cols,src.rows};
 356     IppiSize tplRoiSize = {tpl.cols,tpl.rows};
 357
 358     Ipp8u *pBuffer;
 359     int bufSize=0;
 360
 361     int depth = src.depth();
 362
 363     ippimatchTemplate ippFunc =
 364             depth==CV_8U ? (ippimatchTemplate)ippiCrossCorrNorm_8u32f_C1R:
 365             depth==CV_32F? (ippimatchTemplate)ippiCrossCorrNorm_32f_C1R: 0;
 366
 367     if (ippFunc==0)
 368         return false;
 369
 370     IppEnum funCfg = (IppEnum)(ippAlgAuto | ippiNormNone | ippiROIValid);
 371
 372     status = ippiCrossCorrNormGetBufferSize(srcRoiSize, tplRoiSize, funCfg, &bufSize);
 373     if ( status < 0 )
 374         return false;
 375
 376     pBuffer = ippsMalloc_8u( bufSize );
 377
 378     status = ippFunc(src.data, (int)src.step, srcRoiSize, tpl.data, (int)tpl.step, tplRoiSize, (Ipp32f*)dst.data, (int)dst.step, funCfg, pBuffer);
 379
 380     ippsFree( pBuffer );
 381     return status >= 0;
 382 }
 383
 384 static bool ipp_sqrDistance(const Mat& src, const Mat& tpl, Mat& dst)
 385 {
 386     if (src.channels()!= 1)
 387         return false;
 388
 389     IppStatus status;
 390
 391     IppiSize srcRoiSize = {src.cols,src.rows};
 392     IppiSize tplRoiSize = {tpl.cols,tpl.rows};
 393
 394     Ipp8u *pBuffer;
 395     int bufSize=0;
 396
 397     int depth = src.depth();
 398
 399     ippimatchTemplate ippFunc =
 400             depth==CV_8U ? (ippimatchTemplate)ippiSqrDistanceNorm_8u32f_C1R:
 401             depth==CV_32F? (ippimatchTemplate)ippiSqrDistanceNorm_32f_C1R: 0;
 402
 403     if (ippFunc==0)
 404         return false;
 405
 406     IppEnum funCfg = (IppEnum)(ippAlgAuto | ippiNormNone | ippiROIValid);
 407
 408     status = ippiSqrDistanceNormGetBufferSize(srcRoiSize, tplRoiSize, funCfg, &bufSize);
 409     if ( status < 0 )
 410         return false;
 411
 412     pBuffer = ippsMalloc_8u( bufSize );
 413
 414     status = ippFunc(src.data, (int)src.step, srcRoiSize, tpl.data, (int)tpl.step, tplRoiSize, (Ipp32f*)dst.data, (int)dst.step, funCfg, pBuffer);
 415
 416     ippsFree( pBuffer );
 417     return status >= 0;
 418 }
 419
 420 #endif
 421
 422 void crossCorr( const Mat& img, const Mat& _templ, Mat& corr,
 423                 Size corrsize, int ctype,
 424                 Point anchor, double delta, int borderType )
 425 {
 426 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
 427     if (ipp_crossCorr(img, _templ, corr))
 428         return;
 429 #endif
 430
 431     const double blockScale = 4.5;
 432     const int minBlockSize = 256;
 433     std::vector<uchar> buf;
 434
 435     Mat templ = _templ;
 436     int depth = img.depth(), cn = img.channels();
 437     int tdepth = templ.depth(), tcn = templ.channels();
 438     int cdepth = CV_MAT_DEPTH(ctype), ccn = CV_MAT_CN(ctype);
 439
 440     CV_Assert( img.dims <= 2 && templ.dims <= 2 && corr.dims <= 2 );
 441
 442     if( depth != tdepth && tdepth != std::max(CV_32F, depth) )
 443     {
 444         _templ.convertTo(templ, std::max(CV_32F, depth));
 445         tdepth = templ.depth();
 446     }
 447
 448     CV_Assert( depth == tdepth || tdepth == CV_32F);
 449     CV_Assert( corrsize.height <= img.rows + templ.rows - 1 &&
 450                corrsize.width <= img.cols + templ.cols - 1 );
 451
 452     CV_Assert( ccn == 1 || delta == 0 );
 453
 454     corr.create(corrsize, ctype);
 455
 456     int maxDepth = depth > CV_8S ? CV_64F : std::max(std::max(CV_32F, tdepth), cdepth);
 457     Size blocksize, dftsize;
 458
 459     blocksize.width = cvRound(templ.cols*blockScale);
 460     blocksize.width = std::max( blocksize.width, minBlockSize - templ.cols + 1 );
 461     blocksize.width = std::min( blocksize.width, corr.cols );
 462     blocksize.height = cvRound(templ.rows*blockScale);
 463     blocksize.height = std::max( blocksize.height, minBlockSize - templ.rows + 1 );
 464     blocksize.height = std::min( blocksize.height, corr.rows );
 465
 466     dftsize.width = std::max(getOptimalDFTSize(blocksize.width + templ.cols - 1), 2);
 467     dftsize.height = getOptimalDFTSize(blocksize.height + templ.rows - 1);
 468     if( dftsize.width <= 0 || dftsize.height <= 0 )
 469         CV_Error( CV_StsOutOfRange, "the input arrays are too big" );
 470
 471     // recompute block size
 472     blocksize.width = dftsize.width - templ.cols + 1;
 473     blocksize.width = MIN( blocksize.width, corr.cols );
 474     blocksize.height = dftsize.height - templ.rows + 1;
 475     blocksize.height = MIN( blocksize.height, corr.rows );
 476
 477     Mat dftTempl( dftsize.height*tcn, dftsize.width, maxDepth );
 478     Mat dftImg( dftsize, maxDepth );
 479
 480     int i, k, bufSize = 0;
 481     if( tcn > 1 && tdepth != maxDepth )
 482         bufSize = templ.cols*templ.rows*CV_ELEM_SIZE(tdepth);
 483
 484     if( cn > 1 && depth != maxDepth )
 485         bufSize = std::max( bufSize, (blocksize.width + templ.cols - 1)*
 486             (blocksize.height + templ.rows - 1)*CV_ELEM_SIZE(depth));
 487
 488     if( (ccn > 1 || cn > 1) && cdepth != maxDepth )
 489         bufSize = std::max( bufSize, blocksize.width*blocksize.height*CV_ELEM_SIZE(cdepth));
 490
 491     buf.resize(bufSize);
 492
 493     // compute DFT of each template plane
 494     for( k = 0; k < tcn; k++ )
 495     {
 496         int yofs = k*dftsize.height;
 497         Mat src = templ;
 498         Mat dst(dftTempl, Rect(0, yofs, dftsize.width, dftsize.height));
 499         Mat dst1(dftTempl, Rect(0, yofs, templ.cols, templ.rows));
 500
 501         if( tcn > 1 )
 502         {
 503             src = tdepth == maxDepth ? dst1 : Mat(templ.size(), tdepth, &buf[0]);
 504             int pairs[] = {k, 0};
 505             mixChannels(&templ, 1, &src, 1, pairs, 1);
 506         }
 507
 508         if( dst1.data != src.data )
 509             src.convertTo(dst1, dst1.depth());
 510
 511         if( dst.cols > templ.cols )
 512         {
 513             Mat part(dst, Range(0, templ.rows), Range(templ.cols, dst.cols));
 514             part = Scalar::all(0);
 515         }
 516         dft(dst, dst, 0, templ.rows);
 517     }
 518
 519     int tileCountX = (corr.cols + blocksize.width - 1)/blocksize.width;
 520     int tileCountY = (corr.rows + blocksize.height - 1)/blocksize.height;
 521     int tileCount = tileCountX * tileCountY;
 522
 523     Size wholeSize = img.size();
 524     Point roiofs(0,0);
 525     Mat img0 = img;
 526
 527     if( !(borderType & BORDER_ISOLATED) )
 528     {
 529         img.locateROI(wholeSize, roiofs);
 530         img0.adjustROI(roiofs.y, wholeSize.height-img.rows-roiofs.y,
 531                        roiofs.x, wholeSize.width-img.cols-roiofs.x);
 532     }
 533     borderType |= BORDER_ISOLATED;
 534
 535     // calculate correlation by blocks
 536     for( i = 0; i < tileCount; i++ )
 537     {
 538         int x = (i%tileCountX)*blocksize.width;
 539         int y = (i/tileCountX)*blocksize.height;
 540
 541         Size bsz(std::min(blocksize.width, corr.cols - x),
 542                  std::min(blocksize.height, corr.rows - y));
 543         Size dsz(bsz.width + templ.cols - 1, bsz.height + templ.rows - 1);
 544         int x0 = x - anchor.x + roiofs.x, y0 = y - anchor.y + roiofs.y;
 545         int x1 = std::max(0, x0), y1 = std::max(0, y0);
 546         int x2 = std::min(img0.cols, x0 + dsz.width);
 547         int y2 = std::min(img0.rows, y0 + dsz.height);
 548         Mat src0(img0, Range(y1, y2), Range(x1, x2));
 549         Mat dst(dftImg, Rect(0, 0, dsz.width, dsz.height));
 550         Mat dst1(dftImg, Rect(x1-x0, y1-y0, x2-x1, y2-y1));
 551         Mat cdst(corr, Rect(x, y, bsz.width, bsz.height));
 552
 553         for( k = 0; k < cn; k++ )
 554         {
 555             Mat src = src0;
 556             dftImg = Scalar::all(0);
 557
 558             if( cn > 1 )
 559             {
 560                 src = depth == maxDepth ? dst1 : Mat(y2-y1, x2-x1, depth, &buf[0]);
 561                 int pairs[] = {k, 0};
 562                 mixChannels(&src0, 1, &src, 1, pairs, 1);
 563             }
 564
 565             if( dst1.data != src.data )
 566                 src.convertTo(dst1, dst1.depth());
 567
 568             if( x2 - x1 < dsz.width || y2 - y1 < dsz.height )
 569                 copyMakeBorder(dst1, dst, y1-y0, dst.rows-dst1.rows-(y1-y0),
 570                                x1-x0, dst.cols-dst1.cols-(x1-x0), borderType);
 571
 572             dft( dftImg, dftImg, 0, dsz.height );
 573             Mat dftTempl1(dftTempl, Rect(0, tcn > 1 ? k*dftsize.height : 0,
 574                                          dftsize.width, dftsize.height));
 575             mulSpectrums(dftImg, dftTempl1, dftImg, 0, true);
 576             dft( dftImg, dftImg, DFT_INVERSE + DFT_SCALE, bsz.height );
 577
 578             src = dftImg(Rect(0, 0, bsz.width, bsz.height));
 579
 580             if( ccn > 1 )
 581             {
 582                 if( cdepth != maxDepth )
 583                 {
 584                     Mat plane(bsz, cdepth, &buf[0]);
 585                     src.convertTo(plane, cdepth, 1, delta);
 586                     src = plane;
 587                 }
 588                 int pairs[] = {0, k};
 589                 mixChannels(&src, 1, &cdst, 1, pairs, 1);
 590             }
 591             else
 592             {
 593                 if( k == 0 )
 594                     src.convertTo(cdst, cdepth, 1, delta);
 595                 else
 596                 {
 597                     if( maxDepth != cdepth )
 598                     {
 599                         Mat plane(bsz, cdepth, &buf[0]);
 600                         src.convertTo(plane, cdepth);
 601                         src = plane;
 602                     }
 603                     add(src, cdst, cdst);
 604                 }
 605             }
 606         }
 607     }
 608 }
 609 }
 610
 611 ////////////////////////////////////////////////////////////////////////////////////////////////////////
 612
 613 void cv::matchTemplate( InputArray _img, InputArray _templ, OutputArray _result, int method )
 614 {
 615     CV_Assert( CV_TM_SQDIFF <= method && method <= CV_TM_CCOEFF_NORMED );
 616     CV_Assert( (_img.depth() == CV_8U || _img.depth() == CV_32F) && _img.type() == _templ.type() && _img.dims() <= 2 );
 617
 618     bool needswap = _img.size().height < _templ.size().height || _img.size().width < _templ.size().width;
 619     if (needswap)
 620     {
 621         CV_Assert(_img.size().height <= _templ.size().height && _img.size().width <= _templ.size().width);
 622     }
 623
 624     CV_OCL_RUN(_img.dims() <= 2 && _result.isUMat(),
 625                (!needswap ? ocl_matchTemplate(_img, _templ, _result, method) : ocl_matchTemplate(_templ, _img, _result, method)))
 626
 627     int numType = method == CV_TM_CCORR || method == CV_TM_CCORR_NORMED ? 0 :
 628                   method == CV_TM_CCOEFF || method == CV_TM_CCOEFF_NORMED ? 1 : 2;
 629     bool isNormed = method == CV_TM_CCORR_NORMED ||
 630                     method == CV_TM_SQDIFF_NORMED ||
 631                     method == CV_TM_CCOEFF_NORMED;
 632
 633     Mat img = _img.getMat(), templ = _templ.getMat();
 634     if (needswap)
 635         std::swap(img, templ);
 636
 637     Size corrSize(img.cols - templ.cols + 1, img.rows - templ.rows + 1);
 638     _result.create(corrSize, CV_32F);
 639     Mat result = _result.getMat();
 640
 641 #ifdef HAVE_TEGRA_OPTIMIZATION
 642     if (tegra::matchTemplate(img, templ, result, method))
 643         return;
 644 #endif
 645
 646 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
 647     if (method == CV_TM_SQDIFF && ipp_sqrDistance(img, templ, result))
 648         return;
 649 #endif
 650
 651     int cn = img.channels();
 652     crossCorr( img, templ, result, result.size(), result.type(), Point(0,0), 0, 0);
 653
 654     if( method == CV_TM_CCORR )
 655         return;
 656
 657     double invArea = 1./((double)templ.rows * templ.cols);
 658
 659     Mat sum, sqsum;
 660     Scalar templMean, templSdv;
 661     double *q0 = 0, *q1 = 0, *q2 = 0, *q3 = 0;
 662     double templNorm = 0, templSum2 = 0;
 663
 664     if( method == CV_TM_CCOEFF )
 665     {
 666         integral(img, sum, CV_64F);
 667         templMean = mean(templ);
 668     }
 669     else
 670     {
 671         integral(img, sum, sqsum, CV_64F);
 672         meanStdDev( templ, templMean, templSdv );
 673
 674         templNorm = templSdv[0]*templSdv[0] + templSdv[1]*templSdv[1] + templSdv[2]*templSdv[2] + templSdv[3]*templSdv[3];
 675
 676         if( templNorm < DBL_EPSILON && method == CV_TM_CCOEFF_NORMED )
 677         {
 678             result = Scalar::all(1);
 679             return;
 680         }
 681
 682         templSum2 = templNorm + templMean[0]*templMean[0] + templMean[1]*templMean[1] + templMean[2]*templMean[2] + templMean[3]*templMean[3];
 683
 684         if( numType != 1 )
 685         {
 686             templMean = Scalar::all(0);
 687             templNorm = templSum2;
 688         }
 689
 690         templSum2 /= invArea;
 691         templNorm = std::sqrt(templNorm);
 692         templNorm /= std::sqrt(invArea); // care of accuracy here
 693
 694         q0 = (double*)sqsum.data;
 695         q1 = q0 + templ.cols*cn;
 696         q2 = (double*)(sqsum.data + templ.rows*sqsum.step);
 697         q3 = q2 + templ.cols*cn;
 698     }
 699
 700     double* p0 = (double*)sum.data;
 701     double* p1 = p0 + templ.cols*cn;
 702     double* p2 = (double*)(sum.data + templ.rows*sum.step);
 703     double* p3 = p2 + templ.cols*cn;
 704
 705     int sumstep = sum.data ? (int)(sum.step / sizeof(double)) : 0;
 706     int sqstep = sqsum.data ? (int)(sqsum.step / sizeof(double)) : 0;
 707
 708     int i, j, k;
 709
 710     for( i = 0; i < result.rows; i++ )
 711     {
 712         float* rrow = (float*)(result.data + i*result.step);
 713         int idx = i * sumstep;
 714         int idx2 = i * sqstep;
 715
 716         for( j = 0; j < result.cols; j++, idx += cn, idx2 += cn )
 717         {
 718             double num = rrow[j], t;
 719             double wndMean2 = 0, wndSum2 = 0;
 720
 721             if( numType == 1 )
 722             {
 723                 for( k = 0; k < cn; k++ )
 724                 {
 725                     t = p0[idx+k] - p1[idx+k] - p2[idx+k] + p3[idx+k];
 726                     wndMean2 += t*t;
 727                     num -= t*templMean[k];
 728                 }
 729
 730                 wndMean2 *= invArea;
 731             }
 732
 733             if( isNormed || numType == 2 )
 734             {
 735                 for( k = 0; k < cn; k++ )
 736                 {
 737                     t = q0[idx2+k] - q1[idx2+k] - q2[idx2+k] + q3[idx2+k];
 738                     wndSum2 += t;
 739                 }
 740
 741                 if( numType == 2 )
 742                 {
 743                     num = wndSum2 - 2*num + templSum2;
 744                     num = MAX(num, 0.);
 745                 }
 746             }
 747
 748             if( isNormed )
 749             {
 750                 t = std::sqrt(MAX(wndSum2 - wndMean2,0))*templNorm;
 751                 if( fabs(num) < t )
 752                     num /= t;
 753                 else if( fabs(num) < t*1.125 )
 754                     num = num > 0 ? 1 : -1;
 755                 else
 756                     num = method != CV_TM_SQDIFF_NORMED ? 0 : 1;
 757             }
 758
 759             rrow[j] = (float)num;
 760         }
 761     }
 762 }
 763
 764
 765 CV_IMPL void
 766 cvMatchTemplate( const CvArr* _img, const CvArr* _templ, CvArr* _result, int method )
 767 {
 768     cv::Mat img = cv::cvarrToMat(_img), templ = cv::cvarrToMat(_templ),
 769         result = cv::cvarrToMat(_result);
 770     CV_Assert( result.size() == cv::Size(std::abs(img.cols - templ.cols) + 1,
 771                                          std::abs(img.rows - templ.rows) + 1) &&
 772               result.type() == CV_32F );
 773     matchTemplate(img, templ, result, method);
 774 }
 775
 776 /* End of file. */