modules/imgproc/src/templmatch.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                        Intel License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
  14 // Third party copyrights are property of their respective owners.
  15 //
  16 // Redistribution and use in source and binary forms, with or without modification,
  17 // are permitted provided that the following conditions are met:
  18 //
  19 //   * Redistribution's of source code must retain the above copyright notice,
  20 //     this list of conditions and the following disclaimer.
  21 //
  22 //   * Redistribution's in binary form must reproduce the above copyright notice,
  23 //     this list of conditions and the following disclaimer in the documentation
  24 //     and/or other materials provided with the distribution.
  25 //
  26 //   * The name of Intel Corporation may not be used to endorse or promote products
  27 //     derived from this software without specific prior written permission.
  28 //
  29 // This software is provided by the copyright holders and contributors "as is" and
  30 // any express or implied warranties, including, but not limited to, the implied
  31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  32 // In no event shall the Intel Corporation or contributors be liable for any direct,
  33 // indirect, incidental, special, exemplary, or consequential damages
  34 // (including, but not limited to, procurement of substitute goods or services;
  35 // loss of use, data, or profits; or business interruption) however caused
  36 // and on any theory of liability, whether in contract, strict liability,
  37 // or tort (including negligence or otherwise) arising in any way out of
  38 // the use of this software, even if advised of the possibility of such damage.
  39 //
  40 //M*/
  41
  42 #include "precomp.hpp"
  43 #include "opencl_kernels_imgproc.hpp"
  44
  45 ////////////////////////////////////////////////// matchTemplate //////////////////////////////////////////////////////////
  46
  47 namespace cv
  48 {
  49
  50 #ifdef HAVE_OPENCL
  51
  52 /////////////////////////////////////////////////// CCORR //////////////////////////////////////////////////////////////
  53
  54 enum
  55 {
  56     SUM_1 = 0, SUM_2 = 1
  57 };
  58
  59 static bool extractFirstChannel_32F(InputArray _image, OutputArray _result, int cn)
  60 {
  61     int depth = _image.depth();
  62
  63     ocl::Device dev = ocl::Device::getDefault();
  64     int pxPerWIy = (dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1;
  65
  66     ocl::Kernel k("extractFirstChannel", ocl::imgproc::match_template_oclsrc, format("-D FIRST_CHANNEL -D T1=%s -D cn=%d -D PIX_PER_WI_Y=%d",
  67                                                                             ocl::typeToStr(depth), cn, pxPerWIy));
  68     if (k.empty())
  69         return false;
  70
  71     UMat image  = _image.getUMat();
  72     UMat result = _result.getUMat();
  73
  74
  75     size_t globalsize[2] = {(size_t)result.cols, ((size_t)result.rows+pxPerWIy-1)/pxPerWIy};
  76     return k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::WriteOnly(result)).run( 2, globalsize, NULL, false);
  77 }
  78
  79 static bool sumTemplate(InputArray _src, UMat & result)
  80 {
  81     int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
  82     int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);
  83     size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
  84
  85     int wgs2_aligned = 1;
  86     while (wgs2_aligned < (int)wgs)
  87         wgs2_aligned <<= 1;
  88     wgs2_aligned >>= 1;
  89
  90     char cvt[40];
  91     ocl::Kernel k("calcSum", ocl::imgproc::match_template_oclsrc,
  92                   format("-D CALC_SUM -D T=%s -D T1=%s -D WT=%s -D cn=%d -D convertToWT=%s -D WGS=%d -D WGS2_ALIGNED=%d",
  93                          ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype), cn,
  94                          ocl::convertTypeStr(depth, wdepth, cn, cvt),
  95                          (int)wgs, wgs2_aligned));
  96     if (k.empty())
  97         return false;
  98
  99     UMat src = _src.getUMat();
 100     result.create(1, 1, CV_32FC1);
 101
 102     ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
 103             resarg = ocl::KernelArg::PtrWriteOnly(result);
 104
 105     k.args(srcarg, src.cols, (int)src.total(), resarg);
 106
 107     size_t globalsize = wgs;
 108     return k.run(1, &globalsize, &wgs, false);
 109 }
 110
 111 static bool useNaive(Size size)
 112 {
 113     int dft_size = 18;
 114     return size.height < dft_size && size.width < dft_size;
 115 }
 116
 117 struct ConvolveBuf
 118 {
 119     Size result_size;
 120     Size block_size;
 121     Size user_block_size;
 122     Size dft_size;
 123
 124     UMat image_spect, templ_spect, result_spect;
 125     UMat image_block, templ_block, result_data;
 126
 127     void create(Size image_size, Size templ_size);
 128 };
 129
 130 void ConvolveBuf::create(Size image_size, Size templ_size)
 131 {
 132     result_size = Size(image_size.width - templ_size.width + 1,
 133                        image_size.height - templ_size.height + 1);
 134
 135     const double blockScale = 4.5;
 136     const int minBlockSize = 256;
 137
 138     block_size.width = cvRound(templ_size.width*blockScale);
 139     block_size.width = std::max( block_size.width, minBlockSize - templ_size.width + 1 );
 140     block_size.width = std::min( block_size.width, result_size.width );
 141     block_size.height = cvRound(templ_size.height*blockScale);
 142     block_size.height = std::max( block_size.height, minBlockSize - templ_size.height + 1 );
 143     block_size.height = std::min( block_size.height, result_size.height );
 144
 145     dft_size.width = std::max(getOptimalDFTSize(block_size.width + templ_size.width - 1), 2);
 146     dft_size.height = getOptimalDFTSize(block_size.height + templ_size.height - 1);
 147     if( dft_size.width <= 0 || dft_size.height <= 0 )
 148         CV_Error( CV_StsOutOfRange, "the input arrays are too big" );
 149
 150     // recompute block size
 151     block_size.width = dft_size.width - templ_size.width + 1;
 152     block_size.width = std::min( block_size.width, result_size.width);
 153     block_size.height = dft_size.height - templ_size.height + 1;
 154     block_size.height = std::min( block_size.height, result_size.height );
 155
 156     image_block.create(dft_size, CV_32F);
 157     templ_block.create(dft_size, CV_32F);
 158     result_data.create(dft_size, CV_32F);
 159
 160     image_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);
 161     templ_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);
 162     result_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);
 163
 164     // Use maximum result matrix block size for the estimated DFT block size
 165     block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);
 166     block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);
 167 }
 168
 169 static bool convolve_dft(InputArray _image, InputArray _templ, OutputArray _result)
 170 {
 171     ConvolveBuf buf;
 172     CV_Assert(_image.type() == CV_32F);
 173     CV_Assert(_templ.type() == CV_32F);
 174
 175     buf.create(_image.size(), _templ.size());
 176     _result.create(buf.result_size, CV_32F);
 177
 178     UMat image  = _image.getUMat();
 179     UMat templ  = _templ.getUMat();
 180
 181     UMat result = _result.getUMat();
 182
 183     Size& block_size = buf.block_size;
 184     Size& dft_size = buf.dft_size;
 185
 186     UMat& image_block = buf.image_block;
 187     UMat& templ_block = buf.templ_block;
 188     UMat& result_data = buf.result_data;
 189
 190     UMat& image_spect = buf.image_spect;
 191     UMat& templ_spect = buf.templ_spect;
 192     UMat& result_spect = buf.result_spect;
 193
 194     UMat templ_roi = templ;
 195     copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0,
 196                    templ_block.cols - templ_roi.cols, BORDER_ISOLATED);
 197
 198     dft(templ_block, templ_spect, 0, templ.rows);
 199
 200     // Process all blocks of the result matrix
 201     for (int y = 0; y < result.rows; y += block_size.height)
 202     {
 203         for (int x = 0; x < result.cols; x += block_size.width)
 204         {
 205             Size image_roi_size(std::min(x + dft_size.width, image.cols) - x,
 206                                 std::min(y + dft_size.height, image.rows) - y);
 207             Rect roi0(x, y, image_roi_size.width, image_roi_size.height);
 208
 209             UMat image_roi(image, roi0);
 210
 211             copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,
 212                            0, image_block.cols - image_roi.cols, BORDER_ISOLATED);
 213
 214             dft(image_block, image_spect, 0);
 215
 216             mulSpectrums(image_spect, templ_spect, result_spect, 0, true);
 217
 218             dft(result_spect, result_data, cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
 219
 220             Size result_roi_size(std::min(x + block_size.width, result.cols) - x,
 221                                  std::min(y + block_size.height, result.rows) - y);
 222
 223             Rect roi1(x, y, result_roi_size.width, result_roi_size.height);
 224             Rect roi2(0, 0, result_roi_size.width, result_roi_size.height);
 225
 226             UMat result_roi(result, roi1);
 227             UMat result_block(result_data, roi2);
 228
 229             result_block.copyTo(result_roi);
 230         }
 231     }
 232     return true;
 233 }
 234
 235 static bool convolve_32F(InputArray _image, InputArray _templ, OutputArray _result)
 236 {
 237     _result.create(_image.rows() - _templ.rows() + 1, _image.cols() - _templ.cols() + 1, CV_32F);
 238
 239     if (_image.channels() == 1)
 240         return(convolve_dft(_image, _templ, _result));
 241     else
 242     {
 243         UMat image = _image.getUMat();
 244         UMat templ = _templ.getUMat();
 245         UMat result_(image.rows-templ.rows+1,(image.cols-templ.cols+1)*image.channels(), CV_32F);
 246         bool ok = convolve_dft(image.reshape(1), templ.reshape(1), result_);
 247         if (ok==false)
 248             return false;
 249         UMat result = _result.getUMat();
 250         return (extractFirstChannel_32F(result_, _result, _image.channels()));
 251     }
 252 }
 253
 254 static bool matchTemplateNaive_CCORR(InputArray _image, InputArray _templ, OutputArray _result)
 255 {
 256     int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
 257     int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);
 258
 259     ocl::Device dev = ocl::Device::getDefault();
 260     int pxPerWIx = (cn==1 && dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1;
 261     int rated_cn = cn;
 262     int wtype1 = wtype;
 263
 264     if (pxPerWIx!=1)
 265     {
 266         rated_cn = pxPerWIx;
 267         type = CV_MAKE_TYPE(depth, rated_cn);
 268         wtype1 = CV_MAKE_TYPE(wdepth, rated_cn);
 269     }
 270
 271     char cvt[40];
 272     char cvt1[40];
 273     const char* convertToWT1 = ocl::convertTypeStr(depth, wdepth, cn, cvt);
 274     const char* convertToWT = ocl::convertTypeStr(depth, wdepth, rated_cn, cvt1);
 275
 276     ocl::Kernel k("matchTemplate_Naive_CCORR", ocl::imgproc::match_template_oclsrc,
 277                   format("-D CCORR -D T=%s -D T1=%s -D WT=%s -D WT1=%s -D convertToWT=%s -D convertToWT1=%s -D cn=%d -D PIX_PER_WI_X=%d", ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype1), ocl::typeToStr(wtype),
 278                          convertToWT, convertToWT1, cn, pxPerWIx));
 279     if (k.empty())
 280         return false;
 281
 282     UMat image = _image.getUMat(), templ = _templ.getUMat();
 283     _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
 284     UMat result = _result.getUMat();
 285
 286     k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ),
 287            ocl::KernelArg::WriteOnly(result));
 288
 289     size_t globalsize[2] = { ((size_t)result.cols+pxPerWIx-1)/pxPerWIx, (size_t)result.rows};
 290     return k.run(2, globalsize, NULL, false);
 291 }
 292
 293
 294 static bool matchTemplate_CCORR(InputArray _image, InputArray _templ, OutputArray _result)
 295 {
 296     if (useNaive(_templ.size()))
 297         return( matchTemplateNaive_CCORR(_image, _templ, _result));
 298     else
 299     {
 300         if(_image.depth() == CV_8U)
 301         {
 302             UMat imagef, templf;
 303             UMat image = _image.getUMat();
 304             UMat templ = _templ.getUMat();
 305             image.convertTo(imagef, CV_32F);
 306             templ.convertTo(templf, CV_32F);
 307             return(convolve_32F(imagef, templf, _result));
 308         }
 309         else
 310         {
 311             return(convolve_32F(_image, _templ, _result));
 312         }
 313     }
 314 }
 315
 316 static bool matchTemplate_CCORR_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
 317 {
 318     matchTemplate(_image, _templ, _result, CV_TM_CCORR);
 319
 320     int type = _image.type(), cn = CV_MAT_CN(type);
 321
 322     ocl::Kernel k("matchTemplate_CCORR_NORMED", ocl::imgproc::match_template_oclsrc,
 323                   format("-D CCORR_NORMED -D T=%s -D cn=%d", ocl::typeToStr(type), cn));
 324     if (k.empty())
 325         return false;
 326
 327     UMat image = _image.getUMat(), templ = _templ.getUMat();
 328     _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
 329     UMat result = _result.getUMat();
 330
 331     UMat image_sums, image_sqsums;
 332     integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F);
 333
 334     UMat templ_sqsum;
 335     if (!sumTemplate(templ, templ_sqsum))
 336         return false;
 337
 338     k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result),
 339            templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum));
 340
 341     size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };
 342     return k.run(2, globalsize, NULL, false);
 343 }
 344
 345 ////////////////////////////////////// SQDIFF //////////////////////////////////////////////////////////////
 346
 347 static bool matchTemplateNaive_SQDIFF(InputArray _image, InputArray _templ, OutputArray _result)
 348 {
 349     int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
 350     int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);
 351
 352     char cvt[40];
 353     ocl::Kernel k("matchTemplate_Naive_SQDIFF", ocl::imgproc::match_template_oclsrc,
 354                   format("-D SQDIFF -D T=%s -D T1=%s -D WT=%s -D convertToWT=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth),
 355                          ocl::typeToStr(wtype), ocl::convertTypeStr(depth, wdepth, cn, cvt), cn));
 356     if (k.empty())
 357         return false;
 358
 359     UMat image = _image.getUMat(), templ = _templ.getUMat();
 360     _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
 361     UMat result = _result.getUMat();
 362
 363     k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ),
 364            ocl::KernelArg::WriteOnly(result));
 365
 366     size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };
 367     return k.run(2, globalsize, NULL, false);
 368 }
 369
 370 static bool matchTemplate_SQDIFF(InputArray _image, InputArray _templ, OutputArray _result)
 371 {
 372     if (useNaive(_templ.size()))
 373         return( matchTemplateNaive_SQDIFF(_image, _templ, _result));
 374     else
 375     {
 376         matchTemplate(_image, _templ, _result, CV_TM_CCORR);
 377
 378         int type = _image.type(), cn = CV_MAT_CN(type);
 379
 380         ocl::Kernel k("matchTemplate_Prepared_SQDIFF", ocl::imgproc::match_template_oclsrc,
 381                   format("-D SQDIFF_PREPARED -D T=%s -D cn=%d", ocl::typeToStr(type),  cn));
 382         if (k.empty())
 383             return false;
 384
 385         UMat image = _image.getUMat(), templ = _templ.getUMat();
 386         _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
 387         UMat result = _result.getUMat();
 388
 389         UMat image_sums, image_sqsums;
 390         integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F);
 391
 392         UMat templ_sqsum;
 393         if (!sumTemplate(_templ, templ_sqsum))
 394             return false;
 395
 396         k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result),
 397            templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum));
 398
 399         size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };
 400
 401         return k.run(2, globalsize, NULL, false);
 402     }
 403 }
 404
 405 static bool matchTemplate_SQDIFF_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
 406 {
 407     matchTemplate(_image, _templ, _result, CV_TM_CCORR);
 408
 409     int type = _image.type(), cn = CV_MAT_CN(type);
 410
 411     ocl::Kernel k("matchTemplate_SQDIFF_NORMED", ocl::imgproc::match_template_oclsrc,
 412                   format("-D SQDIFF_NORMED -D T=%s -D cn=%d", ocl::typeToStr(type),  cn));
 413     if (k.empty())
 414         return false;
 415
 416     UMat image = _image.getUMat(), templ = _templ.getUMat();
 417     _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
 418     UMat result = _result.getUMat();
 419
 420     UMat image_sums, image_sqsums;
 421     integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F);
 422
 423     UMat templ_sqsum;
 424     if (!sumTemplate(_templ, templ_sqsum))
 425         return false;
 426
 427     k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result),
 428            templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum));
 429
 430     size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };
 431
 432     return k.run(2, globalsize, NULL, false);
 433 }
 434
 435 ///////////////////////////////////// CCOEFF /////////////////////////////////////////////////////////////////
 436
 437 static bool matchTemplate_CCOEFF(InputArray _image, InputArray _templ, OutputArray _result)
 438 {
 439     matchTemplate(_image, _templ, _result, CV_TM_CCORR);
 440
 441     UMat image_sums, temp;
 442     integral(_image, image_sums, CV_32F);
 443
 444     int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
 445
 446     ocl::Kernel k("matchTemplate_Prepared_CCOEFF", ocl::imgproc::match_template_oclsrc,
 447                   format("-D CCOEFF -D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
 448     if (k.empty())
 449         return false;
 450
 451     UMat templ  = _templ.getUMat();
 452     UMat result = _result.getUMat();
 453
 454     if (cn==1)
 455     {
 456         Scalar templMean = mean(templ);
 457         float templ_sum = (float)templMean[0];
 458
 459         k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum);
 460     }
 461     else
 462     {
 463         Vec4f templ_sum = Vec4f::all(0);
 464         templ_sum = (Vec4f)mean(templ);
 465
 466        k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum);    }
 467
 468     size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };
 469     return k.run(2, globalsize, NULL, false);
 470 }
 471
 472 static bool matchTemplate_CCOEFF_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
 473 {
 474     matchTemplate(_image, _templ, _result, CV_TM_CCORR);
 475
 476     UMat temp, image_sums, image_sqsums;
 477     integral(_image, image_sums, image_sqsums, CV_32F, CV_32F);
 478
 479     int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
 480     CV_Assert(cn >= 1 && cn <= 4);
 481
 482     ocl::Kernel k("matchTemplate_CCOEFF_NORMED", ocl::imgproc::match_template_oclsrc,
 483         format("-D CCOEFF_NORMED -D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
 484     if (k.empty())
 485         return false;
 486
 487     UMat templ = _templ.getUMat();
 488     Size size = _image.size(), tsize = templ.size();
 489     _result.create(size.height - templ.rows + 1, size.width - templ.cols + 1, CV_32F);
 490     UMat result = _result.getUMat();
 491
 492     float scale = 1.f / tsize.area();
 493
 494     if (cn == 1)
 495     {
 496         float templ_sum = (float)sum(templ)[0];
 497
 498         multiply(templ, templ, temp, 1, CV_32F);
 499         float templ_sqsum = (float)sum(temp)[0];
 500
 501         templ_sqsum -= scale * templ_sum * templ_sum;
 502         templ_sum   *= scale;
 503
 504         if (templ_sqsum < DBL_EPSILON)
 505         {
 506             result = Scalar::all(1);
 507             return true;
 508         }
 509
 510         k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums),
 511                       ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale, templ_sum, templ_sqsum);
 512     }
 513     else
 514     {
 515         Vec4f templ_sum = Vec4f::all(0), templ_sqsum = Vec4f::all(0);
 516         templ_sum = sum(templ);
 517
 518         multiply(templ, templ, temp, 1, CV_32F);
 519         templ_sqsum = sum(temp);
 520
 521         float templ_sqsum_sum = 0;
 522         for (int i = 0; i < cn; i ++)
 523             templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
 524
 525         templ_sum *= scale;
 526
 527         if (templ_sqsum_sum < DBL_EPSILON)
 528         {
 529             result = Scalar::all(1);
 530             return true;
 531         }
 532
 533         k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums),
 534                    ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale,
 535                    templ_sum, templ_sqsum_sum);    }
 536
 537     size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };
 538     return k.run(2, globalsize, NULL, false);
 539 }
 540
 541 ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 542
 543 static bool ocl_matchTemplate( InputArray _img, InputArray _templ, OutputArray _result, int method)
 544 {
 545     int cn = _img.channels();
 546
 547     if (cn > 4)
 548         return false;
 549
 550     typedef bool (*Caller)(InputArray _img, InputArray _templ, OutputArray _result);
 551
 552     static const Caller callers[] =
 553     {
 554         matchTemplate_SQDIFF, matchTemplate_SQDIFF_NORMED, matchTemplate_CCORR,
 555         matchTemplate_CCORR_NORMED, matchTemplate_CCOEFF, matchTemplate_CCOEFF_NORMED
 556     };
 557     const Caller caller = callers[method];
 558
 559     return caller(_img, _templ, _result);
 560 }
 561
 562 #endif
 563
 564 #include "opencv2/core/hal/hal.hpp"
 565
 566 void crossCorr( const Mat& img, const Mat& _templ, Mat& corr,
 567                 Size corrsize, int ctype,
 568                 Point anchor, double delta, int borderType )
 569 {
 570     const double blockScale = 4.5;
 571     const int minBlockSize = 256;
 572     std::vector<uchar> buf;
 573
 574     Mat templ = _templ;
 575     int depth = img.depth(), cn = img.channels();
 576     int tdepth = templ.depth(), tcn = templ.channels();
 577     int cdepth = CV_MAT_DEPTH(ctype), ccn = CV_MAT_CN(ctype);
 578
 579     CV_Assert( img.dims <= 2 && templ.dims <= 2 && corr.dims <= 2 );
 580
 581     if( depth != tdepth && tdepth != std::max(CV_32F, depth) )
 582     {
 583         _templ.convertTo(templ, std::max(CV_32F, depth));
 584         tdepth = templ.depth();
 585     }
 586
 587     CV_Assert( depth == tdepth || tdepth == CV_32F);
 588     CV_Assert( corrsize.height <= img.rows + templ.rows - 1 &&
 589                corrsize.width <= img.cols + templ.cols - 1 );
 590
 591     CV_Assert( ccn == 1 || delta == 0 );
 592
 593     corr.create(corrsize, ctype);
 594
 595     int maxDepth = depth > CV_8S ? CV_64F : std::max(std::max(CV_32F, tdepth), cdepth);
 596     Size blocksize, dftsize;
 597
 598     blocksize.width = cvRound(templ.cols*blockScale);
 599     blocksize.width = std::max( blocksize.width, minBlockSize - templ.cols + 1 );
 600     blocksize.width = std::min( blocksize.width, corr.cols );
 601     blocksize.height = cvRound(templ.rows*blockScale);
 602     blocksize.height = std::max( blocksize.height, minBlockSize - templ.rows + 1 );
 603     blocksize.height = std::min( blocksize.height, corr.rows );
 604
 605     dftsize.width = std::max(getOptimalDFTSize(blocksize.width + templ.cols - 1), 2);
 606     dftsize.height = getOptimalDFTSize(blocksize.height + templ.rows - 1);
 607     if( dftsize.width <= 0 || dftsize.height <= 0 )
 608         CV_Error( CV_StsOutOfRange, "the input arrays are too big" );
 609
 610     // recompute block size
 611     blocksize.width = dftsize.width - templ.cols + 1;
 612     blocksize.width = MIN( blocksize.width, corr.cols );
 613     blocksize.height = dftsize.height - templ.rows + 1;
 614     blocksize.height = MIN( blocksize.height, corr.rows );
 615
 616     Mat dftTempl( dftsize.height*tcn, dftsize.width, maxDepth );
 617     Mat dftImg( dftsize, maxDepth );
 618
 619     int i, k, bufSize = 0;
 620     if( tcn > 1 && tdepth != maxDepth )
 621         bufSize = templ.cols*templ.rows*CV_ELEM_SIZE(tdepth);
 622
 623     if( cn > 1 && depth != maxDepth )
 624         bufSize = std::max( bufSize, (blocksize.width + templ.cols - 1)*
 625             (blocksize.height + templ.rows - 1)*CV_ELEM_SIZE(depth));
 626
 627     if( (ccn > 1 || cn > 1) && cdepth != maxDepth )
 628         bufSize = std::max( bufSize, blocksize.width*blocksize.height*CV_ELEM_SIZE(cdepth));
 629
 630     buf.resize(bufSize);
 631
 632     Ptr<hal::DFT2D> c = hal::DFT2D::create(dftsize.width, dftsize.height, dftTempl.depth(), 1, 1, CV_HAL_DFT_IS_INPLACE, templ.rows);
 633
 634     // compute DFT of each template plane
 635     for( k = 0; k < tcn; k++ )
 636     {
 637         int yofs = k*dftsize.height;
 638         Mat src = templ;
 639         Mat dst(dftTempl, Rect(0, yofs, dftsize.width, dftsize.height));
 640         Mat dst1(dftTempl, Rect(0, yofs, templ.cols, templ.rows));
 641
 642         if( tcn > 1 )
 643         {
 644             src = tdepth == maxDepth ? dst1 : Mat(templ.size(), tdepth, &buf[0]);
 645             int pairs[] = {k, 0};
 646             mixChannels(&templ, 1, &src, 1, pairs, 1);
 647         }
 648
 649         if( dst1.data != src.data )
 650             src.convertTo(dst1, dst1.depth());
 651
 652         if( dst.cols > templ.cols )
 653         {
 654             Mat part(dst, Range(0, templ.rows), Range(templ.cols, dst.cols));
 655             part = Scalar::all(0);
 656         }
 657         c->apply(dst.data, (int)dst.step, dst.data, (int)dst.step);
 658     }
 659
 660     int tileCountX = (corr.cols + blocksize.width - 1)/blocksize.width;
 661     int tileCountY = (corr.rows + blocksize.height - 1)/blocksize.height;
 662     int tileCount = tileCountX * tileCountY;
 663
 664     Size wholeSize = img.size();
 665     Point roiofs(0,0);
 666     Mat img0 = img;
 667
 668     if( !(borderType & BORDER_ISOLATED) )
 669     {
 670         img.locateROI(wholeSize, roiofs);
 671         img0.adjustROI(roiofs.y, wholeSize.height-img.rows-roiofs.y,
 672                        roiofs.x, wholeSize.width-img.cols-roiofs.x);
 673     }
 674     borderType |= BORDER_ISOLATED;
 675
 676     Ptr<hal::DFT2D> cF, cR;
 677     int f = CV_HAL_DFT_IS_INPLACE;
 678     int f_inv = f | CV_HAL_DFT_INVERSE | CV_HAL_DFT_SCALE;
 679     cF = hal::DFT2D::create(dftsize.width, dftsize.height, maxDepth, 1, 1, f, blocksize.height + templ.rows - 1);
 680     cR = hal::DFT2D::create(dftsize.width, dftsize.height, maxDepth, 1, 1, f_inv, blocksize.height);
 681
 682     // calculate correlation by blocks
 683     for( i = 0; i < tileCount; i++ )
 684     {
 685         int x = (i%tileCountX)*blocksize.width;
 686         int y = (i/tileCountX)*blocksize.height;
 687
 688         Size bsz(std::min(blocksize.width, corr.cols - x),
 689                  std::min(blocksize.height, corr.rows - y));
 690         Size dsz(bsz.width + templ.cols - 1, bsz.height + templ.rows - 1);
 691         int x0 = x - anchor.x + roiofs.x, y0 = y - anchor.y + roiofs.y;
 692         int x1 = std::max(0, x0), y1 = std::max(0, y0);
 693         int x2 = std::min(img0.cols, x0 + dsz.width);
 694         int y2 = std::min(img0.rows, y0 + dsz.height);
 695         Mat src0(img0, Range(y1, y2), Range(x1, x2));
 696         Mat dst(dftImg, Rect(0, 0, dsz.width, dsz.height));
 697         Mat dst1(dftImg, Rect(x1-x0, y1-y0, x2-x1, y2-y1));
 698         Mat cdst(corr, Rect(x, y, bsz.width, bsz.height));
 699
 700         for( k = 0; k < cn; k++ )
 701         {
 702             Mat src = src0;
 703             dftImg = Scalar::all(0);
 704
 705             if( cn > 1 )
 706             {
 707                 src = depth == maxDepth ? dst1 : Mat(y2-y1, x2-x1, depth, &buf[0]);
 708                 int pairs[] = {k, 0};
 709                 mixChannels(&src0, 1, &src, 1, pairs, 1);
 710             }
 711
 712             if( dst1.data != src.data )
 713                 src.convertTo(dst1, dst1.depth());
 714
 715             if( x2 - x1 < dsz.width || y2 - y1 < dsz.height )
 716                 copyMakeBorder(dst1, dst, y1-y0, dst.rows-dst1.rows-(y1-y0),
 717                                x1-x0, dst.cols-dst1.cols-(x1-x0), borderType);
 718
 719             if (bsz.height == blocksize.height)
 720                 cF->apply(dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step);
 721             else
 722                 dft( dftImg, dftImg, 0, dsz.height );
 723
 724             Mat dftTempl1(dftTempl, Rect(0, tcn > 1 ? k*dftsize.height : 0,
 725                                          dftsize.width, dftsize.height));
 726             mulSpectrums(dftImg, dftTempl1, dftImg, 0, true);
 727
 728             if (bsz.height == blocksize.height)
 729                 cR->apply(dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step);
 730             else
 731                 dft( dftImg, dftImg, DFT_INVERSE + DFT_SCALE, bsz.height );
 732
 733             src = dftImg(Rect(0, 0, bsz.width, bsz.height));
 734
 735             if( ccn > 1 )
 736             {
 737                 if( cdepth != maxDepth )
 738                 {
 739                     Mat plane(bsz, cdepth, &buf[0]);
 740                     src.convertTo(plane, cdepth, 1, delta);
 741                     src = plane;
 742                 }
 743                 int pairs[] = {0, k};
 744                 mixChannels(&src, 1, &cdst, 1, pairs, 1);
 745             }
 746             else
 747             {
 748                 if( k == 0 )
 749                     src.convertTo(cdst, cdepth, 1, delta);
 750                 else
 751                 {
 752                     if( maxDepth != cdepth )
 753                     {
 754                         Mat plane(bsz, cdepth, &buf[0]);
 755                         src.convertTo(plane, cdepth);
 756                         src = plane;
 757                     }
 758                     add(src, cdst, cdst);
 759                 }
 760             }
 761         }
 762     }
 763 }
 764
 765 static void matchTemplateMask( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask )
 766 {
 767     int type = _img.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
 768     CV_Assert( CV_TM_SQDIFF <= method && method <= CV_TM_CCOEFF_NORMED );
 769     CV_Assert( (depth == CV_8U || depth == CV_32F) && type == _templ.type() && _img.dims() <= 2 );
 770
 771     Mat img = _img.getMat(), templ = _templ.getMat(), mask = _mask.getMat();
 772     int ttype = templ.type(), tdepth = CV_MAT_DEPTH(ttype), tcn = CV_MAT_CN(ttype);
 773     int mtype = img.type(), mdepth = CV_MAT_DEPTH(type), mcn = CV_MAT_CN(mtype);
 774
 775     if (depth == CV_8U)
 776     {
 777         depth = CV_32F;
 778         type = CV_MAKETYPE(CV_32F, cn);
 779         img.convertTo(img, type, 1.0 / 255);
 780     }
 781
 782     if (tdepth == CV_8U)
 783     {
 784         tdepth = CV_32F;
 785         ttype = CV_MAKETYPE(CV_32F, tcn);
 786         templ.convertTo(templ, ttype, 1.0 / 255);
 787     }
 788
 789     if (mdepth == CV_8U)
 790     {
 791         mdepth = CV_32F;
 792         mtype = CV_MAKETYPE(CV_32F, mcn);
 793         compare(mask, Scalar::all(0), mask, CMP_NE);
 794         mask.convertTo(mask, mtype, 1.0 / 255);
 795     }
 796
 797     Size corrSize(img.cols - templ.cols + 1, img.rows - templ.rows + 1);
 798     _result.create(corrSize, CV_32F);
 799     Mat result = _result.getMat();
 800
 801     Mat img2 = img.mul(img);
 802     Mat mask2 = mask.mul(mask);
 803     Mat mask_templ = templ.mul(mask);
 804     Scalar templMean, templSdv;
 805
 806     double templSum2 = 0;
 807     meanStdDev( mask_templ, templMean, templSdv );
 808
 809     templSum2 = templSdv[0]*templSdv[0] + templSdv[1]*templSdv[1] + templSdv[2]*templSdv[2] + templSdv[3]*templSdv[3];
 810     templSum2 += templMean[0]*templMean[0] + templMean[1]*templMean[1] + templMean[2]*templMean[2] + templMean[3]*templMean[3];
 811     templSum2 *= ((double)templ.rows * templ.cols);
 812
 813     if (method == CV_TM_SQDIFF)
 814     {
 815         Mat mask2_templ = templ.mul(mask2);
 816
 817         Mat corr(corrSize, CV_32F);
 818         crossCorr( img, mask2_templ, corr, corr.size(), corr.type(), Point(0,0), 0, 0 );
 819         crossCorr( img2, mask, result, result.size(), result.type(), Point(0,0), 0, 0 );
 820
 821         result -= corr * 2;
 822         result += templSum2;
 823     }
 824     else if (method == CV_TM_CCORR_NORMED)
 825     {
 826         if (templSum2 < DBL_EPSILON)
 827         {
 828             result = Scalar::all(1);
 829             return;
 830         }
 831
 832         Mat corr(corrSize, CV_32F);
 833         crossCorr( img2, mask2, corr, corr.size(), corr.type(), Point(0,0), 0, 0 );
 834         crossCorr( img, mask_templ, result, result.size(), result.type(), Point(0,0), 0, 0 );
 835
 836         sqrt(corr, corr);
 837         result = result.mul(1/corr);
 838         result /= std::sqrt(templSum2);
 839     }
 840     else
 841         CV_Error(Error::StsNotImplemented, "");
 842 }
 843
 844 static void common_matchTemplate( Mat& img, Mat& templ, Mat& result, int method, int cn )
 845 {
 846     if( method == CV_TM_CCORR )
 847         return;
 848
 849     int numType = method == CV_TM_CCORR || method == CV_TM_CCORR_NORMED ? 0 :
 850                   method == CV_TM_CCOEFF || method == CV_TM_CCOEFF_NORMED ? 1 : 2;
 851     bool isNormed = method == CV_TM_CCORR_NORMED ||
 852                     method == CV_TM_SQDIFF_NORMED ||
 853                     method == CV_TM_CCOEFF_NORMED;
 854
 855     double invArea = 1./((double)templ.rows * templ.cols);
 856
 857     Mat sum, sqsum;
 858     Scalar templMean, templSdv;
 859     double *q0 = 0, *q1 = 0, *q2 = 0, *q3 = 0;
 860     double templNorm = 0, templSum2 = 0;
 861
 862     if( method == CV_TM_CCOEFF )
 863     {
 864         integral(img, sum, CV_64F);
 865         templMean = mean(templ);
 866     }
 867     else
 868     {
 869         integral(img, sum, sqsum, CV_64F);
 870         meanStdDev( templ, templMean, templSdv );
 871
 872         templNorm = templSdv[0]*templSdv[0] + templSdv[1]*templSdv[1] + templSdv[2]*templSdv[2] + templSdv[3]*templSdv[3];
 873
 874         if( templNorm < DBL_EPSILON && method == CV_TM_CCOEFF_NORMED )
 875         {
 876             result = Scalar::all(1);
 877             return;
 878         }
 879
 880         templSum2 = templNorm + templMean[0]*templMean[0] + templMean[1]*templMean[1] + templMean[2]*templMean[2] + templMean[3]*templMean[3];
 881
 882         if( numType != 1 )
 883         {
 884             templMean = Scalar::all(0);
 885             templNorm = templSum2;
 886         }
 887
 888         templSum2 /= invArea;
 889         templNorm = std::sqrt(templNorm);
 890         templNorm /= std::sqrt(invArea); // care of accuracy here
 891
 892         CV_Assert(sqsum.data != NULL);
 893         q0 = (double*)sqsum.data;
 894         q1 = q0 + templ.cols*cn;
 895         q2 = (double*)(sqsum.data + templ.rows*sqsum.step);
 896         q3 = q2 + templ.cols*cn;
 897     }
 898
 899     CV_Assert(sum.data != NULL);
 900     double* p0 = (double*)sum.data;
 901     double* p1 = p0 + templ.cols*cn;
 902     double* p2 = (double*)(sum.data + templ.rows*sum.step);
 903     double* p3 = p2 + templ.cols*cn;
 904
 905     int sumstep = sum.data ? (int)(sum.step / sizeof(double)) : 0;
 906     int sqstep = sqsum.data ? (int)(sqsum.step / sizeof(double)) : 0;
 907
 908     int i, j, k;
 909
 910     for( i = 0; i < result.rows; i++ )
 911     {
 912         float* rrow = result.ptr<float>(i);
 913         int idx = i * sumstep;
 914         int idx2 = i * sqstep;
 915
 916         for( j = 0; j < result.cols; j++, idx += cn, idx2 += cn )
 917         {
 918             double num = rrow[j], t;
 919             double wndMean2 = 0, wndSum2 = 0;
 920
 921             if( numType == 1 )
 922             {
 923                 for( k = 0; k < cn; k++ )
 924                 {
 925                     t = p0[idx+k] - p1[idx+k] - p2[idx+k] + p3[idx+k];
 926                     wndMean2 += t*t;
 927                     num -= t*templMean[k];
 928                 }
 929
 930                 wndMean2 *= invArea;
 931             }
 932
 933             if( isNormed || numType == 2 )
 934             {
 935                 for( k = 0; k < cn; k++ )
 936                 {
 937                     t = q0[idx2+k] - q1[idx2+k] - q2[idx2+k] + q3[idx2+k];
 938                     wndSum2 += t;
 939                 }
 940
 941                 if( numType == 2 )
 942                 {
 943                     num = wndSum2 - 2*num + templSum2;
 944                     num = MAX(num, 0.);
 945                 }
 946             }
 947
 948             if( isNormed )
 949             {
 950                 t = std::sqrt(MAX(wndSum2 - wndMean2,0))*templNorm;
 951                 if( fabs(num) < t )
 952                     num /= t;
 953                 else if( fabs(num) < t*1.125 )
 954                     num = num > 0 ? 1 : -1;
 955                 else
 956                     num = method != CV_TM_SQDIFF_NORMED ? 0 : 1;
 957             }
 958
 959             rrow[j] = (float)num;
 960         }
 961     }
 962 }
 963 }
 964
 965
 966 #if defined HAVE_IPP
 967 namespace cv
 968 {
 969 typedef IppStatus (CV_STDCALL * ippimatchTemplate)(const void*, int, IppiSize, const void*, int, IppiSize, Ipp32f* , int , IppEnum , Ipp8u*);
 970
 971 static bool ipp_crossCorr(const Mat& src, const Mat& tpl, Mat& dst, bool normed)
 972 {
 973     CV_INSTRUMENT_REGION_IPP()
 974
 975     IppStatus status;
 976
 977     IppiSize srcRoiSize = {src.cols,src.rows};
 978     IppiSize tplRoiSize = {tpl.cols,tpl.rows};
 979
 980     IppAutoBuffer<Ipp8u> buffer;
 981     int bufSize=0;
 982
 983     int depth = src.depth();
 984
 985     ippimatchTemplate ippiCrossCorrNorm =
 986             depth==CV_8U ? (ippimatchTemplate)ippiCrossCorrNorm_8u32f_C1R:
 987             depth==CV_32F? (ippimatchTemplate)ippiCrossCorrNorm_32f_C1R: 0;
 988
 989     if (ippiCrossCorrNorm==0)
 990         return false;
 991
 992     IppEnum funCfg = (IppEnum)(ippAlgAuto | ippiROIValid);
 993     if(normed)
 994         funCfg |= ippiNorm;
 995     else
 996         funCfg |= ippiNormNone;
 997
 998     status = ippiCrossCorrNormGetBufferSize(srcRoiSize, tplRoiSize, funCfg, &bufSize);
 999     if ( status < 0 )
1000         return false;
1001
1002     buffer.allocate( bufSize );
1003
1004     status = CV_INSTRUMENT_FUN_IPP(ippiCrossCorrNorm, src.ptr(), (int)src.step, srcRoiSize, tpl.ptr(), (int)tpl.step, tplRoiSize, dst.ptr<Ipp32f>(), (int)dst.step, funCfg, buffer);
1005     return status >= 0;
1006 }
1007
1008 static bool ipp_sqrDistance(const Mat& src, const Mat& tpl, Mat& dst)
1009 {
1010     CV_INSTRUMENT_REGION_IPP()
1011
1012     IppStatus status;
1013
1014     IppiSize srcRoiSize = {src.cols,src.rows};
1015     IppiSize tplRoiSize = {tpl.cols,tpl.rows};
1016
1017     IppAutoBuffer<Ipp8u> buffer;
1018     int bufSize=0;
1019
1020     int depth = src.depth();
1021
1022     ippimatchTemplate ippiSqrDistanceNorm =
1023             depth==CV_8U ? (ippimatchTemplate)ippiSqrDistanceNorm_8u32f_C1R:
1024             depth==CV_32F? (ippimatchTemplate)ippiSqrDistanceNorm_32f_C1R: 0;
1025
1026     if (ippiSqrDistanceNorm==0)
1027         return false;
1028
1029     IppEnum funCfg = (IppEnum)(ippAlgAuto | ippiROIValid | ippiNormNone);
1030     status = ippiSqrDistanceNormGetBufferSize(srcRoiSize, tplRoiSize, funCfg, &bufSize);
1031     if ( status < 0 )
1032         return false;
1033
1034     buffer.allocate( bufSize );
1035
1036     status = CV_INSTRUMENT_FUN_IPP(ippiSqrDistanceNorm, src.ptr(), (int)src.step, srcRoiSize, tpl.ptr(), (int)tpl.step, tplRoiSize, dst.ptr<Ipp32f>(), (int)dst.step, funCfg, buffer);
1037     return status >= 0;
1038 }
1039
1040 static bool ipp_matchTemplate( Mat& img, Mat& templ, Mat& result, int method)
1041 {
1042     CV_INSTRUMENT_REGION_IPP()
1043
1044     if(img.channels() != 1)
1045         return false;
1046
1047     // These functions are not efficient if template size is comparable with image size
1048     if(templ.size().area()*4 > img.size().area())
1049         return false;
1050
1051     if(method == CV_TM_SQDIFF)
1052     {
1053         if(ipp_sqrDistance(img, templ, result))
1054             return true;
1055     }
1056     else if(method == CV_TM_SQDIFF_NORMED)
1057     {
1058         if(ipp_crossCorr(img, templ, result, false))
1059         {
1060             common_matchTemplate(img, templ, result, CV_TM_SQDIFF_NORMED, 1);
1061             return true;
1062         }
1063     }
1064     else if(method == CV_TM_CCORR)
1065     {
1066         if(ipp_crossCorr(img, templ, result, false))
1067             return true;
1068     }
1069     else if(method == CV_TM_CCORR_NORMED)
1070     {
1071         if(ipp_crossCorr(img, templ, result, true))
1072             return true;
1073     }
1074     else if(method == CV_TM_CCOEFF || method == CV_TM_CCOEFF_NORMED)
1075     {
1076         if(ipp_crossCorr(img, templ, result, false))
1077         {
1078             common_matchTemplate(img, templ, result, method, 1);
1079             return true;
1080         }
1081     }
1082
1083     return false;
1084 }
1085 }
1086 #endif
1087
1088 ////////////////////////////////////////////////////////////////////////////////////////////////////////
1089
1090 void cv::matchTemplate( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask )
1091 {
1092     CV_INSTRUMENT_REGION()
1093
1094     if (!_mask.empty())
1095     {
1096         cv::matchTemplateMask(_img, _templ, _result, method, _mask);
1097         return;
1098     }
1099
1100     int type = _img.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
1101     CV_Assert( CV_TM_SQDIFF <= method && method <= CV_TM_CCOEFF_NORMED );
1102     CV_Assert( (depth == CV_8U || depth == CV_32F) && type == _templ.type() && _img.dims() <= 2 );
1103
1104     bool needswap = _img.size().height < _templ.size().height || _img.size().width < _templ.size().width;
1105     if (needswap)
1106     {
1107         CV_Assert(_img.size().height <= _templ.size().height && _img.size().width <= _templ.size().width);
1108     }
1109
1110     CV_OCL_RUN(_img.dims() <= 2 && _result.isUMat(),
1111                (!needswap ? ocl_matchTemplate(_img, _templ, _result, method) : ocl_matchTemplate(_templ, _img, _result, method)))
1112
1113     Mat img = _img.getMat(), templ = _templ.getMat();
1114     if (needswap)
1115         std::swap(img, templ);
1116
1117     Size corrSize(img.cols - templ.cols + 1, img.rows - templ.rows + 1);
1118     _result.create(corrSize, CV_32F);
1119     Mat result = _result.getMat();
1120
1121     CV_IPP_RUN_FAST(ipp_matchTemplate(img, templ, result, method))
1122
1123     crossCorr( img, templ, result, result.size(), result.type(), Point(0,0), 0, 0);
1124
1125     common_matchTemplate(img, templ, result, method, cn);
1126 }
1127
1128 CV_IMPL void
1129 cvMatchTemplate( const CvArr* _img, const CvArr* _templ, CvArr* _result, int method )
1130 {
1131     cv::Mat img = cv::cvarrToMat(_img), templ = cv::cvarrToMat(_templ),
1132         result = cv::cvarrToMat(_result);
1133     CV_Assert( result.size() == cv::Size(std::abs(img.cols - templ.cols) + 1,
1134                                          std::abs(img.rows - templ.rows) + 1) &&
1135               result.type() == CV_32F );
1136     matchTemplate(img, templ, result, method);
1137 }
1138
1139 /* End of file. */