modules/core/src/stat.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 #include "precomp.hpp"
  44 #include <climits>
  45
  46 namespace cv
  47 {
  48
  49 template<typename T> static inline Scalar rawToScalar(const T& v)
  50 {
  51     Scalar s;
  52     typedef typename DataType<T>::channel_type T1;
  53     int i, n = DataType<T>::channels;
  54     for( i = 0; i < n; i++ )
  55         s.val[i] = ((T1*)&v)[i];
  56     return s;
  57 }
  58
  59 /****************************************************************************************\
  60 *                                        sum                                             *
  61 \****************************************************************************************/
  62
  63 template<typename T, typename ST>
  64 static int sum_(const T* src0, const uchar* mask, ST* dst, int len, int cn )
  65 {
  66     const T* src = src0;
  67     if( !mask )
  68     {
  69         int i=0;
  70         int k = cn % 4;
  71         if( k == 1 )
  72         {
  73             ST s0 = dst[0];
  74
  75                         #if CV_ENABLE_UNROLLED
  76             for(; i <= len - 4; i += 4, src += cn*4 )
  77                 s0 += src[0] + src[cn] + src[cn*2] + src[cn*3];
  78             #endif
  79             for( ; i < len; i++, src += cn )
  80                 s0 += src[0];
  81             dst[0] = s0;
  82         }
  83         else if( k == 2 )
  84         {
  85             ST s0 = dst[0], s1 = dst[1];
  86             for( i = 0; i < len; i++, src += cn )
  87             {
  88                 s0 += src[0];
  89                 s1 += src[1];
  90             }
  91             dst[0] = s0;
  92             dst[1] = s1;
  93         }
  94         else if( k == 3 )
  95         {
  96             ST s0 = dst[0], s1 = dst[1], s2 = dst[2];
  97             for( i = 0; i < len; i++, src += cn )
  98             {
  99                 s0 += src[0];
 100                 s1 += src[1];
 101                 s2 += src[2];
 102             }
 103             dst[0] = s0;
 104             dst[1] = s1;
 105             dst[2] = s2;
 106         }
 107
 108         for( ; k < cn; k += 4 )
 109         {
 110             src = src0 + k;
 111             ST s0 = dst[k], s1 = dst[k+1], s2 = dst[k+2], s3 = dst[k+3];
 112             for( i = 0; i < len; i++, src += cn )
 113             {
 114                 s0 += src[0]; s1 += src[1];
 115                 s2 += src[2]; s3 += src[3];
 116             }
 117             dst[k] = s0;
 118             dst[k+1] = s1;
 119             dst[k+2] = s2;
 120             dst[k+3] = s3;
 121         }
 122         return len;
 123     }
 124
 125     int i, nzm = 0;
 126     if( cn == 1 )
 127     {
 128         ST s = dst[0];
 129         for( i = 0; i < len; i++ )
 130             if( mask[i] )
 131             {
 132                 s += src[i];
 133                 nzm++;
 134             }
 135         dst[0] = s;
 136     }
 137     else if( cn == 3 )
 138     {
 139         ST s0 = dst[0], s1 = dst[1], s2 = dst[2];
 140         for( i = 0; i < len; i++, src += 3 )
 141             if( mask[i] )
 142             {
 143                 s0 += src[0];
 144                 s1 += src[1];
 145                 s2 += src[2];
 146                 nzm++;
 147             }
 148         dst[0] = s0;
 149         dst[1] = s1;
 150         dst[2] = s2;
 151     }
 152     else
 153     {
 154         for( i = 0; i < len; i++, src += cn )
 155             if( mask[i] )
 156             {
 157                 int k = 0;
 158                                 #if CV_ENABLE_UNROLLED
 159                 for( ; k <= cn - 4; k += 4 )
 160                 {
 161                     ST s0, s1;
 162                     s0 = dst[k] + src[k];
 163                     s1 = dst[k+1] + src[k+1];
 164                     dst[k] = s0; dst[k+1] = s1;
 165                     s0 = dst[k+2] + src[k+2];
 166                     s1 = dst[k+3] + src[k+3];
 167                     dst[k+2] = s0; dst[k+3] = s1;
 168                 }
 169                 #endif
 170                 for( ; k < cn; k++ )
 171                     dst[k] += src[k];
 172                 nzm++;
 173             }
 174     }
 175     return nzm;
 176 }
 177
 178
 179 static int sum8u( const uchar* src, const uchar* mask, int* dst, int len, int cn )
 180 { return sum_(src, mask, dst, len, cn); }
 181
 182 static int sum8s( const schar* src, const uchar* mask, int* dst, int len, int cn )
 183 { return sum_(src, mask, dst, len, cn); }
 184
 185 static int sum16u( const ushort* src, const uchar* mask, int* dst, int len, int cn )
 186 { return sum_(src, mask, dst, len, cn); }
 187
 188 static int sum16s( const short* src, const uchar* mask, int* dst, int len, int cn )
 189 { return sum_(src, mask, dst, len, cn); }
 190
 191 static int sum32s( const int* src, const uchar* mask, double* dst, int len, int cn )
 192 { return sum_(src, mask, dst, len, cn); }
 193
 194 static int sum32f( const float* src, const uchar* mask, double* dst, int len, int cn )
 195 { return sum_(src, mask, dst, len, cn); }
 196
 197 static int sum64f( const double* src, const uchar* mask, double* dst, int len, int cn )
 198 { return sum_(src, mask, dst, len, cn); }
 199
 200 typedef int (*SumFunc)(const uchar*, const uchar* mask, uchar*, int, int);
 201
 202 static SumFunc sumTab[] =
 203 {
 204     (SumFunc)GET_OPTIMIZED(sum8u), (SumFunc)sum8s,
 205     (SumFunc)sum16u, (SumFunc)sum16s,
 206     (SumFunc)sum32s,
 207     (SumFunc)GET_OPTIMIZED(sum32f), (SumFunc)sum64f,
 208     0
 209 };
 210
 211 template<typename T>
 212 static int countNonZero_(const T* src, int len )
 213 {
 214     int i=0, nz = 0;
 215         #if CV_ENABLE_UNROLLED
 216     for(; i <= len - 4; i += 4 )
 217         nz += (src[i] != 0) + (src[i+1] != 0) + (src[i+2] != 0) + (src[i+3] != 0);
 218     #endif
 219     for( ; i < len; i++ )
 220         nz += src[i] != 0;
 221     return nz;
 222 }
 223
 224 static int countNonZero8u( const uchar* src, int len )
 225 { return countNonZero_(src, len); }
 226
 227 static int countNonZero16u( const ushort* src, int len )
 228 { return countNonZero_(src, len); }
 229
 230 static int countNonZero32s( const int* src, int len )
 231 { return countNonZero_(src, len); }
 232
 233 static int countNonZero32f( const float* src, int len )
 234 { return countNonZero_(src, len); }
 235
 236 static int countNonZero64f( const double* src, int len )
 237 { return countNonZero_(src, len); }
 238
 239 typedef int (*CountNonZeroFunc)(const uchar*, int);
 240
 241 static CountNonZeroFunc countNonZeroTab[] =
 242 {
 243     (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u),
 244     (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u),
 245     (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32s), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32f),
 246     (CountNonZeroFunc)GET_OPTIMIZED(countNonZero64f), 0
 247 };
 248
 249
 250 template<typename T, typename ST, typename SQT>
 251 static int sumsqr_(const T* src0, const uchar* mask, ST* sum, SQT* sqsum, int len, int cn )
 252 {
 253     const T* src = src0;
 254
 255     if( !mask )
 256     {
 257         int i;
 258         int k = cn % 4;
 259
 260         if( k == 1 )
 261         {
 262             ST s0 = sum[0];
 263             SQT sq0 = sqsum[0];
 264             for( i = 0; i < len; i++, src += cn )
 265             {
 266                 T v = src[0];
 267                 s0 += v; sq0 += (SQT)v*v;
 268             }
 269             sum[0] = s0;
 270             sqsum[0] = sq0;
 271         }
 272         else if( k == 2 )
 273         {
 274             ST s0 = sum[0], s1 = sum[1];
 275             SQT sq0 = sqsum[0], sq1 = sqsum[1];
 276             for( i = 0; i < len; i++, src += cn )
 277             {
 278                 T v0 = src[0], v1 = src[1];
 279                 s0 += v0; sq0 += (SQT)v0*v0;
 280                 s1 += v1; sq1 += (SQT)v1*v1;
 281             }
 282             sum[0] = s0; sum[1] = s1;
 283             sqsum[0] = sq0; sqsum[1] = sq1;
 284         }
 285         else if( k == 3 )
 286         {
 287             ST s0 = sum[0], s1 = sum[1], s2 = sum[2];
 288             SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
 289             for( i = 0; i < len; i++, src += cn )
 290             {
 291                 T v0 = src[0], v1 = src[1], v2 = src[2];
 292                 s0 += v0; sq0 += (SQT)v0*v0;
 293                 s1 += v1; sq1 += (SQT)v1*v1;
 294                 s2 += v2; sq2 += (SQT)v2*v2;
 295             }
 296             sum[0] = s0; sum[1] = s1; sum[2] = s2;
 297             sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
 298         }
 299
 300         for( ; k < cn; k += 4 )
 301         {
 302             src = src0 + k;
 303             ST s0 = sum[k], s1 = sum[k+1], s2 = sum[k+2], s3 = sum[k+3];
 304             SQT sq0 = sqsum[k], sq1 = sqsum[k+1], sq2 = sqsum[k+2], sq3 = sqsum[k+3];
 305             for( i = 0; i < len; i++, src += cn )
 306             {
 307                 T v0, v1;
 308                 v0 = src[0], v1 = src[1];
 309                 s0 += v0; sq0 += (SQT)v0*v0;
 310                 s1 += v1; sq1 += (SQT)v1*v1;
 311                 v0 = src[2], v1 = src[3];
 312                 s2 += v0; sq2 += (SQT)v0*v0;
 313                 s3 += v1; sq3 += (SQT)v1*v1;
 314             }
 315             sum[k] = s0; sum[k+1] = s1;
 316             sum[k+2] = s2; sum[k+3] = s3;
 317             sqsum[k] = sq0; sqsum[k+1] = sq1;
 318             sqsum[k+2] = sq2; sqsum[k+3] = sq3;
 319         }
 320         return len;
 321     }
 322
 323     int i, nzm = 0;
 324
 325     if( cn == 1 )
 326     {
 327         ST s0 = sum[0];
 328         SQT sq0 = sqsum[0];
 329         for( i = 0; i < len; i++ )
 330             if( mask[i] )
 331             {
 332                 T v = src[i];
 333                 s0 += v; sq0 += (SQT)v*v;
 334                 nzm++;
 335             }
 336         sum[0] = s0;
 337         sqsum[0] = sq0;
 338     }
 339     else if( cn == 3 )
 340     {
 341         ST s0 = sum[0], s1 = sum[1], s2 = sum[2];
 342         SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
 343         for( i = 0; i < len; i++, src += 3 )
 344             if( mask[i] )
 345             {
 346                 T v0 = src[0], v1 = src[1], v2 = src[2];
 347                 s0 += v0; sq0 += (SQT)v0*v0;
 348                 s1 += v1; sq1 += (SQT)v1*v1;
 349                 s2 += v2; sq2 += (SQT)v2*v2;
 350                 nzm++;
 351             }
 352         sum[0] = s0; sum[1] = s1; sum[2] = s2;
 353         sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
 354     }
 355     else
 356     {
 357         for( i = 0; i < len; i++, src += cn )
 358             if( mask[i] )
 359             {
 360                 for( int k = 0; k < cn; k++ )
 361                 {
 362                     T v = src[k];
 363                     ST s = sum[k] + v;
 364                     SQT sq = sqsum[k] + (SQT)v*v;
 365                     sum[k] = s; sqsum[k] = sq;
 366                 }
 367                 nzm++;
 368             }
 369     }
 370     return nzm;
 371 }
 372
 373
 374 static int sqsum8u( const uchar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
 375 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
 376
 377 static int sqsum8s( const schar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
 378 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
 379
 380 static int sqsum16u( const ushort* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
 381 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
 382
 383 static int sqsum16s( const short* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
 384 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
 385
 386 static int sqsum32s( const int* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
 387 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
 388
 389 static int sqsum32f( const float* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
 390 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
 391
 392 static int sqsum64f( const double* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
 393 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
 394
 395 typedef int (*SumSqrFunc)(const uchar*, const uchar* mask, uchar*, uchar*, int, int);
 396
 397 static SumSqrFunc sumSqrTab[] =
 398 {
 399     (SumSqrFunc)GET_OPTIMIZED(sqsum8u), (SumSqrFunc)sqsum8s, (SumSqrFunc)sqsum16u, (SumSqrFunc)sqsum16s,
 400     (SumSqrFunc)sqsum32s, (SumSqrFunc)GET_OPTIMIZED(sqsum32f), (SumSqrFunc)sqsum64f, 0
 401 };
 402
 403 }
 404
 405 cv::Scalar cv::sum( InputArray _src )
 406 {
 407     Mat src = _src.getMat();
 408     int k, cn = src.channels(), depth = src.depth();
 409     SumFunc func = sumTab[depth];
 410
 411     CV_Assert( cn <= 4 && func != 0 );
 412
 413     const Mat* arrays[] = {&src, 0};
 414     uchar* ptrs[1];
 415     NAryMatIterator it(arrays, ptrs);
 416     Scalar s;
 417     int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
 418     int j, count = 0;
 419     AutoBuffer<int> _buf;
 420     int* buf = (int*)&s[0];
 421     size_t esz = 0;
 422     bool blockSum = depth < CV_32S;
 423
 424     if( blockSum )
 425     {
 426         intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
 427         blockSize = std::min(blockSize, intSumBlockSize);
 428         _buf.allocate(cn);
 429         buf = _buf;
 430
 431         for( k = 0; k < cn; k++ )
 432             buf[k] = 0;
 433         esz = src.elemSize();
 434     }
 435
 436     for( size_t i = 0; i < it.nplanes; i++, ++it )
 437     {
 438         for( j = 0; j < total; j += blockSize )
 439         {
 440             int bsz = std::min(total - j, blockSize);
 441             func( ptrs[0], 0, (uchar*)buf, bsz, cn );
 442             count += bsz;
 443             if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
 444             {
 445                 for( k = 0; k < cn; k++ )
 446                 {
 447                     s[k] += buf[k];
 448                     buf[k] = 0;
 449                 }
 450                 count = 0;
 451             }
 452             ptrs[0] += bsz*esz;
 453         }
 454     }
 455     return s;
 456 }
 457
 458 int cv::countNonZero( InputArray _src )
 459 {
 460     Mat src = _src.getMat();
 461     CountNonZeroFunc func = countNonZeroTab[src.depth()];
 462
 463     CV_Assert( src.channels() == 1 && func != 0 );
 464
 465     const Mat* arrays[] = {&src, 0};
 466     uchar* ptrs[1];
 467     NAryMatIterator it(arrays, ptrs);
 468     int total = (int)it.size, nz = 0;
 469
 470     for( size_t i = 0; i < it.nplanes; i++, ++it )
 471         nz += func( ptrs[0], total );
 472
 473     return nz;
 474 }
 475
 476 cv::Scalar cv::mean( InputArray _src, InputArray _mask )
 477 {
 478     Mat src = _src.getMat(), mask = _mask.getMat();
 479     CV_Assert( mask.empty() || mask.type() == CV_8U );
 480
 481     int k, cn = src.channels(), depth = src.depth();
 482     SumFunc func = sumTab[depth];
 483
 484     CV_Assert( cn <= 4 && func != 0 );
 485
 486     const Mat* arrays[] = {&src, &mask, 0};
 487     uchar* ptrs[2];
 488     NAryMatIterator it(arrays, ptrs);
 489     Scalar s;
 490     int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
 491     int j, count = 0;
 492     AutoBuffer<int> _buf;
 493     int* buf = (int*)&s[0];
 494     bool blockSum = depth <= CV_16S;
 495     size_t esz = 0, nz0 = 0;
 496
 497     if( blockSum )
 498     {
 499         intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
 500         blockSize = std::min(blockSize, intSumBlockSize);
 501         _buf.allocate(cn);
 502         buf = _buf;
 503
 504         for( k = 0; k < cn; k++ )
 505             buf[k] = 0;
 506         esz = src.elemSize();
 507     }
 508
 509     for( size_t i = 0; i < it.nplanes; i++, ++it )
 510     {
 511         for( j = 0; j < total; j += blockSize )
 512         {
 513             int bsz = std::min(total - j, blockSize);
 514             int nz = func( ptrs[0], ptrs[1], (uchar*)buf, bsz, cn );
 515             count += nz;
 516             nz0 += nz;
 517             if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
 518             {
 519                 for( k = 0; k < cn; k++ )
 520                 {
 521                     s[k] += buf[k];
 522                     buf[k] = 0;
 523                 }
 524                 count = 0;
 525             }
 526             ptrs[0] += bsz*esz;
 527             if( ptrs[1] )
 528                 ptrs[1] += bsz;
 529         }
 530     }
 531     return s*(nz0 ? 1./nz0 : 0);
 532 }
 533
 534
 535 void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
 536 {
 537     Mat src = _src.getMat(), mask = _mask.getMat();
 538     CV_Assert( mask.empty() || mask.type() == CV_8U );
 539
 540     int k, cn = src.channels(), depth = src.depth();
 541     SumSqrFunc func = sumSqrTab[depth];
 542
 543     CV_Assert( func != 0 );
 544
 545     const Mat* arrays[] = {&src, &mask, 0};
 546     uchar* ptrs[2];
 547     NAryMatIterator it(arrays, ptrs);
 548     int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
 549     int j, count = 0, nz0 = 0;
 550     AutoBuffer<double> _buf(cn*4);
 551     double *s = (double*)_buf, *sq = s + cn;
 552     int *sbuf = (int*)s, *sqbuf = (int*)sq;
 553     bool blockSum = depth <= CV_16S, blockSqSum = depth <= CV_8S;
 554     size_t esz = 0;
 555
 556     for( k = 0; k < cn; k++ )
 557         s[k] = sq[k] = 0;
 558
 559     if( blockSum )
 560     {
 561         intSumBlockSize = 1 << 15;
 562         blockSize = std::min(blockSize, intSumBlockSize);
 563         sbuf = (int*)(sq + cn);
 564         if( blockSqSum )
 565             sqbuf = sbuf + cn;
 566         for( k = 0; k < cn; k++ )
 567             sbuf[k] = sqbuf[k] = 0;
 568         esz = src.elemSize();
 569     }
 570
 571     for( size_t i = 0; i < it.nplanes; i++, ++it )
 572     {
 573         for( j = 0; j < total; j += blockSize )
 574         {
 575             int bsz = std::min(total - j, blockSize);
 576             int nz = func( ptrs[0], ptrs[1], (uchar*)sbuf, (uchar*)sqbuf, bsz, cn );
 577             count += nz;
 578             nz0 += nz;
 579             if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
 580             {
 581                 for( k = 0; k < cn; k++ )
 582                 {
 583                     s[k] += sbuf[k];
 584                     sbuf[k] = 0;
 585                 }
 586                 if( blockSqSum )
 587                 {
 588                     for( k = 0; k < cn; k++ )
 589                     {
 590                         sq[k] += sqbuf[k];
 591                         sqbuf[k] = 0;
 592                     }
 593                 }
 594                 count = 0;
 595             }
 596             ptrs[0] += bsz*esz;
 597             if( ptrs[1] )
 598                 ptrs[1] += bsz;
 599         }
 600     }
 601
 602     double scale = nz0 ? 1./nz0 : 0.;
 603     for( k = 0; k < cn; k++ )
 604     {
 605         s[k] *= scale;
 606         sq[k] = std::sqrt(std::max(sq[k]*scale - s[k]*s[k], 0.));
 607     }
 608
 609     for( j = 0; j < 2; j++ )
 610     {
 611         const double* sptr = j == 0 ? s : sq;
 612         _OutputArray _dst = j == 0 ? _mean : _sdv;
 613         if( !_dst.needed() )
 614             continue;
 615
 616         if( !_dst.fixedSize() )
 617             _dst.create(cn, 1, CV_64F, -1, true);
 618         Mat dst = _dst.getMat();
 619         int dcn = (int)dst.total();
 620         CV_Assert( dst.type() == CV_64F && dst.isContinuous() &&
 621                    (dst.cols == 1 || dst.rows == 1) && dcn >= cn );
 622         double* dptr = dst.ptr<double>();
 623         for( k = 0; k < cn; k++ )
 624             dptr[k] = sptr[k];
 625         for( ; k < dcn; k++ )
 626             dptr[k] = 0;
 627     }
 628 }
 629
 630 /****************************************************************************************\
 631 *                                       minMaxLoc                                        *
 632 \****************************************************************************************/
 633
 634 namespace cv
 635 {
 636
 637 template<typename T, typename WT> static void
 638 minMaxIdx_( const T* src, const uchar* mask, WT* _minVal, WT* _maxVal,
 639             size_t* _minIdx, size_t* _maxIdx, int len, size_t startIdx )
 640 {
 641     WT minVal = *_minVal, maxVal = *_maxVal;
 642     size_t minIdx = *_minIdx, maxIdx = *_maxIdx;
 643
 644     if( !mask )
 645     {
 646         for( int i = 0; i < len; i++ )
 647         {
 648             T val = src[i];
 649             if( val < minVal )
 650             {
 651                 minVal = val;
 652                 minIdx = startIdx + i;
 653             }
 654             if( val > maxVal )
 655             {
 656                 maxVal = val;
 657                 maxIdx = startIdx + i;
 658             }
 659         }
 660     }
 661     else
 662     {
 663         for( int i = 0; i < len; i++ )
 664         {
 665             T val = src[i];
 666             if( mask[i] && val < minVal )
 667             {
 668                 minVal = val;
 669                 minIdx = startIdx + i;
 670             }
 671             if( mask[i] && val > maxVal )
 672             {
 673                 maxVal = val;
 674                 maxIdx = startIdx + i;
 675             }
 676         }
 677     }
 678
 679     *_minIdx = minIdx;
 680     *_maxIdx = maxIdx;
 681     *_minVal = minVal;
 682     *_maxVal = maxVal;
 683 }
 684
 685 static void minMaxIdx_8u(const uchar* src, const uchar* mask, int* minval, int* maxval,
 686                          size_t* minidx, size_t* maxidx, int len, size_t startidx )
 687 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
 688
 689 static void minMaxIdx_8s(const schar* src, const uchar* mask, int* minval, int* maxval,
 690                          size_t* minidx, size_t* maxidx, int len, size_t startidx )
 691 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
 692
 693 static void minMaxIdx_16u(const ushort* src, const uchar* mask, int* minval, int* maxval,
 694                           size_t* minidx, size_t* maxidx, int len, size_t startidx )
 695 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
 696
 697 static void minMaxIdx_16s(const short* src, const uchar* mask, int* minval, int* maxval,
 698                           size_t* minidx, size_t* maxidx, int len, size_t startidx )
 699 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
 700
 701 static void minMaxIdx_32s(const int* src, const uchar* mask, int* minval, int* maxval,
 702                           size_t* minidx, size_t* maxidx, int len, size_t startidx )
 703 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
 704
 705 static void minMaxIdx_32f(const float* src, const uchar* mask, float* minval, float* maxval,
 706                           size_t* minidx, size_t* maxidx, int len, size_t startidx )
 707 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
 708
 709 static void minMaxIdx_64f(const double* src, const uchar* mask, double* minval, double* maxval,
 710                           size_t* minidx, size_t* maxidx, int len, size_t startidx )
 711 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
 712
 713 typedef void (*MinMaxIdxFunc)(const uchar*, const uchar*, int*, int*, size_t*, size_t*, int, size_t);
 714
 715 static MinMaxIdxFunc minmaxTab[] =
 716 {
 717     (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8s),
 718     (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16s),
 719     (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32s),
 720     (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32f), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_64f),
 721     0
 722 };
 723
 724 static void ofs2idx(const Mat& a, size_t ofs, int* idx)
 725 {
 726     int i, d = a.dims;
 727     if( ofs > 0 )
 728     {
 729         ofs--;
 730         for( i = d-1; i >= 0; i-- )
 731         {
 732             int sz = a.size[i];
 733             idx[i] = (int)(ofs % sz);
 734             ofs /= sz;
 735         }
 736     }
 737     else
 738     {
 739         for( i = d-1; i >= 0; i-- )
 740             idx[i] = -1;
 741     }
 742 }
 743
 744 }
 745
 746 void cv::minMaxIdx(InputArray _src, double* minVal,
 747                    double* maxVal, int* minIdx, int* maxIdx,
 748                    InputArray _mask)
 749 {
 750     Mat src = _src.getMat(), mask = _mask.getMat();
 751     int depth = src.depth(), cn = src.channels();
 752
 753     CV_Assert( (cn == 1 && (mask.empty() || mask.type() == CV_8U)) ||
 754                (cn >= 1 && mask.empty() && !minIdx && !maxIdx) );
 755     MinMaxIdxFunc func = minmaxTab[depth];
 756     CV_Assert( func != 0 );
 757
 758     const Mat* arrays[] = {&src, &mask, 0};
 759     uchar* ptrs[2];
 760     NAryMatIterator it(arrays, ptrs);
 761
 762     size_t minidx = 0, maxidx = 0;
 763     int iminval = INT_MAX, imaxval = INT_MIN;
 764     float fminval = FLT_MAX, fmaxval = -FLT_MAX;
 765     double dminval = DBL_MAX, dmaxval = -DBL_MAX;
 766     size_t startidx = 1;
 767     int *minval = &iminval, *maxval = &imaxval;
 768     int planeSize = (int)it.size*cn;
 769
 770     if( depth == CV_32F )
 771         minval = (int*)&fminval, maxval = (int*)&fmaxval;
 772     else if( depth == CV_64F )
 773         minval = (int*)&dminval, maxval = (int*)&dmaxval;
 774
 775     for( size_t i = 0; i < it.nplanes; i++, ++it, startidx += planeSize )
 776         func( ptrs[0], ptrs[1], minval, maxval, &minidx, &maxidx, planeSize, startidx );
 777
 778     if( minidx == 0 )
 779         dminval = dmaxval = 0;
 780     else if( depth == CV_32F )
 781         dminval = fminval, dmaxval = fmaxval;
 782     else if( depth <= CV_32S )
 783         dminval = iminval, dmaxval = imaxval;
 784
 785     if( minVal )
 786         *minVal = dminval;
 787     if( maxVal )
 788         *maxVal = dmaxval;
 789
 790     if( minIdx )
 791         ofs2idx(src, minidx, minIdx);
 792     if( maxIdx )
 793         ofs2idx(src, maxidx, maxIdx);
 794 }
 795
 796 void cv::minMaxLoc( InputArray _img, double* minVal, double* maxVal,
 797                     Point* minLoc, Point* maxLoc, InputArray mask )
 798 {
 799     Mat img = _img.getMat();
 800     CV_Assert(img.dims <= 2);
 801
 802     minMaxIdx(_img, minVal, maxVal, (int*)minLoc, (int*)maxLoc, mask);
 803     if( minLoc )
 804         std::swap(minLoc->x, minLoc->y);
 805     if( maxLoc )
 806         std::swap(maxLoc->x, maxLoc->y);
 807 }
 808
 809 /****************************************************************************************\
 810 *                                         norm                                           *
 811 \****************************************************************************************/
 812
 813 namespace cv
 814 {
 815
 816 float normL2Sqr_(const float* a, const float* b, int n)
 817 {
 818     int j = 0; float d = 0.f;
 819 #if CV_SSE
 820     if( USE_SSE2 )
 821     {
 822         float CV_DECL_ALIGNED(16) buf[4];
 823         __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
 824
 825         for( ; j <= n - 8; j += 8 )
 826         {
 827             __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
 828             __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
 829             d0 = _mm_add_ps(d0, _mm_mul_ps(t0, t0));
 830             d1 = _mm_add_ps(d1, _mm_mul_ps(t1, t1));
 831         }
 832         _mm_store_ps(buf, _mm_add_ps(d0, d1));
 833         d = buf[0] + buf[1] + buf[2] + buf[3];
 834     }
 835     else
 836 #endif
 837     //vz why do we need unroll here? no sse = no need to unroll
 838         {
 839         for( ; j <= n - 4; j += 4 )
 840         {
 841             float t0 = a[j] - b[j], t1 = a[j+1] - b[j+1], t2 = a[j+2] - b[j+2], t3 = a[j+3] - b[j+3];
 842             d += t0*t0 + t1*t1 + t2*t2 + t3*t3;
 843         }
 844     }
 845
 846     for( ; j < n; j++ )
 847     {
 848         float t = a[j] - b[j];
 849         d += t*t;
 850     }
 851     return d;
 852 }
 853
 854
 855 float normL1_(const float* a, const float* b, int n)
 856 {
 857     int j = 0; float d = 0.f;
 858 #if CV_SSE
 859     if( USE_SSE2 )
 860     {
 861         float CV_DECL_ALIGNED(16) buf[4];
 862         static const int CV_DECL_ALIGNED(16) absbuf[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
 863         __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
 864         __m128 absmask = _mm_load_ps((const float*)absbuf);
 865
 866         for( ; j <= n - 8; j += 8 )
 867         {
 868             __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
 869             __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
 870             d0 = _mm_add_ps(d0, _mm_and_ps(t0, absmask));
 871             d1 = _mm_add_ps(d1, _mm_and_ps(t1, absmask));
 872         }
 873         _mm_store_ps(buf, _mm_add_ps(d0, d1));
 874         d = buf[0] + buf[1] + buf[2] + buf[3];
 875     }
 876     else
 877 #endif
 878      //vz no need to unroll here - if no sse
 879     {
 880         for( ; j <= n - 4; j += 4 )
 881         {
 882             d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
 883                     std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
 884         }
 885     }
 886
 887     for( ; j < n; j++ )
 888         d += std::abs(a[j] - b[j]);
 889     return d;
 890 }
 891
 892 int normL1_(const uchar* a, const uchar* b, int n)
 893 {
 894     int j = 0, d = 0;
 895 #if CV_SSE
 896     if( USE_SSE2 )
 897     {
 898         __m128i d0 = _mm_setzero_si128();
 899
 900         for( ; j <= n - 16; j += 16 )
 901         {
 902             __m128i t0 = _mm_loadu_si128((const __m128i*)(a + j));
 903             __m128i t1 = _mm_loadu_si128((const __m128i*)(b + j));
 904
 905             d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
 906         }
 907
 908         for( ; j <= n - 4; j += 4 )
 909         {
 910             __m128i t0 = _mm_cvtsi32_si128(*(const int*)(a + j));
 911             __m128i t1 = _mm_cvtsi32_si128(*(const int*)(b + j));
 912
 913             d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
 914         }
 915         d = _mm_cvtsi128_si32(_mm_add_epi32(d0, _mm_unpackhi_epi64(d0, d0)));
 916     }
 917     else
 918 #endif
 919      //vz why do we need unroll here? no sse = no unroll
 920     {
 921         for( ; j <= n - 4; j += 4 )
 922         {
 923             d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
 924                     std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
 925         }
 926     }
 927     for( ; j < n; j++ )
 928         d += std::abs(a[j] - b[j]);
 929     return d;
 930 }
 931
 932 static const uchar popCountTable[] =
 933 {
 934     0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
 935     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 936     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 937     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
 938     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
 939     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
 940     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
 941     3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
 942 };
 943
 944 static const uchar popCountTable2[] =
 945 {
 946     0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
 947     1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
 948     1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
 949     2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
 950     1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
 951     2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
 952     1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
 953     2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
 954 };
 955
 956 static const uchar popCountTable4[] =
 957 {
 958     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 959     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 960     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 961     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 962     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 963     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 964     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 965     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
 966 };
 967
 968 int normHamming(const uchar* a, const uchar* b, int n)
 969 {
 970     int i = 0, result = 0;
 971 #if CV_NEON
 972     if (CPU_HAS_NEON_FEATURE)
 973     {
 974         uint32x4_t bits = vmovq_n_u32(0);
 975         for (; i <= n - 16; i += 16) {
 976             uint8x16_t A_vec = vld1q_u8 (a + i);
 977             uint8x16_t B_vec = vld1q_u8 (b + i);
 978             uint8x16_t AxorB = veorq_u8 (A_vec, B_vec);
 979             uint8x16_t bitsSet = vcntq_u8 (AxorB);
 980             uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
 981             uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
 982             bits = vaddq_u32(bits, bitSet4);
 983         }
 984         uint64x2_t bitSet2 = vpaddlq_u32 (bits);
 985         result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
 986         result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
 987     }
 988     else
 989 #endif
 990         for( ; i <= n - 4; i += 4 )
 991             result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] +
 992                     popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]];
 993     for( ; i < n; i++ )
 994         result += popCountTable[a[i] ^ b[i]];
 995     return result;
 996 }
 997
 998 int normHamming(const uchar* a, const uchar* b, int n, int cellSize)
 999 {
1000     if( cellSize == 1 )
1001         return normHamming(a, b, n);
1002     const uchar* tab = 0;
1003     if( cellSize == 2 )
1004         tab = popCountTable2;
1005     else if( cellSize == 4 )
1006         tab = popCountTable4;
1007     else
1008         CV_Error( CV_StsBadSize, "bad cell size (not 1, 2 or 4) in normHamming" );
1009     int i = 0, result = 0;
1010         #if CV_ENABLE_UNROLLED
1011     for( ; i <= n - 4; i += 4 )
1012         result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] +
1013                 tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]];
1014     #endif
1015     for( ; i < n; i++ )
1016         result += tab[a[i] ^ b[i]];
1017     return result;
1018 }
1019
1020
1021 template<typename T, typename ST> int
1022 normInf_(const T* src, const uchar* mask, ST* _result, int len, int cn)
1023 {
1024     ST result = *_result;
1025     if( !mask )
1026     {
1027         result = std::max(result, normInf<T, ST>(src, len*cn));
1028     }
1029     else
1030     {
1031         for( int i = 0; i < len; i++, src += cn )
1032             if( mask[i] )
1033             {
1034                 for( int k = 0; k < cn; k++ )
1035                     result = std::max(result, ST(fast_abs(src[k])));
1036             }
1037     }
1038     *_result = result;
1039     return 0;
1040 }
1041
1042 template<typename T, typename ST> int
1043 normL1_(const T* src, const uchar* mask, ST* _result, int len, int cn)
1044 {
1045     ST result = *_result;
1046     if( !mask )
1047     {
1048         result += normL1<T, ST>(src, len*cn);
1049     }
1050     else
1051     {
1052         for( int i = 0; i < len; i++, src += cn )
1053             if( mask[i] )
1054             {
1055                 for( int k = 0; k < cn; k++ )
1056                     result += fast_abs(src[k]);
1057             }
1058     }
1059     *_result = result;
1060     return 0;
1061 }
1062
1063 template<typename T, typename ST> int
1064 normL2_(const T* src, const uchar* mask, ST* _result, int len, int cn)
1065 {
1066     ST result = *_result;
1067     if( !mask )
1068     {
1069         result += normL2Sqr<T, ST>(src, len*cn);
1070     }
1071     else
1072     {
1073         for( int i = 0; i < len; i++, src += cn )
1074             if( mask[i] )
1075             {
1076                 for( int k = 0; k < cn; k++ )
1077                 {
1078                     T v = src[k];
1079                     result += (ST)v*v;
1080                 }
1081             }
1082     }
1083     *_result = result;
1084     return 0;
1085 }
1086
1087 template<typename T, typename ST> int
1088 normDiffInf_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
1089 {
1090     ST result = *_result;
1091     if( !mask )
1092     {
1093         result = std::max(result, normInf<T, ST>(src1, src2, len*cn));
1094     }
1095     else
1096     {
1097         for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
1098             if( mask[i] )
1099             {
1100                 for( int k = 0; k < cn; k++ )
1101                     result = std::max(result, (ST)std::abs(src1[k] - src2[k]));
1102             }
1103     }
1104     *_result = result;
1105     return 0;
1106 }
1107
1108 template<typename T, typename ST> int
1109 normDiffL1_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
1110 {
1111     ST result = *_result;
1112     if( !mask )
1113     {
1114         result += normL1<T, ST>(src1, src2, len*cn);
1115     }
1116     else
1117     {
1118         for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
1119             if( mask[i] )
1120             {
1121                 for( int k = 0; k < cn; k++ )
1122                     result += std::abs(src1[k] - src2[k]);
1123             }
1124     }
1125     *_result = result;
1126     return 0;
1127 }
1128
1129 template<typename T, typename ST> int
1130 normDiffL2_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
1131 {
1132     ST result = *_result;
1133     if( !mask )
1134     {
1135         result += normL2Sqr<T, ST>(src1, src2, len*cn);
1136     }
1137     else
1138     {
1139         for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
1140             if( mask[i] )
1141             {
1142                 for( int k = 0; k < cn; k++ )
1143                 {
1144                     ST v = src1[k] - src2[k];
1145                     result += v*v;
1146                 }
1147             }
1148     }
1149     *_result = result;
1150     return 0;
1151 }
1152
1153
1154 #define CV_DEF_NORM_FUNC(L, suffix, type, ntype) \
1155     static int norm##L##_##suffix(const type* src, const uchar* mask, ntype* r, int len, int cn) \
1156 { return norm##L##_(src, mask, r, len, cn); } \
1157     static int normDiff##L##_##suffix(const type* src1, const type* src2, \
1158     const uchar* mask, ntype* r, int len, int cn) \
1159 { return normDiff##L##_(src1, src2, mask, r, (int)len, cn); }
1160
1161 #define CV_DEF_NORM_ALL(suffix, type, inftype, l1type, l2type) \
1162     CV_DEF_NORM_FUNC(Inf, suffix, type, inftype) \
1163     CV_DEF_NORM_FUNC(L1, suffix, type, l1type) \
1164     CV_DEF_NORM_FUNC(L2, suffix, type, l2type)
1165
1166 CV_DEF_NORM_ALL(8u, uchar, int, int, int)
1167 CV_DEF_NORM_ALL(8s, schar, int, int, int)
1168 CV_DEF_NORM_ALL(16u, ushort, int, int, double)
1169 CV_DEF_NORM_ALL(16s, short, int, int, double)
1170 CV_DEF_NORM_ALL(32s, int, int, double, double)
1171 CV_DEF_NORM_ALL(32f, float, float, double, double)
1172 CV_DEF_NORM_ALL(64f, double, double, double, double)
1173
1174
1175 typedef int (*NormFunc)(const uchar*, const uchar*, uchar*, int, int);
1176 typedef int (*NormDiffFunc)(const uchar*, const uchar*, const uchar*, uchar*, int, int);
1177
1178 static NormFunc normTab[3][8] =
1179 {
1180     {
1181         (NormFunc)GET_OPTIMIZED(normInf_8u), (NormFunc)normInf_8s, (NormFunc)normInf_16u, (NormFunc)normInf_16s,
1182         (NormFunc)normInf_32s, (NormFunc)GET_OPTIMIZED(normInf_32f), (NormFunc)normInf_64f, 0
1183     },
1184     {
1185         (NormFunc)GET_OPTIMIZED(normL1_8u), (NormFunc)normL1_8s, (NormFunc)normL1_16u, (NormFunc)normL1_16s,
1186         (NormFunc)normL1_32s, (NormFunc)GET_OPTIMIZED(normL1_32f), (NormFunc)normL1_64f, 0
1187     },
1188     {
1189         (NormFunc)GET_OPTIMIZED(normL2_8u), (NormFunc)normL2_8s, (NormFunc)normL2_16u, (NormFunc)normL2_16s,
1190         (NormFunc)normL2_32s, (NormFunc)GET_OPTIMIZED(normL2_32f), (NormFunc)normL2_64f, 0
1191     }
1192 };
1193
1194 static NormDiffFunc normDiffTab[3][8] =
1195 {
1196     {
1197         (NormDiffFunc)GET_OPTIMIZED(normDiffInf_8u), (NormDiffFunc)normDiffInf_8s,
1198         (NormDiffFunc)normDiffInf_16u, (NormDiffFunc)normDiffInf_16s,
1199         (NormDiffFunc)normDiffInf_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffInf_32f),
1200         (NormDiffFunc)normDiffInf_64f, 0
1201     },
1202     {
1203         (NormDiffFunc)GET_OPTIMIZED(normDiffL1_8u), (NormDiffFunc)normDiffL1_8s,
1204         (NormDiffFunc)normDiffL1_16u, (NormDiffFunc)normDiffL1_16s,
1205         (NormDiffFunc)normDiffL1_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL1_32f),
1206         (NormDiffFunc)normDiffL1_64f, 0
1207     },
1208     {
1209         (NormDiffFunc)GET_OPTIMIZED(normDiffL2_8u), (NormDiffFunc)normDiffL2_8s,
1210         (NormDiffFunc)normDiffL2_16u, (NormDiffFunc)normDiffL2_16s,
1211         (NormDiffFunc)normDiffL2_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL2_32f),
1212         (NormDiffFunc)normDiffL2_64f, 0
1213     }
1214 };
1215
1216 }
1217
1218 double cv::norm( InputArray _src, int normType, InputArray _mask )
1219 {
1220     Mat src = _src.getMat(), mask = _mask.getMat();
1221     int depth = src.depth(), cn = src.channels();
1222
1223     normType &= 7;
1224     CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );
1225
1226     if( depth == CV_32F && src.isContinuous() && mask.empty() )
1227     {
1228         size_t len = src.total()*cn;
1229         if( len == (size_t)(int)len )
1230         {
1231             const float* data = src.ptr<float>();
1232
1233             if( normType == NORM_L2 )
1234             {
1235                 double result = 0;
1236                 GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
1237                 return std::sqrt(result);
1238             }
1239             if( normType == NORM_L1 )
1240             {
1241                 double result = 0;
1242                 GET_OPTIMIZED(normL1_32f)(data, 0, &result, (int)len, 1);
1243                 return result;
1244             }
1245             {
1246                 float result = 0;
1247                 GET_OPTIMIZED(normInf_32f)(data, 0, &result, (int)len, 1);
1248                 return result;
1249
1250             }
1251         }
1252     }
1253
1254     CV_Assert( mask.empty() || mask.type() == CV_8U );
1255
1256     NormFunc func = normTab[normType >> 1][depth];
1257     CV_Assert( func != 0 );
1258
1259     const Mat* arrays[] = {&src, &mask, 0};
1260     uchar* ptrs[2];
1261     union
1262     {
1263         double d;
1264         int i;
1265         float f;
1266     }
1267     result;
1268     result.d = 0;
1269     NAryMatIterator it(arrays, ptrs);
1270     int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
1271     bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
1272             (normType == NORM_L2 && depth <= CV_8S);
1273     int isum = 0;
1274     int *ibuf = &result.i;
1275     size_t esz = 0;
1276
1277     if( blockSum )
1278     {
1279         intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
1280         blockSize = std::min(blockSize, intSumBlockSize);
1281         ibuf = &isum;
1282         esz = src.elemSize();
1283     }
1284
1285     for( size_t i = 0; i < it.nplanes; i++, ++it )
1286     {
1287         for( j = 0; j < total; j += blockSize )
1288         {
1289             int bsz = std::min(total - j, blockSize);
1290             func( ptrs[0], ptrs[1], (uchar*)ibuf, bsz, cn );
1291             count += bsz;
1292             if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
1293             {
1294                 result.d += isum;
1295                 isum = 0;
1296                 count = 0;
1297             }
1298             ptrs[0] += bsz*esz;
1299             if( ptrs[1] )
1300                 ptrs[1] += bsz;
1301         }
1302     }
1303
1304     if( normType == NORM_INF )
1305     {
1306         if( depth == CV_64F )
1307             ;
1308         else if( depth == CV_32F )
1309             result.d = result.f;
1310         else
1311             result.d = result.i;
1312     }
1313     else if( normType == NORM_L2 )
1314         result.d = std::sqrt(result.d);
1315
1316     return result.d;
1317 }
1318
1319
1320 double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
1321 {
1322     if( normType & CV_RELATIVE )
1323         return norm(_src1, _src2, normType & ~CV_RELATIVE, _mask)/(norm(_src2, normType, _mask) + DBL_EPSILON);
1324
1325     Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
1326     int depth = src1.depth(), cn = src1.channels();
1327
1328     CV_Assert( src1.size == src2.size && src1.type() == src2.type() );
1329
1330     normType &= 7;
1331     CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );
1332
1333     if( src1.depth() == CV_32F && src1.isContinuous() && src2.isContinuous() && mask.empty() )
1334     {
1335         size_t len = src1.total()*src1.channels();
1336         if( len == (size_t)(int)len )
1337         {
1338             const float* data1 = src1.ptr<float>();
1339             const float* data2 = src2.ptr<float>();
1340
1341             if( normType == NORM_L2 )
1342             {
1343                 double result = 0;
1344                 GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
1345                 return std::sqrt(result);
1346             }
1347             if( normType == NORM_L1 )
1348             {
1349                 double result = 0;
1350                 GET_OPTIMIZED(normDiffL1_32f)(data1, data2, 0, &result, (int)len, 1);
1351                 return result;
1352             }
1353             {
1354                 float result = 0;
1355                 GET_OPTIMIZED(normDiffInf_32f)(data1, data2, 0, &result, (int)len, 1);
1356                 return result;
1357             }
1358         }
1359     }
1360
1361     CV_Assert( mask.empty() || mask.type() == CV_8U );
1362
1363     NormDiffFunc func = normDiffTab[normType >> 1][depth];
1364     CV_Assert( func != 0 );
1365
1366     const Mat* arrays[] = {&src1, &src2, &mask, 0};
1367     uchar* ptrs[3];
1368     union
1369     {
1370         double d;
1371         float f;
1372         int i;
1373         unsigned u;
1374     }
1375     result;
1376     result.d = 0;
1377     NAryMatIterator it(arrays, ptrs);
1378     int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
1379     bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
1380             (normType == NORM_L2 && depth <= CV_8S);
1381     unsigned isum = 0;
1382     unsigned *ibuf = &result.u;
1383     size_t esz = 0;
1384
1385     if( blockSum )
1386     {
1387         intSumBlockSize = normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15);
1388         blockSize = std::min(blockSize, intSumBlockSize);
1389         ibuf = &isum;
1390         esz = src1.elemSize();
1391     }
1392
1393     for( size_t i = 0; i < it.nplanes; i++, ++it )
1394     {
1395         for( j = 0; j < total; j += blockSize )
1396         {
1397             int bsz = std::min(total - j, blockSize);
1398             func( ptrs[0], ptrs[1], ptrs[2], (uchar*)ibuf, bsz, cn );
1399             count += bsz;
1400             if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
1401             {
1402                 result.d += isum;
1403                 isum = 0;
1404                 count = 0;
1405             }
1406             ptrs[0] += bsz*esz;
1407             ptrs[1] += bsz*esz;
1408             if( ptrs[2] )
1409                 ptrs[2] += bsz;
1410         }
1411     }
1412
1413     if( normType == NORM_INF )
1414     {
1415         if( depth == CV_64F )
1416             ;
1417         else if( depth == CV_32F )
1418             result.d = result.f;
1419         else
1420             result.d = result.u;
1421     }
1422     else if( normType == NORM_L2 )
1423         result.d = std::sqrt(result.d);
1424
1425     return result.d;
1426 }
1427
1428
1429 ///////////////////////////////////// batch distance ///////////////////////////////////////
1430
1431 namespace cv
1432 {
1433
1434 template<typename _Tp, typename _Rt>
1435 void batchDistL1_(const _Tp* src1, const _Tp* src2, size_t step2,
1436                   int nvecs, int len, _Rt* dist, const uchar* mask)
1437 {
1438     step2 /= sizeof(src2[0]);
1439     if( !mask )
1440     {
1441         for( int i = 0; i < nvecs; i++ )
1442             dist[i] = normL1<_Tp, _Rt>(src1, src2 + step2*i, len);
1443     }
1444     else
1445     {
1446         _Rt val0 = std::numeric_limits<_Rt>::max();
1447         for( int i = 0; i < nvecs; i++ )
1448             dist[i] = mask[i] ? normL1<_Tp, _Rt>(src1, src2 + step2*i, len) : val0;
1449     }
1450 }
1451
1452 template<typename _Tp, typename _Rt>
1453 void batchDistL2Sqr_(const _Tp* src1, const _Tp* src2, size_t step2,
1454                      int nvecs, int len, _Rt* dist, const uchar* mask)
1455 {
1456     step2 /= sizeof(src2[0]);
1457     if( !mask )
1458     {
1459         for( int i = 0; i < nvecs; i++ )
1460             dist[i] = normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len);
1461     }
1462     else
1463     {
1464         _Rt val0 = std::numeric_limits<_Rt>::max();
1465         for( int i = 0; i < nvecs; i++ )
1466             dist[i] = mask[i] ? normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len) : val0;
1467     }
1468 }
1469
1470 template<typename _Tp, typename _Rt>
1471 void batchDistL2_(const _Tp* src1, const _Tp* src2, size_t step2,
1472                   int nvecs, int len, _Rt* dist, const uchar* mask)
1473 {
1474     step2 /= sizeof(src2[0]);
1475     if( !mask )
1476     {
1477         for( int i = 0; i < nvecs; i++ )
1478             dist[i] = std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len));
1479     }
1480     else
1481     {
1482         _Rt val0 = std::numeric_limits<_Rt>::max();
1483         for( int i = 0; i < nvecs; i++ )
1484             dist[i] = mask[i] ? std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len)) : val0;
1485     }
1486 }
1487
1488 static void batchDistHamming(const uchar* src1, const uchar* src2, size_t step2,
1489                              int nvecs, int len, int* dist, const uchar* mask)
1490 {
1491     step2 /= sizeof(src2[0]);
1492     if( !mask )
1493     {
1494         for( int i = 0; i < nvecs; i++ )
1495             dist[i] = normHamming(src1, src2 + step2*i, len);
1496     }
1497     else
1498     {
1499         int val0 = INT_MAX;
1500         for( int i = 0; i < nvecs; i++ )
1501             dist[i] = mask[i] ? normHamming(src1, src2 + step2*i, len) : val0;
1502     }
1503 }
1504
1505 static void batchDistHamming2(const uchar* src1, const uchar* src2, size_t step2,
1506                               int nvecs, int len, int* dist, const uchar* mask)
1507 {
1508     step2 /= sizeof(src2[0]);
1509     if( !mask )
1510     {
1511         for( int i = 0; i < nvecs; i++ )
1512             dist[i] = normHamming(src1, src2 + step2*i, len, 2);
1513     }
1514     else
1515     {
1516         int val0 = INT_MAX;
1517         for( int i = 0; i < nvecs; i++ )
1518             dist[i] = mask[i] ? normHamming(src1, src2 + step2*i, len, 2) : val0;
1519     }
1520 }
1521
1522 static void batchDistL1_8u32s(const uchar* src1, const uchar* src2, size_t step2,
1523                                int nvecs, int len, int* dist, const uchar* mask)
1524 {
1525     batchDistL1_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask);
1526 }
1527
1528 static void batchDistL1_8u32f(const uchar* src1, const uchar* src2, size_t step2,
1529                                int nvecs, int len, float* dist, const uchar* mask)
1530 {
1531     batchDistL1_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
1532 }
1533
1534 static void batchDistL2Sqr_8u32s(const uchar* src1, const uchar* src2, size_t step2,
1535                                   int nvecs, int len, int* dist, const uchar* mask)
1536 {
1537     batchDistL2Sqr_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask);
1538 }
1539
1540 static void batchDistL2Sqr_8u32f(const uchar* src1, const uchar* src2, size_t step2,
1541                                   int nvecs, int len, float* dist, const uchar* mask)
1542 {
1543     batchDistL2Sqr_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
1544 }
1545
1546 static void batchDistL2_8u32f(const uchar* src1, const uchar* src2, size_t step2,
1547                                int nvecs, int len, float* dist, const uchar* mask)
1548 {
1549     batchDistL2_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
1550 }
1551
1552 static void batchDistL1_32f(const float* src1, const float* src2, size_t step2,
1553                              int nvecs, int len, float* dist, const uchar* mask)
1554 {
1555     batchDistL1_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
1556 }
1557
1558 static void batchDistL2Sqr_32f(const float* src1, const float* src2, size_t step2,
1559                                 int nvecs, int len, float* dist, const uchar* mask)
1560 {
1561     batchDistL2Sqr_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
1562 }
1563
1564 static void batchDistL2_32f(const float* src1, const float* src2, size_t step2,
1565                              int nvecs, int len, float* dist, const uchar* mask)
1566 {
1567     batchDistL2_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
1568 }
1569
1570 typedef void (*BatchDistFunc)(const uchar* src1, const uchar* src2, size_t step2,
1571                               int nvecs, int len, uchar* dist, const uchar* mask);
1572
1573
1574 struct BatchDistInvoker
1575 {
1576     BatchDistInvoker( const Mat& _src1, const Mat& _src2,
1577                       Mat& _dist, Mat& _nidx, int _K,
1578                       const Mat& _mask, int _update,
1579                       BatchDistFunc _func)
1580     {
1581         src1 = &_src1;
1582         src2 = &_src2;
1583         dist = &_dist;
1584         nidx = &_nidx;
1585         K = _K;
1586         mask = &_mask;
1587         update = _update;
1588         func = _func;
1589     }
1590
1591     void operator()(const BlockedRange& range) const
1592     {
1593         AutoBuffer<int> buf(src2->rows);
1594         int* bufptr = buf;
1595
1596         for( int i = range.begin(); i < range.end(); i++ )
1597         {
1598             func(src1->ptr(i), src2->ptr(), src2->step, src2->rows, src2->cols,
1599                  K > 0 ? (uchar*)bufptr : dist->ptr(i), mask->data ? mask->ptr(i) : 0);
1600
1601             if( K > 0 )
1602             {
1603                 int* nidxptr = nidx->ptr<int>(i);
1604                 // since positive float's can be compared just like int's,
1605                 // we handle both CV_32S and CV_32F cases with a single branch
1606                 int* distptr = (int*)dist->ptr(i);
1607
1608                 int k, k0, k0_, j;
1609                 for( k0 = 0; k0 < K; k0++ )
1610                     if( nidxptr[k0] < 0 )
1611                         break;
1612                 k0_ = std::max(k0, 1);
1613
1614                 for( j = 0; j < src2->rows; j++ )
1615                 {
1616                     int d = bufptr[j];
1617                     if( d < distptr[k0_-1] )
1618                     {
1619                         for( k = std::min(k0-1, K-2); k >= 0 && distptr[k] > d; k-- )
1620                         {
1621                             nidxptr[k+1] = nidxptr[k];
1622                             distptr[k+1] = distptr[k];
1623                         }
1624                         nidxptr[k+1] = j + update;
1625                         distptr[k+1] = d;
1626                         k0_ = k0 = std::min(k0 + 1, K);
1627                     }
1628                 }
1629             }
1630         }
1631     }
1632
1633     const Mat *src1;
1634     const Mat *src2;
1635     Mat *dist;
1636     Mat *nidx;
1637     const Mat *mask;
1638     int K;
1639     int update;
1640     BatchDistFunc func;
1641 };
1642
1643 }
1644
1645 void cv::batchDistance( InputArray _src1, InputArray _src2,
1646                         OutputArray _dist, int dtype, OutputArray _nidx,
1647                         int normType, int K, InputArray _mask,
1648                         int update, bool crosscheck )
1649 {
1650     Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
1651     int type = src1.type();
1652     CV_Assert( type == src2.type() && src1.cols == src2.cols &&
1653                (type == CV_32F || type == CV_8U));
1654     CV_Assert( _nidx.needed() == (K > 0) );
1655
1656     if( dtype == -1 )
1657     {
1658         dtype = normType == NORM_HAMMING || normType == NORM_HAMMING2 ? CV_32S : CV_32F;
1659     }
1660     CV_Assert( (type == CV_8U && dtype == CV_32S) || dtype == CV_32F);
1661
1662     K = std::min(K, src2.rows);
1663
1664     _dist.create(src1.rows, (K > 0 ? K : src2.rows), dtype);
1665     Mat dist = _dist.getMat(), nidx;
1666     if( _nidx.needed() )
1667     {
1668         _nidx.create(dist.size(), CV_32S);
1669         nidx = _nidx.getMat();
1670     }
1671
1672     if( update == 0 && K > 0 )
1673     {
1674         dist = Scalar::all(dtype == CV_32S ? (double)INT_MAX : (double)FLT_MAX);
1675         nidx = Scalar::all(-1);
1676     }
1677
1678     if( crosscheck )
1679     {
1680         CV_Assert( K == 1 && update == 0 && mask.empty() );
1681         Mat tdist, tidx;
1682         batchDistance(src2, src1, tdist, dtype, tidx, normType, K, mask, 0, false);
1683
1684         // if an idx-th element from src1 appeared to be the nearest to i-th element of src2,
1685         // we update the minimum mutual distance between idx-th element of src1 and the whole src2 set.
1686         // As a result, if nidx[idx] = i*, it means that idx-th element of src1 is the nearest
1687         // to i*-th element of src2 and i*-th element of src2 is the closest to idx-th element of src1.
1688         // If nidx[idx] = -1, it means that there is no such ideal couple for it in src2.
1689         // This O(N) procedure is called cross-check and it helps to eliminate some false matches.
1690         if( dtype == CV_32S )
1691         {
1692             for( int i = 0; i < tdist.rows; i++ )
1693             {
1694                 int idx = tidx.at<int>(i);
1695                 int d = tdist.at<int>(i), d0 = dist.at<int>(idx);
1696                 if( d < d0 )
1697                 {
1698                     dist.at<int>(idx) = d0;
1699                     nidx.at<int>(idx) = i + update;
1700                 }
1701             }
1702         }
1703         else
1704         {
1705             for( int i = 0; i < tdist.rows; i++ )
1706             {
1707                 int idx = tidx.at<int>(i);
1708                 float d = tdist.at<float>(i), d0 = dist.at<float>(idx);
1709                 if( d < d0 )
1710                 {
1711                     dist.at<float>(idx) = d0;
1712                     nidx.at<int>(idx) = i + update;
1713                 }
1714             }
1715         }
1716         return;
1717     }
1718
1719     BatchDistFunc func = 0;
1720     if( type == CV_8U )
1721     {
1722         if( normType == NORM_L1 && dtype == CV_32S )
1723             func = (BatchDistFunc)batchDistL1_8u32s;
1724         else if( normType == NORM_L1 && dtype == CV_32F )
1725             func = (BatchDistFunc)batchDistL1_8u32f;
1726         else if( normType == NORM_L2SQR && dtype == CV_32S )
1727             func = (BatchDistFunc)batchDistL2Sqr_8u32s;
1728         else if( normType == NORM_L2SQR && dtype == CV_32F )
1729             func = (BatchDistFunc)batchDistL2Sqr_8u32f;
1730         else if( normType == NORM_L2 && dtype == CV_32F )
1731             func = (BatchDistFunc)batchDistL2_8u32f;
1732         else if( normType == NORM_HAMMING && dtype == CV_32S )
1733             func = (BatchDistFunc)batchDistHamming;
1734         else if( normType == NORM_HAMMING2 && dtype == CV_32S )
1735             func = (BatchDistFunc)batchDistHamming2;
1736     }
1737     else if( type == CV_32F && dtype == CV_32F )
1738     {
1739         if( normType == NORM_L1 )
1740             func = (BatchDistFunc)batchDistL1_32f;
1741         else if( normType == NORM_L2SQR )
1742             func = (BatchDistFunc)batchDistL2Sqr_32f;
1743         else if( normType == NORM_L2 )
1744             func = (BatchDistFunc)batchDistL2_32f;
1745     }
1746
1747     if( func == 0 )
1748         CV_Error_(CV_StsUnsupportedFormat,
1749                   ("The combination of type=%d, dtype=%d and normType=%d is not supported",
1750                    type, dtype, normType));
1751
1752     parallel_for(BlockedRange(0, src1.rows),
1753                  BatchDistInvoker(src1, src2, dist, nidx, K, mask, update, func));
1754 }
1755
1756
1757 CV_IMPL CvScalar cvSum( const CvArr* srcarr )
1758 {
1759     cv::Scalar sum = cv::sum(cv::cvarrToMat(srcarr, false, true, 1));
1760     if( CV_IS_IMAGE(srcarr) )
1761     {
1762         int coi = cvGetImageCOI((IplImage*)srcarr);
1763         if( coi )
1764         {
1765             CV_Assert( 0 < coi && coi <= 4 );
1766             sum = cv::Scalar(sum[coi-1]);
1767         }
1768     }
1769     return sum;
1770 }
1771
1772 CV_IMPL int cvCountNonZero( const CvArr* imgarr )
1773 {
1774     cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
1775     if( img.channels() > 1 )
1776         cv::extractImageCOI(imgarr, img);
1777     return countNonZero(img);
1778 }
1779
1780
1781 CV_IMPL  CvScalar
1782 cvAvg( const void* imgarr, const void* maskarr )
1783 {
1784     cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
1785     cv::Scalar mean = !maskarr ? cv::mean(img) : cv::mean(img, cv::cvarrToMat(maskarr));
1786     if( CV_IS_IMAGE(imgarr) )
1787     {
1788         int coi = cvGetImageCOI((IplImage*)imgarr);
1789         if( coi )
1790         {
1791             CV_Assert( 0 < coi && coi <= 4 );
1792             mean = cv::Scalar(mean[coi-1]);
1793         }
1794     }
1795     return mean;
1796 }
1797
1798
1799 CV_IMPL  void
1800 cvAvgSdv( const CvArr* imgarr, CvScalar* _mean, CvScalar* _sdv, const void* maskarr )
1801 {
1802     cv::Scalar mean, sdv;
1803
1804     cv::Mat mask;
1805     if( maskarr )
1806         mask = cv::cvarrToMat(maskarr);
1807
1808     cv::meanStdDev(cv::cvarrToMat(imgarr, false, true, 1), mean, sdv, mask );
1809
1810     if( CV_IS_IMAGE(imgarr) )
1811     {
1812         int coi = cvGetImageCOI((IplImage*)imgarr);
1813         if( coi )
1814         {
1815             CV_Assert( 0 < coi && coi <= 4 );
1816             mean = cv::Scalar(mean[coi-1]);
1817             sdv = cv::Scalar(sdv[coi-1]);
1818         }
1819     }
1820
1821     if( _mean )
1822         *(cv::Scalar*)_mean = mean;
1823     if( _sdv )
1824         *(cv::Scalar*)_sdv = sdv;
1825 }
1826
1827
1828 CV_IMPL void
1829 cvMinMaxLoc( const void* imgarr, double* _minVal, double* _maxVal,
1830              CvPoint* _minLoc, CvPoint* _maxLoc, const void* maskarr )
1831 {
1832     cv::Mat mask, img = cv::cvarrToMat(imgarr, false, true, 1);
1833     if( maskarr )
1834         mask = cv::cvarrToMat(maskarr);
1835     if( img.channels() > 1 )
1836         cv::extractImageCOI(imgarr, img);
1837
1838     cv::minMaxLoc( img, _minVal, _maxVal,
1839                    (cv::Point*)_minLoc, (cv::Point*)_maxLoc, mask );
1840 }
1841
1842
1843 CV_IMPL  double
1844 cvNorm( const void* imgA, const void* imgB, int normType, const void* maskarr )
1845 {
1846     cv::Mat a, mask;
1847     if( !imgA )
1848     {
1849         imgA = imgB;
1850         imgB = 0;
1851     }
1852
1853     a = cv::cvarrToMat(imgA, false, true, 1);
1854     if( maskarr )
1855         mask = cv::cvarrToMat(maskarr);
1856
1857     if( a.channels() > 1 && CV_IS_IMAGE(imgA) && cvGetImageCOI((const IplImage*)imgA) > 0 )
1858         cv::extractImageCOI(imgA, a);
1859
1860     if( !imgB )
1861         return !maskarr ? cv::norm(a, normType) : cv::norm(a, normType, mask);
1862
1863     cv::Mat b = cv::cvarrToMat(imgB, false, true, 1);
1864     if( b.channels() > 1 && CV_IS_IMAGE(imgB) && cvGetImageCOI((const IplImage*)imgB) > 0 )
1865         cv::extractImageCOI(imgB, b);
1866
1867     return !maskarr ? cv::norm(a, b, normType) : cv::norm(a, b, normType, mask);
1868 }