1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "precomp.hpp"
// Copy each channel of a raw (possibly multi-channel) value of type T into a
// cv::Scalar, reinterpreting the value as an array of its channel type.
// NOTE(review): fragment is incomplete in this view — the opening brace, the
// declaration of `s`, and the `return s;` are not visible here.
49 template<typename T> static inline Scalar rawToScalar(const T& v)
52     typedef typename DataType<T>::channel_type T1;
53     int i, n = DataType<T>::channels;
54     for( i = 0; i < n; i++ )
55         s.val[i] = ((T1*)&v)[i];
59 /****************************************************************************************\
61 \****************************************************************************************/
// Generic per-plane sum kernel: accumulates `len` pixels of `cn` channels from
// src0 into dst[0..cn-1] (accumulator type ST). The visible loops show
// channel-count-specialized paths (cn==1/2/3, then a k+=4 stride for cn>=4)
// plus a masked path further down that only adds pixels where mask[i] != 0.
// NOTE(review): large parts of the body (braces, mask branch headers, the
// returned non-zero count) are missing from this view.
63 template<typename T, typename ST>
64 static int sum_(const T* src0, const uchar* mask, ST* dst, int len, int cn )
// Unmasked single-channel path, manually unrolled by 4 when enabled.
75 #if CV_ENABLE_UNROLLED
76 for(; i <= len - 4; i += 4, src += cn*4 )
77 s0 += src[0] + src[cn] + src[cn*2] + src[cn*3];
79 for( ; i < len; i++, src += cn )
// Two-channel unmasked accumulation.
85 ST s0 = dst[0], s1 = dst[1];
86 for( i = 0; i < len; i++, src += cn )
// Three-channel unmasked accumulation.
96 ST s0 = dst[0], s1 = dst[1], s2 = dst[2];
97 for( i = 0; i < len; i++, src += cn )
// Remaining channels handled four at a time.
108 for( ; k < cn; k += 4 )
111 ST s0 = dst[k], s1 = dst[k+1], s2 = dst[k+2], s3 = dst[k+3];
112 for( i = 0; i < len; i++, src += cn )
114 s0 += src[0]; s1 += src[1];
115 s2 += src[2]; s3 += src[3];
// Masked paths — presumably guarded by `if( mask[i] )` on lines not visible
// here; TODO confirm against the full source.
129 for( i = 0; i < len; i++ )
139 ST s0 = dst[0], s1 = dst[1], s2 = dst[2];
140 for( i = 0; i < len; i++, src += 3 )
154 for( i = 0; i < len; i++, src += cn )
158 #if CV_ENABLE_UNROLLED
159 for( ; k <= cn - 4; k += 4 )
162 s0 = dst[k] + src[k];
163 s1 = dst[k+1] + src[k+1];
164 dst[k] = s0; dst[k+1] = s1;
165 s0 = dst[k+2] + src[k+2];
166 s1 = dst[k+3] + src[k+3];
167 dst[k+2] = s0; dst[k+3] = s1;
179 static int sum8u( const uchar* src, const uchar* mask, int* dst, int len, int cn )
180 { return sum_(src, mask, dst, len, cn); }
182 static int sum8s( const schar* src, const uchar* mask, int* dst, int len, int cn )
183 { return sum_(src, mask, dst, len, cn); }
185 static int sum16u( const ushort* src, const uchar* mask, int* dst, int len, int cn )
186 { return sum_(src, mask, dst, len, cn); }
188 static int sum16s( const short* src, const uchar* mask, int* dst, int len, int cn )
189 { return sum_(src, mask, dst, len, cn); }
191 static int sum32s( const int* src, const uchar* mask, double* dst, int len, int cn )
192 { return sum_(src, mask, dst, len, cn); }
194 static int sum32f( const float* src, const uchar* mask, double* dst, int len, int cn )
195 { return sum_(src, mask, dst, len, cn); }
197 static int sum64f( const double* src, const uchar* mask, double* dst, int len, int cn )
198 { return sum_(src, mask, dst, len, cn); }
// Depth-indexed dispatch table for the sum kernels (index = CV_8U..CV_64F).
// NOTE(review): the table braces and the (sum32s) entry line are not visible
// in this fragment — verify against the full source.
200 typedef int (*SumFunc)(const uchar*, const uchar* mask, uchar*, int, int);
202 static SumFunc sumTab[] =
204 (SumFunc)GET_OPTIMIZED(sum8u), (SumFunc)sum8s,
205 (SumFunc)sum16u, (SumFunc)sum16s,
207 (SumFunc)GET_OPTIMIZED(sum32f), (SumFunc)sum64f,
// Generic non-zero counter over `len` elements, unrolled by 4 when enabled.
// NOTE(review): the `template<typename T>` line, the braces and the
// `return nz;` are missing from this view.
212 static int countNonZero_(const T* src, int len )
215 #if CV_ENABLE_UNROLLED
216 for(; i <= len - 4; i += 4 )
// Booleans promote to 0/1, so each comparison adds exactly one when non-zero.
217 nz += (src[i] != 0) + (src[i+1] != 0) + (src[i+2] != 0) + (src[i+3] != 0);
219 for( ; i < len; i++ )
// SSE2-accelerated non-zero counter for 8-bit data. Builds (once, lazily) an
// inverse-popcount table indexed by the (byte == 0) comparison mask, then
// processes 16 bytes per iteration with a scalar tail.
// NOTE(review): the #if guards, lazy-init branch and return are not visible
// in this fragment.
224 static int countNonZero8u( const uchar* src, int len )
230 __m128i pattern = _mm_setzero_si128 ();
// Lazily-initialized lookup table; `initialized` is volatile, but this is not
// a thread-safe initialization pattern — benign here only if races rebuild
// identical values. TODO(review): confirm intent in full source.
231 static uchar tab[256];
232 static volatile bool initialized = false;
235 // we compute inverse popcount table,
236 // since we pass (img[x] == 0) mask as index in the table.
237 for( int j = 0; j < 256; j++ )
240 for( int mask = 1; mask < 256; mask += mask )
241 val += (j & mask) == 0;
// Vector loop: compare 16 bytes to zero, movemask gives a 16-bit "is zero"
// mask; table lookups per mask byte count the non-zero lanes.
247 for (; i<=len-16; i+=16)
249 __m128i r0 = _mm_loadu_si128((const __m128i*)(src+i));
250 int val = _mm_movemask_epi8(_mm_cmpeq_epi8(r0, pattern));
251 nz += tab[val & 255] + tab[val >> 8];
// Scalar tail.
255 for( ; i < len; i++ )
260 static int countNonZero16u( const ushort* src, int len )
261 { return countNonZero_(src, len); }
263 static int countNonZero32s( const int* src, int len )
264 { return countNonZero_(src, len); }
266 static int countNonZero32f( const float* src, int len )
267 { return countNonZero_(src, len); }
269 static int countNonZero64f( const double* src, int len )
270 { return countNonZero_(src, len); }
// Depth-indexed dispatch table; 8u/8s share the byte kernel and 16u/16s share
// the 16-bit kernel (counting non-zero is sign-agnostic).
// NOTE(review): the table braces are missing from this fragment.
272 typedef int (*CountNonZeroFunc)(const uchar*, int);
274 static CountNonZeroFunc countNonZeroTab[] =
276 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u),
277 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u),
278 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32s), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32f),
279 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero64f), 0
// Generic sum + sum-of-squares kernel (used by meanStdDev): accumulates per
// channel into sum[] (type ST) and sqsum[] (type SQT), with channel-count
// specialized unmasked paths (cn==1/2/3, then k+=4 for cn>=4) followed by
// masked paths that presumably test mask[i] — guard lines are not visible.
// NOTE(review): many structural lines (braces, if/else headers, the returned
// pixel count) are missing from this view.
283 template<typename T, typename ST, typename SQT>
284 static int sumsqr_(const T* src0, const uchar* mask, ST* sum, SQT* sqsum, int len, int cn )
// cn == 1, unmasked.
297 for( i = 0; i < len; i++, src += cn )
300 s0 += v; sq0 += (SQT)v*v;
// cn == 2, unmasked.
307 ST s0 = sum[0], s1 = sum[1];
308 SQT sq0 = sqsum[0], sq1 = sqsum[1];
309 for( i = 0; i < len; i++, src += cn )
311 T v0 = src[0], v1 = src[1];
312 s0 += v0; sq0 += (SQT)v0*v0;
313 s1 += v1; sq1 += (SQT)v1*v1;
315 sum[0] = s0; sum[1] = s1;
316 sqsum[0] = sq0; sqsum[1] = sq1;
// cn == 3, unmasked.
320 ST s0 = sum[0], s1 = sum[1], s2 = sum[2];
321 SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
322 for( i = 0; i < len; i++, src += cn )
324 T v0 = src[0], v1 = src[1], v2 = src[2];
325 s0 += v0; sq0 += (SQT)v0*v0;
326 s1 += v1; sq1 += (SQT)v1*v1;
327 s2 += v2; sq2 += (SQT)v2*v2;
329 sum[0] = s0; sum[1] = s1; sum[2] = s2;
330 sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
// cn >= 4: process four channels per pass.
333 for( ; k < cn; k += 4 )
336 ST s0 = sum[k], s1 = sum[k+1], s2 = sum[k+2], s3 = sum[k+3];
337 SQT sq0 = sqsum[k], sq1 = sqsum[k+1], sq2 = sqsum[k+2], sq3 = sqsum[k+3];
338 for( i = 0; i < len; i++, src += cn )
341 v0 = src[0], v1 = src[1];
342 s0 += v0; sq0 += (SQT)v0*v0;
343 s1 += v1; sq1 += (SQT)v1*v1;
344 v0 = src[2], v1 = src[3];
345 s2 += v0; sq2 += (SQT)v0*v0;
346 s3 += v1; sq3 += (SQT)v1*v1;
348 sum[k] = s0; sum[k+1] = s1;
349 sum[k+2] = s2; sum[k+3] = s3;
350 sqsum[k] = sq0; sqsum[k+1] = sq1;
351 sqsum[k+2] = sq2; sqsum[k+3] = sq3;
// Masked paths below — mask tests are on lines not visible here.
362 for( i = 0; i < len; i++ )
366 s0 += v; sq0 += (SQT)v*v;
374 ST s0 = sum[0], s1 = sum[1], s2 = sum[2];
375 SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
376 for( i = 0; i < len; i++, src += 3 )
379 T v0 = src[0], v1 = src[1], v2 = src[2];
380 s0 += v0; sq0 += (SQT)v0*v0;
381 s1 += v1; sq1 += (SQT)v1*v1;
382 s2 += v2; sq2 += (SQT)v2*v2;
385 sum[0] = s0; sum[1] = s1; sum[2] = s2;
386 sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
// General masked cn path.
390 for( i = 0; i < len; i++, src += cn )
393 for( int k = 0; k < cn; k++ )
397 SQT sq = sqsum[k] + (SQT)v*v;
398 sum[k] = s; sqsum[k] = sq;
407 static int sqsum8u( const uchar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
408 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
410 static int sqsum8s( const schar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
411 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
413 static int sqsum16u( const ushort* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
414 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
416 static int sqsum16s( const short* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
417 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
419 static int sqsum32s( const int* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
420 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
422 static int sqsum32f( const float* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
423 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
425 static int sqsum64f( const double* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
426 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
// Depth-indexed dispatch table for the sum/sum-of-squares kernels.
// NOTE(review): table braces are missing in this fragment.
428 typedef int (*SumSqrFunc)(const uchar*, const uchar* mask, uchar*, uchar*, int, int);
430 static SumSqrFunc sumSqrTab[] =
432 (SumSqrFunc)GET_OPTIMIZED(sqsum8u), (SumSqrFunc)sqsum8s, (SumSqrFunc)sqsum16u, (SumSqrFunc)sqsum16s,
433 (SumSqrFunc)sqsum32s, (SumSqrFunc)GET_OPTIMIZED(sqsum32f), (SumSqrFunc)sqsum64f, 0
// Public cv::sum: per-channel sum of an array with up to 4 channels. Iterates
// the (possibly non-continuous / multi-plane) matrix with NAryMatIterator and,
// for small integer depths, accumulates into an int buffer in blocks sized so
// the int accumulator cannot overflow, flushing into the double Scalar.
// NOTE(review): several lines (Scalar `s` declaration, the flush loop body,
// the final return) are missing from this view.
438 cv::Scalar cv::sum( InputArray _src )
440 Mat src = _src.getMat();
441 int k, cn = src.channels(), depth = src.depth();
442 SumFunc func = sumTab[depth];
444 CV_Assert( cn <= 4 && func != 0 );
446 const Mat* arrays[] = {&src, 0};
448 NAryMatIterator it(arrays, ptrs);
450 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
452 AutoBuffer<int> _buf;
// `buf` aliases the Scalar's doubles unless block summation is enabled below.
453 int* buf = (int*)&s[0];
454 bool blockSum = depth < CV_32S;
// Block size chosen so int accumulation of 8/16-bit data cannot overflow.
459 intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
460 blockSize = std::min(blockSize, intSumBlockSize);
464 for( k = 0; k < cn; k++ )
466 esz = src.elemSize();
469 for( size_t i = 0; i < it.nplanes; i++, ++it )
471 for( j = 0; j < total; j += blockSize )
473 int bsz = std::min(total - j, blockSize);
474 func( ptrs[0], 0, (uchar*)buf, bsz, cn );
// Flush the int block accumulator into the double result when the block is
// full or this is the very last block.
476 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
478 for( k = 0; k < cn; k++ )
// Public cv::countNonZero: single-channel only; sums the per-plane counts from
// the depth-dispatched kernel. NOTE(review): the `return nz;` line is not
// visible in this fragment.
491 int cv::countNonZero( InputArray _src )
493 Mat src = _src.getMat();
494 CountNonZeroFunc func = countNonZeroTab[src.depth()];
496 CV_Assert( src.channels() == 1 && func != 0 );
498 const Mat* arrays[] = {&src, 0};
500 NAryMatIterator it(arrays, ptrs);
501 int total = (int)it.size, nz = 0;
503 for( size_t i = 0; i < it.nplanes; i++, ++it )
504 nz += func( ptrs[0], total );
// Public cv::mean: per-channel mean, optionally restricted to an 8-bit mask.
// Same blocked-int-accumulation strategy as cv::sum; nz0 counts the pixels
// that actually contributed so the division is by the masked count.
// NOTE(review): the Scalar `s` declaration, the flush body and several
// structural lines are missing from this view.
509 cv::Scalar cv::mean( InputArray _src, InputArray _mask )
511 Mat src = _src.getMat(), mask = _mask.getMat();
512 CV_Assert( mask.empty() || mask.type() == CV_8U );
514 int k, cn = src.channels(), depth = src.depth();
515 SumFunc func = sumTab[depth];
517 CV_Assert( cn <= 4 && func != 0 );
519 const Mat* arrays[] = {&src, &mask, 0};
521 NAryMatIterator it(arrays, ptrs);
523 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
525 AutoBuffer<int> _buf;
526 int* buf = (int*)&s[0];
527 bool blockSum = depth <= CV_16S;
528 size_t esz = 0, nz0 = 0;
// Overflow-safe block size for int accumulation of narrow depths.
532 intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
533 blockSize = std::min(blockSize, intSumBlockSize);
537 for( k = 0; k < cn; k++ )
539 esz = src.elemSize();
542 for( size_t i = 0; i < it.nplanes; i++, ++it )
544 for( j = 0; j < total; j += blockSize )
546 int bsz = std::min(total - j, blockSize);
// The kernel returns how many pixels passed the mask in this block.
547 int nz = func( ptrs[0], ptrs[1], (uchar*)buf, bsz, cn );
550 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
552 for( k = 0; k < cn; k++ )
// Empty mask selection yields a zero Scalar rather than dividing by zero.
564 return s*(nz0 ? 1./nz0 : 0);
// Public cv::meanStdDev: per-channel mean and standard deviation, optionally
// masked. Accumulates sum and sum-of-squares (blocked int accumulation for
// narrow depths), then sdv = sqrt(E[x^2] - E[x]^2) clamped at 0 to absorb
// rounding. Results are written to _mean/_sdv, padding extra channels with 0.
// NOTE(review): numerous structural lines (braces, flush bodies, the final
// stores) are missing from this view.
568 void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
570 Mat src = _src.getMat(), mask = _mask.getMat();
571 CV_Assert( mask.empty() || mask.type() == CV_8U );
573 int k, cn = src.channels(), depth = src.depth();
574 SumSqrFunc func = sumSqrTab[depth];
576 CV_Assert( func != 0 );
578 const Mat* arrays[] = {&src, &mask, 0};
580 NAryMatIterator it(arrays, ptrs);
581 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
582 int j, count = 0, nz0 = 0;
// One buffer holds s (sums), sq (square sums) and, when block summation is
// used, the int staging areas sbuf/sqbuf.
583 AutoBuffer<double> _buf(cn*4);
584 double *s = (double*)_buf, *sq = s + cn;
585 int *sbuf = (int*)s, *sqbuf = (int*)sq;
586 bool blockSum = depth <= CV_16S, blockSqSum = depth <= CV_8S;
589 for( k = 0; k < cn; k++ )
594 intSumBlockSize = 1 << 15;
595 blockSize = std::min(blockSize, intSumBlockSize);
596 sbuf = (int*)(sq + cn);
599 for( k = 0; k < cn; k++ )
600 sbuf[k] = sqbuf[k] = 0;
601 esz = src.elemSize();
604 for( size_t i = 0; i < it.nplanes; i++, ++it )
606 for( j = 0; j < total; j += blockSize )
608 int bsz = std::min(total - j, blockSize);
609 int nz = func( ptrs[0], ptrs[1], (uchar*)sbuf, (uchar*)sqbuf, bsz, cn );
// Flush int staging buffers into the double accumulators when full or done.
612 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
614 for( k = 0; k < cn; k++ )
621 for( k = 0; k < cn; k++ )
// scale == 0 when no pixels were selected -> mean and sdv come out as 0.
635 double scale = nz0 ? 1./nz0 : 0.;
636 for( k = 0; k < cn; k++ )
// max(...,0) guards against tiny negative values from floating-point error.
639 sq[k] = std::sqrt(std::max(sq[k]*scale - s[k]*s[k], 0.));
// Write mean (j==0) then sdv (j==1) into the caller-provided outputs.
642 for( j = 0; j < 2; j++ )
644 const double* sptr = j == 0 ? s : sq;
645 _OutputArray _dst = j == 0 ? _mean : _sdv;
649 if( !_dst.fixedSize() )
650 _dst.create(cn, 1, CV_64F, -1, true);
651 Mat dst = _dst.getMat();
652 int dcn = (int)dst.total();
653 CV_Assert( dst.type() == CV_64F && dst.isContinuous() &&
654 (dst.cols == 1 || dst.rows == 1) && dcn >= cn );
655 double* dptr = dst.ptr<double>();
656 for( k = 0; k < cn; k++ )
// Zero-fill any extra output channels beyond cn.
658 for( ; k < dcn; k++ )
663 /****************************************************************************************\
665 \****************************************************************************************/
// Generic min/max scan over `len` elements, carrying running values and flat
// indices across calls (startIdx offsets indices into the whole array).
// Two paths: unmasked and masked (mask[i] must be non-zero to participate).
// NOTE(review): braces, the `if(!mask)` split and some loop bodies are not
// visible in this fragment.
670 template<typename T, typename WT> static void
671 minMaxIdx_( const T* src, const uchar* mask, WT* _minVal, WT* _maxVal,
672 size_t* _minIdx, size_t* _maxIdx, int len, size_t startIdx )
674 WT minVal = *_minVal, maxVal = *_maxVal;
675 size_t minIdx = *_minIdx, maxIdx = *_maxIdx;
// Unmasked scan.
679 for( int i = 0; i < len; i++ )
685 minIdx = startIdx + i;
690 maxIdx = startIdx + i;
// Masked scan.
696 for( int i = 0; i < len; i++ )
699 if( mask[i] && val < minVal )
702 minIdx = startIdx + i;
704 if( mask[i] && val > maxVal )
707 maxIdx = startIdx + i;
718 static void minMaxIdx_8u(const uchar* src, const uchar* mask, int* minval, int* maxval,
719 size_t* minidx, size_t* maxidx, int len, size_t startidx )
720 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
722 static void minMaxIdx_8s(const schar* src, const uchar* mask, int* minval, int* maxval,
723 size_t* minidx, size_t* maxidx, int len, size_t startidx )
724 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
726 static void minMaxIdx_16u(const ushort* src, const uchar* mask, int* minval, int* maxval,
727 size_t* minidx, size_t* maxidx, int len, size_t startidx )
728 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
730 static void minMaxIdx_16s(const short* src, const uchar* mask, int* minval, int* maxval,
731 size_t* minidx, size_t* maxidx, int len, size_t startidx )
732 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
734 static void minMaxIdx_32s(const int* src, const uchar* mask, int* minval, int* maxval,
735 size_t* minidx, size_t* maxidx, int len, size_t startidx )
736 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
738 static void minMaxIdx_32f(const float* src, const uchar* mask, float* minval, float* maxval,
739 size_t* minidx, size_t* maxidx, int len, size_t startidx )
740 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
742 static void minMaxIdx_64f(const double* src, const uchar* mask, double* minval, double* maxval,
743 size_t* minidx, size_t* maxidx, int len, size_t startidx )
744 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
// Depth-indexed dispatch table for the min/max kernels; float/double entries
// are cast to the int-pointer signature and the caller passes matching
// storage. NOTE(review): the table braces and trailing 0 sentinel are missing
// from this fragment.
746 typedef void (*MinMaxIdxFunc)(const uchar*, const uchar*, int*, int*, size_t*, size_t*, int, size_t);
748 static MinMaxIdxFunc minmaxTab[] =
750 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8s),
751 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16s),
752 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32s),
753 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32f), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_64f),
// Convert a flat element offset into an n-dimensional index for Mat `a`,
// dividing out each dimension size from the last dimension backwards.
// NOTE(review): the "not found" branch (offset == 0) and the div/assert lines
// are not visible in this fragment.
757 static void ofs2idx(const Mat& a, size_t ofs, int* idx)
763 for( i = d-1; i >= 0; i-- )
766 idx[i] = (int)(ofs % sz);
// Second loop presumably fills idx with -1 when no extremum was found —
// TODO confirm against the full source.
772 for( i = d-1; i >= 0; i-- )
// Public cv::minMaxIdx: finds global min/max values and (optionally) their
// n-dimensional indices. Multi-channel input is allowed only without a mask
// and without index output (the array is treated as single-channel of
// planeSize elements). Working extrema are kept in the depth's natural type
// (int/float/double) and converted to double at the end.
// NOTE(review): braces, the startidx declaration and result stores are
// missing from this view.
779 void cv::minMaxIdx(InputArray _src, double* minVal,
780 double* maxVal, int* minIdx, int* maxIdx,
783 Mat src = _src.getMat(), mask = _mask.getMat();
784 int depth = src.depth(), cn = src.channels();
786 CV_Assert( (cn == 1 && (mask.empty() || mask.type() == CV_8U)) ||
787 (cn >= 1 && mask.empty() && !minIdx && !maxIdx) );
788 MinMaxIdxFunc func = minmaxTab[depth];
789 CV_Assert( func != 0 );
791 const Mat* arrays[] = {&src, &mask, 0};
793 NAryMatIterator it(arrays, ptrs);
795 size_t minidx = 0, maxidx = 0;
// Per-depth working extrema; only the pair matching `depth` is used.
796 int iminval = INT_MAX, imaxval = INT_MIN;
797 float fminval = FLT_MAX, fmaxval = -FLT_MAX;
798 double dminval = DBL_MAX, dmaxval = -DBL_MAX;
800 int *minval = &iminval, *maxval = &imaxval;
801 int planeSize = (int)it.size*cn;
// Repoint the int* slots at the float/double storage for those depths — the
// kernel was cast to the int-pointer signature in minmaxTab.
803 if( depth == CV_32F )
804 minval = (int*)&fminval, maxval = (int*)&fmaxval;
805 else if( depth == CV_64F )
806 minval = (int*)&dminval, maxval = (int*)&dmaxval;
808 for( size_t i = 0; i < it.nplanes; i++, ++it, startidx += planeSize )
809 func( ptrs[0], ptrs[1], minval, maxval, &minidx, &maxidx, planeSize, startidx );
// Normalize the result to double; first branch presumably handles the
// "nothing found" case (fully-masked input) — TODO confirm.
812 dminval = dmaxval = 0;
813 else if( depth == CV_32F )
814 dminval = fminval, dmaxval = fmaxval;
815 else if( depth <= CV_32S )
816 dminval = iminval, dmaxval = imaxval;
824 ofs2idx(src, minidx, minIdx);
826 ofs2idx(src, maxidx, maxIdx);
// 2D convenience wrapper over minMaxIdx: the (row, col) pair written by
// minMaxIdx into the Point is swapped into (x=col, y=row) order.
// NOTE(review): the null-pointer guards (`if( minLoc )` / `if( maxLoc )`)
// that should precede the swaps are not visible in this fragment.
829 void cv::minMaxLoc( InputArray _img, double* minVal, double* maxVal,
830 Point* minLoc, Point* maxLoc, InputArray mask )
832 Mat img = _img.getMat();
833 CV_Assert(img.dims <= 2);
835 minMaxIdx(_img, minVal, maxVal, (int*)minLoc, (int*)maxLoc, mask);
837 std::swap(minLoc->x, minLoc->y);
839 std::swap(maxLoc->x, maxLoc->y);
842 /****************************************************************************************\
844 \****************************************************************************************/
// Squared L2 distance between two float vectors, with an SSE path (8 floats
// per iteration, two accumulators) and an unrolled scalar path.
// NOTE(review): the #if guards, tail loop header and `return d;` are missing
// from this view.
849 float normL2Sqr_(const float* a, const float* b, int n)
851 int j = 0; float d = 0.f;
855 float CV_DECL_ALIGNED(16) buf[4];
856 __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
858 for( ; j <= n - 8; j += 8 )
860 __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
861 __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
862 d0 = _mm_add_ps(d0, _mm_mul_ps(t0, t0));
863 d1 = _mm_add_ps(d1, _mm_mul_ps(t1, t1));
// Horizontal reduction of the two vector accumulators.
865 _mm_store_ps(buf, _mm_add_ps(d0, d1));
866 d = buf[0] + buf[1] + buf[2] + buf[3];
// Scalar path, unrolled by 4.
871 for( ; j <= n - 4; j += 4 )
873 float t0 = a[j] - b[j], t1 = a[j+1] - b[j+1], t2 = a[j+2] - b[j+2], t3 = a[j+3] - b[j+3];
874 d += t0*t0 + t1*t1 + t2*t2 + t3*t3;
880 float t = a[j] - b[j];
// L1 distance between two float vectors. The SSE path clears the IEEE sign
// bit with an AND mask (absbuf = 0x7fffffff per lane) to take |a-b| without a
// branch. NOTE(review): #if guards, tail loop header and return are missing.
887 float normL1_(const float* a, const float* b, int n)
889 int j = 0; float d = 0.f;
893 float CV_DECL_ALIGNED(16) buf[4];
894 static const int CV_DECL_ALIGNED(16) absbuf[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
895 __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
896 __m128 absmask = _mm_load_ps((const float*)absbuf);
898 for( ; j <= n - 8; j += 8 )
900 __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
901 __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
902 d0 = _mm_add_ps(d0, _mm_and_ps(t0, absmask));
903 d1 = _mm_add_ps(d1, _mm_and_ps(t1, absmask));
905 _mm_store_ps(buf, _mm_add_ps(d0, d1));
906 d = buf[0] + buf[1] + buf[2] + buf[3];
// Scalar path, unrolled by 4.
911 for( ; j <= n - 4; j += 4 )
913 d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
914 std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
919 d += std::abs(a[j] - b[j]);
// L1 distance between two byte vectors. The SSE path uses _mm_sad_epu8 (sum
// of absolute differences) on 16-byte chunks and a 4-byte tail, then reduces
// the two 64-bit partials. NOTE(review): guards, declarations of j/d and the
// return are missing from this view.
923 int normL1_(const uchar* a, const uchar* b, int n)
929 __m128i d0 = _mm_setzero_si128();
931 for( ; j <= n - 16; j += 16 )
933 __m128i t0 = _mm_loadu_si128((const __m128i*)(a + j));
934 __m128i t1 = _mm_loadu_si128((const __m128i*)(b + j));
936 d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
// 4-byte SAD tail via 32-bit loads into the low lane.
939 for( ; j <= n - 4; j += 4 )
941 __m128i t0 = _mm_cvtsi32_si128(*(const int*)(a + j));
942 __m128i t1 = _mm_cvtsi32_si128(*(const int*)(b + j));
944 d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
946 d = _mm_cvtsi128_si32(_mm_add_epi32(d0, _mm_unpackhi_epi64(d0, d0)));
// Scalar path, unrolled by 4.
951 for( ; j <= n - 4; j += 4 )
953 d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
954 std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
958 d += std::abs(a[j] - b[j]);
// popCountTable[b]  = number of set bits in byte b (used by NORM_HAMMING).
// popCountTable2[b] = number of non-zero 2-bit cells in b (NORM_HAMMING2).
// popCountTable4[b] = number of non-zero 4-bit cells in b.
// NOTE(review): the brace lines delimiting each initializer are not visible
// in this fragment.
962 static const uchar popCountTable[] =
964 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
965 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
966 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
967 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
968 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
969 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
970 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
971 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
// Counts of non-zero 2-bit groups per byte.
974 static const uchar popCountTable2[] =
976 0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
977 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
978 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
979 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
980 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
981 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
982 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
983 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
// Counts of non-zero 4-bit groups (nibbles) per byte.
986 static const uchar popCountTable4[] =
988 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
989 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
990 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
991 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
992 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
993 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
994 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
995 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
// Population count of an n-byte buffer (Hamming weight). NEON path counts
// bits 16 bytes at a time via vcntq_u8 + pairwise widening adds; the portable
// path uses the byte popcount table, unrolled by 4 with a scalar tail.
// NOTE(review): the #if CV_NEON / #else guards and the return are not visible
// in this fragment.
998 static int normHamming(const uchar* a, int n)
1000 int i = 0, result = 0;
1002 uint32x4_t bits = vmovq_n_u32(0);
1003 for (; i <= n - 16; i += 16) {
1004 uint8x16_t A_vec = vld1q_u8 (a + i);
1005 uint8x16_t bitsSet = vcntq_u8 (A_vec);
1006 uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
1007 uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
1008 bits = vaddq_u32(bits, bitSet4);
// Final horizontal reduction of the vector accumulator.
1010 uint64x2_t bitSet2 = vpaddlq_u32 (bits);
1011 result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
1012 result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
1014 for( ; i <= n - 4; i += 4 )
1015 result += popCountTable[a[i]] + popCountTable[a[i+1]] +
1016 popCountTable[a[i+2]] + popCountTable[a[i+3]];
1019 result += popCountTable[a[i]];
// Hamming distance between two n-byte buffers: popcount of a XOR b. Same
// NEON / table-lookup structure as the single-buffer overload above.
// NOTE(review): the #if CV_NEON / #else guards and the return are not visible
// in this fragment.
1023 int normHamming(const uchar* a, const uchar* b, int n)
1025 int i = 0, result = 0;
1027 uint32x4_t bits = vmovq_n_u32(0);
1028 for (; i <= n - 16; i += 16) {
1029 uint8x16_t A_vec = vld1q_u8 (a + i);
1030 uint8x16_t B_vec = vld1q_u8 (b + i);
1031 uint8x16_t AxorB = veorq_u8 (A_vec, B_vec);
1032 uint8x16_t bitsSet = vcntq_u8 (AxorB);
1033 uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
1034 uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
1035 bits = vaddq_u32(bits, bitSet4);
1037 uint64x2_t bitSet2 = vpaddlq_u32 (bits);
1038 result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
1039 result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
1041 for( ; i <= n - 4; i += 4 )
1042 result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] +
1043 popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]];
1046 result += popCountTable[a[i] ^ b[i]];
// Generalized Hamming weight with cell sizes 1, 2 or 4: counts non-zero
// bit-cells per byte via the corresponding lookup table; cellSize 1 degrades
// to the plain popcount overload. NOTE(review): the `if( cellSize == ... )`
// headers and the return are not visible in this fragment.
1050 static int normHamming(const uchar* a, int n, int cellSize)
1053 return normHamming(a, n);
1054 const uchar* tab = 0;
1056 tab = popCountTable2;
1057 else if( cellSize == 4 )
1058 tab = popCountTable4;
1060 CV_Error( CV_StsBadSize, "bad cell size (not 1, 2 or 4) in normHamming" );
1061 int i = 0, result = 0;
1062 #if CV_ENABLE_UNROLLED
1063 for( ; i <= n - 4; i += 4 )
1064 result += tab[a[i]] + tab[a[i+1]] + tab[a[i+2]] + tab[a[i+3]];
1067 result += tab[a[i]];
// Generalized Hamming distance with cell sizes 1, 2 or 4 — same table scheme
// as above applied to a XOR b. NOTE(review): branch headers and the return
// are not visible in this fragment.
1071 int normHamming(const uchar* a, const uchar* b, int n, int cellSize)
1074 return normHamming(a, b, n);
1075 const uchar* tab = 0;
1077 tab = popCountTable2;
1078 else if( cellSize == 4 )
1079 tab = popCountTable4;
1081 CV_Error( CV_StsBadSize, "bad cell size (not 1, 2 or 4) in normHamming" );
1082 int i = 0, result = 0;
1083 #if CV_ENABLE_UNROLLED
1084 for( ; i <= n - 4; i += 4 )
1085 result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] +
1086 tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]];
1089 result += tab[a[i] ^ b[i]];
// Templated per-plane norm kernels. Each takes an optional mask; with no mask
// it delegates to the tight single-array helpers (normInf / normL1 /
// normL2Sqr), otherwise it walks pixels and only accumulates where
// mask[i] != 0. The normDiff* variants do the same for the distance between
// two arrays. All update *_result in place; the int return is presumably a
// status/count — TODO confirm against the full source.
// NOTE(review): braces, mask tests and returns are missing from this view.
1094 template<typename T, typename ST> int
1095 normInf_(const T* src, const uchar* mask, ST* _result, int len, int cn)
1097 ST result = *_result;
1100 result = std::max(result, normInf<T, ST>(src, len*cn));
1104 for( int i = 0; i < len; i++, src += cn )
1107 for( int k = 0; k < cn; k++ )
1108 result = std::max(result, ST(fast_abs(src[k])));
1115 template<typename T, typename ST> int
1116 normL1_(const T* src, const uchar* mask, ST* _result, int len, int cn)
1118 ST result = *_result;
1121 result += normL1<T, ST>(src, len*cn);
1125 for( int i = 0; i < len; i++, src += cn )
1128 for( int k = 0; k < cn; k++ )
1129 result += fast_abs(src[k]);
1136 template<typename T, typename ST> int
1137 normL2_(const T* src, const uchar* mask, ST* _result, int len, int cn)
1139 ST result = *_result;
1142 result += normL2Sqr<T, ST>(src, len*cn);
1146 for( int i = 0; i < len; i++, src += cn )
1149 for( int k = 0; k < cn; k++ )
// Difference-norm kernels over two arrays.
1160 template<typename T, typename ST> int
1161 normDiffInf_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
1163 ST result = *_result;
1166 result = std::max(result, normInf<T, ST>(src1, src2, len*cn));
1170 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
1173 for( int k = 0; k < cn; k++ )
1174 result = std::max(result, (ST)std::abs(src1[k] - src2[k]));
1181 template<typename T, typename ST> int
1182 normDiffL1_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
1184 ST result = *_result;
1187 result += normL1<T, ST>(src1, src2, len*cn);
1191 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
1194 for( int k = 0; k < cn; k++ )
1195 result += std::abs(src1[k] - src2[k]);
1202 template<typename T, typename ST> int
1203 normDiffL2_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
1205 ST result = *_result;
1208 result += normL2Sqr<T, ST>(src1, src2, len*cn);
1212 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
1215 for( int k = 0; k < cn; k++ )
1217 ST v = src1[k] - src2[k];
// Macros stamping out the non-template norm / normDiff entry points for each
// (norm kind, element type) pair; the accumulator type ntype is chosen per
// depth below to avoid overflow.
1227 #define CV_DEF_NORM_FUNC(L, suffix, type, ntype) \
1228 static int norm##L##_##suffix(const type* src, const uchar* mask, ntype* r, int len, int cn) \
1229 { return norm##L##_(src, mask, r, len, cn); } \
1230 static int normDiff##L##_##suffix(const type* src1, const type* src2, \
1231 const uchar* mask, ntype* r, int len, int cn) \
1232 { return normDiff##L##_(src1, src2, mask, r, (int)len, cn); }
// Defines Inf/L1/L2 variants for one element type in one shot.
1234 #define CV_DEF_NORM_ALL(suffix, type, inftype, l1type, l2type) \
1235 CV_DEF_NORM_FUNC(Inf, suffix, type, inftype) \
1236 CV_DEF_NORM_FUNC(L1, suffix, type, l1type) \
1237 CV_DEF_NORM_FUNC(L2, suffix, type, l2type)
// Instantiations: (suffix, element type, Inf-acc, L1-acc, L2-acc).
1239 CV_DEF_NORM_ALL(8u, uchar, int, int, int)
1240 CV_DEF_NORM_ALL(8s, schar, int, int, int)
1241 CV_DEF_NORM_ALL(16u, ushort, int, int, double)
1242 CV_DEF_NORM_ALL(16s, short, int, int, double)
1243 CV_DEF_NORM_ALL(32s, int, int, double, double)
1244 CV_DEF_NORM_ALL(32f, float, float, double, double)
1245 CV_DEF_NORM_ALL(64f, double, double, double, double)
// Dispatch tables: first index is the norm kind (Inf/L1/L2 — cv::norm maps
// normType via `normType >> 1`), second is the element depth.
// NOTE(review): the row-delimiting braces are missing from this fragment.
1248 typedef int (*NormFunc)(const uchar*, const uchar*, uchar*, int, int);
1249 typedef int (*NormDiffFunc)(const uchar*, const uchar*, const uchar*, uchar*, int, int);
1251 static NormFunc normTab[3][8] =
1254 (NormFunc)GET_OPTIMIZED(normInf_8u), (NormFunc)GET_OPTIMIZED(normInf_8s), (NormFunc)GET_OPTIMIZED(normInf_16u), (NormFunc)GET_OPTIMIZED(normInf_16s),
1255 (NormFunc)GET_OPTIMIZED(normInf_32s), (NormFunc)GET_OPTIMIZED(normInf_32f), (NormFunc)normInf_64f, 0
1258 (NormFunc)GET_OPTIMIZED(normL1_8u), (NormFunc)GET_OPTIMIZED(normL1_8s), (NormFunc)GET_OPTIMIZED(normL1_16u), (NormFunc)GET_OPTIMIZED(normL1_16s),
1259 (NormFunc)GET_OPTIMIZED(normL1_32s), (NormFunc)GET_OPTIMIZED(normL1_32f), (NormFunc)normL1_64f, 0
1262 (NormFunc)GET_OPTIMIZED(normL2_8u), (NormFunc)GET_OPTIMIZED(normL2_8s), (NormFunc)GET_OPTIMIZED(normL2_16u), (NormFunc)GET_OPTIMIZED(normL2_16s),
1263 (NormFunc)GET_OPTIMIZED(normL2_32s), (NormFunc)GET_OPTIMIZED(normL2_32f), (NormFunc)normL2_64f, 0
// Same layout for the two-array difference norms.
1267 static NormDiffFunc normDiffTab[3][8] =
1270 (NormDiffFunc)GET_OPTIMIZED(normDiffInf_8u), (NormDiffFunc)normDiffInf_8s,
1271 (NormDiffFunc)normDiffInf_16u, (NormDiffFunc)normDiffInf_16s,
1272 (NormDiffFunc)normDiffInf_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffInf_32f),
1273 (NormDiffFunc)normDiffInf_64f, 0
1276 (NormDiffFunc)GET_OPTIMIZED(normDiffL1_8u), (NormDiffFunc)normDiffL1_8s,
1277 (NormDiffFunc)normDiffL1_16u, (NormDiffFunc)normDiffL1_16s,
1278 (NormDiffFunc)normDiffL1_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL1_32f),
1279 (NormDiffFunc)normDiffL1_64f, 0
1282 (NormDiffFunc)GET_OPTIMIZED(normDiffL2_8u), (NormDiffFunc)normDiffL2_8s,
1283 (NormDiffFunc)normDiffL2_16u, (NormDiffFunc)normDiffL2_16s,
1284 (NormDiffFunc)normDiffL2_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL2_32f),
1285 (NormDiffFunc)normDiffL2_64f, 0
// Computes the absolute norm of a single array.
// Supported types: NORM_INF, NORM_L1, NORM_L2, NORM_L2SQR and, for CV_8U
// input only, NORM_HAMMING / NORM_HAMMING2 (enforced by the assert below).
// The optional mask must be empty or CV_8U; only elements with a non-zero
// mask value contribute.
// NOTE(review): this chunk is shown with lines elided (braces and some
// statements are missing); comments describe the visible logic only.
1291 double cv::norm( InputArray _src, int normType, InputArray _mask )
1293 Mat src = _src.getMat(), mask = _mask.getMat();
1294 int depth = src.depth(), cn = src.channels();
1297 CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR ||
1298 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src.type() == CV_8U) );
// Fast path: a continuous, unmasked array can be flattened into a single
// vector of len = total*channels elements and handed to one low-level call.
// The len fits-in-int check guards the (int)len casts below.
1300 if( src.isContinuous() && mask.empty() )
1302 size_t len = src.total()*cn;
1303 if( len == (size_t)(int)len )
1305 if( depth == CV_32F )
1307 const float* data = src.ptr<float>();
1309 if( normType == NORM_L2 )
1312 GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
// NORM_L2 is the square root of the accumulated sum of squares.
1313 return std::sqrt(result);
1315 if( normType == NORM_L2SQR )
1318 GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
1321 if( normType == NORM_L1 )
1324 GET_OPTIMIZED(normL1_32f)(data, 0, &result, (int)len, 1);
1327 if( normType == NORM_INF )
1330 GET_OPTIMIZED(normInf_32f)(data, 0, &result, (int)len, 1);
// CV_8U fast path for the bit-count based Hamming norms.
1334 if( depth == CV_8U )
1336 const uchar* data = src.ptr<uchar>();
1338 if( normType == NORM_HAMMING )
1339 return normHamming(data, (int)len);
1341 if( normType == NORM_HAMMING2 )
1342 return normHamming(data, (int)len, 2);
1347 CV_Assert( mask.empty() || mask.type() == CV_8U );
// Masked Hamming norm: zero out masked-off bytes first, then recurse on the
// unmasked overload.
1349 if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
1354 bitwise_and(src, mask, temp);
1355 return norm(temp, normType);
// NORM_HAMMING counts bits per byte (cell size 1); NORM_HAMMING2 per 2-byte cell.
1357 int cellSize = normType == NORM_HAMMING ? 1 : 2;
1359 const Mat* arrays[] = {&src, 0};
1361 NAryMatIterator it(arrays, ptrs);
1362 int total = (int)it.size;
// Iterate over the (possibly non-continuous / multi-dim) array plane by plane.
1365 for( size_t i = 0; i < it.nplanes; i++, ++it )
1366 result += normHamming(ptrs[0], total, cellSize);
// General path: pick the per-depth kernel. normType >> 1 maps
// NORM_INF/NORM_L1/NORM_L2(SQR) onto rows 0/1/2 of normTab.
1371 NormFunc func = normTab[normType >> 1][depth];
1372 CV_Assert( func != 0 );
1374 const Mat* arrays[] = {&src, &mask, 0};
1384 NAryMatIterator it(arrays, ptrs);
1385 int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
// For small integer depths the kernels accumulate into an int; process data
// in blocks small enough that the int partial sum cannot overflow, flushing
// it into the double result between blocks.
1386 bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
1387 ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
// result appears to be a union of int/float/double accumulators — its
// declaration is not visible in this chunk.
1389 int *ibuf = &result.i;
// Overflow-safe block length: 2^23 elements for L1 on 8-bit data, 2^15
// otherwise, scaled down by the channel count.
1394 intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
1395 blockSize = std::min(blockSize, intSumBlockSize);
1397 esz = src.elemSize();
1400 for( size_t i = 0; i < it.nplanes; i++, ++it )
1402 for( j = 0; j < total; j += blockSize )
1404 int bsz = std::min(total - j, blockSize);
1405 func( ptrs[0], ptrs[1], (uchar*)ibuf, bsz, cn );
// Flush the int accumulator when the next block could overflow it, or when
// this is the very last block of the whole array.
1407 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
// Convert the raw accumulator to double: NORM_INF kernels store the running
// max in the field matching the source depth; L2 still needs the sqrt.
1419 if( normType == NORM_INF )
1421 if( depth == CV_64F )
1423 else if( depth == CV_32F )
1424 result.d = result.f;
1426 result.d = result.i;
1428 else if( normType == NORM_L2 )
1429 result.d = std::sqrt(result.d);
// Computes the norm of the difference between two arrays of identical size
// and type: NORM_INF, NORM_L1, NORM_L2, NORM_L2SQR, or (CV_8U only)
// NORM_HAMMING / NORM_HAMMING2. A CV_RELATIVE flag divides the result by
// the norm of the second array.
// NOTE(review): lines are elided in this chunk (braces/statements missing);
// comments describe the visible logic only.
1435 double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
// Relative norm: ||src1 - src2|| / ||src2||, with DBL_EPSILON guarding
// against division by zero when src2 is all zeros.
1437 if( normType & CV_RELATIVE )
1438 return norm(_src1, _src2, normType & ~CV_RELATIVE, _mask)/(norm(_src2, normType, _mask) + DBL_EPSILON);
1440 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
1441 int depth = src1.depth(), cn = src1.channels();
1443 CV_Assert( src1.size == src2.size && src1.type() == src2.type() );
1446 CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR ||
1447 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
// Fast path: both arrays continuous and no mask — treat them as flat
// vectors and call a single low-level diff kernel.
1449 if( src1.isContinuous() && src2.isContinuous() && mask.empty() )
1451 size_t len = src1.total()*src1.channels();
1452 if( len == (size_t)(int)len )
1454 if( src1.depth() == CV_32F )
1456 const float* data1 = src1.ptr<float>();
1457 const float* data2 = src2.ptr<float>();
1459 if( normType == NORM_L2 )
1462 GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
1463 return std::sqrt(result);
1465 if( normType == NORM_L2SQR )
1468 GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
1471 if( normType == NORM_L1 )
1474 GET_OPTIMIZED(normDiffL1_32f)(data1, data2, 0, &result, (int)len, 1);
1477 if( normType == NORM_INF )
1480 GET_OPTIMIZED(normDiffInf_32f)(data1, data2, 0, &result, (int)len, 1);
1487 CV_Assert( mask.empty() || mask.type() == CV_8U );
// Hamming norm of a difference = Hamming norm of the XOR; masked-off bytes
// are zeroed so they contribute no set bits.
1489 if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
1494 bitwise_xor(src1, src2, temp);
1495 bitwise_and(temp, mask, temp);
1496 return norm(temp, normType);
1498 int cellSize = normType == NORM_HAMMING ? 1 : 2;
1500 const Mat* arrays[] = {&src1, &src2, 0};
1502 NAryMatIterator it(arrays, ptrs);
1503 int total = (int)it.size;
1506 for( size_t i = 0; i < it.nplanes; i++, ++it )
1507 result += normHamming(ptrs[0], ptrs[1], total, cellSize);
// General path: per-depth diff kernel, same row mapping as normDiffTab's
// comment describes (normType >> 1 -> INF/L1/L2).
1512 NormDiffFunc func = normDiffTab[normType >> 1][depth];
1513 CV_Assert( func != 0 );
1515 const Mat* arrays[] = {&src1, &src2, &mask, 0};
1526 NAryMatIterator it(arrays, ptrs);
1527 int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
// Same int-accumulator blocking scheme as in the single-array cv::norm.
1528 bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
1529 ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
// Here the accumulator is the unsigned union member (diff kernels produce
// unsigned partial sums).
1531 unsigned *ibuf = &result.u;
// NOTE(review): unlike cv::norm above, this block size is not divided by
// cn — confirm the overflow margin is still sufficient for multi-channel data.
1536 intSumBlockSize = normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15);
1537 blockSize = std::min(blockSize, intSumBlockSize);
1539 esz = src1.elemSize();
1542 for( size_t i = 0; i < it.nplanes; i++, ++it )
1544 for( j = 0; j < total; j += blockSize )
1546 int bsz = std::min(total - j, blockSize);
1547 func( ptrs[0], ptrs[1], ptrs[2], (uchar*)ibuf, bsz, cn );
// Flush the integer partial sum before it can overflow, and at the very end.
1549 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
// Final conversion to double, mirroring the single-array version.
1562 if( normType == NORM_INF )
1564 if( depth == CV_64F )
1566 else if( depth == CV_32F )
1567 result.d = result.f;
1569 result.d = result.u;
1571 else if( normType == NORM_L2 )
1572 result.d = std::sqrt(result.d);
1578 ///////////////////////////////////// batch distance ///////////////////////////////////////
// Computes the L1 distance from one query vector src1 to each of the nvecs
// reference vectors stored row-wise in src2 (row stride step2, given in
// bytes and converted to elements below). Results go to dist[0..nvecs-1].
// When a mask is supplied, masked-out rows receive the maximum representable
// distance so they can never win a nearest-neighbour comparison.
1583 template<typename _Tp, typename _Rt>
1584 void batchDistL1_(const _Tp* src1, const _Tp* src2, size_t step2,
1585 int nvecs, int len, _Rt* dist, const uchar* mask)
// Convert the byte stride into an element stride.
1587 step2 /= sizeof(src2[0]);
// Unmasked case: distance to every reference row.
1590 for( int i = 0; i < nvecs; i++ )
1591 dist[i] = normL1<_Tp, _Rt>(src1, src2 + step2*i, len);
// Masked case: sentinel value for rows the mask excludes.
1595 _Rt val0 = std::numeric_limits<_Rt>::max();
1596 for( int i = 0; i < nvecs; i++ )
1597 dist[i] = mask[i] ? normL1<_Tp, _Rt>(src1, src2 + step2*i, len) : val0;
// Same contract as batchDistL1_ but computes squared L2 distances
// (no square root), which is sufficient for ranking nearest neighbours.
1601 template<typename _Tp, typename _Rt>
1602 void batchDistL2Sqr_(const _Tp* src1, const _Tp* src2, size_t step2,
1603 int nvecs, int len, _Rt* dist, const uchar* mask)
// Byte stride -> element stride.
1605 step2 /= sizeof(src2[0]);
1608 for( int i = 0; i < nvecs; i++ )
1609 dist[i] = normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len);
// Masked rows get the max sentinel so they never rank as nearest.
1613 _Rt val0 = std::numeric_limits<_Rt>::max();
1614 for( int i = 0; i < nvecs; i++ )
1615 dist[i] = mask[i] ? normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len) : val0;
// Same contract as batchDistL1_ but computes true (square-rooted) L2
// distances.
1619 template<typename _Tp, typename _Rt>
1620 void batchDistL2_(const _Tp* src1, const _Tp* src2, size_t step2,
1621 int nvecs, int len, _Rt* dist, const uchar* mask)
// Byte stride -> element stride.
1623 step2 /= sizeof(src2[0]);
1626 for( int i = 0; i < nvecs; i++ )
1627 dist[i] = std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len));
// Masked rows get the max sentinel so they never rank as nearest.
1631 _Rt val0 = std::numeric_limits<_Rt>::max();
1632 for( int i = 0; i < nvecs; i++ )
1633 dist[i] = mask[i] ? std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len)) : val0;
// Hamming-distance analogue of the batchDist* templates for 8-bit data:
// distance from src1 to each src2 row into dist[], with masked rows getting
// a sentinel (val0, declared on a line elided from this chunk).
1637 static void batchDistHamming(const uchar* src1, const uchar* src2, size_t step2,
1638 int nvecs, int len, int* dist, const uchar* mask)
// Byte stride -> element stride (no-op for uchar, kept for symmetry).
1640 step2 /= sizeof(src2[0]);
1643 for( int i = 0; i < nvecs; i++ )
1644 dist[i] = normHamming(src1, src2 + step2*i, len);
1649 for( int i = 0; i < nvecs; i++ )
1650 dist[i] = mask[i] ? normHamming(src1, src2 + step2*i, len) : val0;
// Same as batchDistHamming but uses the 2-byte-cell variant of normHamming
// (the trailing argument 2), matching NORM_HAMMING2 semantics.
1654 static void batchDistHamming2(const uchar* src1, const uchar* src2, size_t step2,
1655 int nvecs, int len, int* dist, const uchar* mask)
// Byte stride -> element stride (no-op for uchar, kept for symmetry).
1657 step2 /= sizeof(src2[0]);
1660 for( int i = 0; i < nvecs; i++ )
1661 dist[i] = normHamming(src1, src2 + step2*i, len, 2);
1666 for( int i = 0; i < nvecs; i++ )
1667 dist[i] = mask[i] ? normHamming(src1, src2 + step2*i, len, 2) : val0;
// Type-concrete wrapper (uchar input, int L1 distance) so the template
// instantiation can be cast to the uniform BatchDistFunc pointer type.
1671 static void batchDistL1_8u32s(const uchar* src1, const uchar* src2, size_t step2,
1672 int nvecs, int len, int* dist, const uchar* mask)
1674 batchDistL1_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask);
// Type-concrete wrapper: uchar input, float L1 distance.
1677 static void batchDistL1_8u32f(const uchar* src1, const uchar* src2, size_t step2,
1678 int nvecs, int len, float* dist, const uchar* mask)
1680 batchDistL1_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
// Type-concrete wrapper: uchar input, int squared-L2 distance.
1683 static void batchDistL2Sqr_8u32s(const uchar* src1, const uchar* src2, size_t step2,
1684 int nvecs, int len, int* dist, const uchar* mask)
1686 batchDistL2Sqr_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask);
// Type-concrete wrapper: uchar input, float squared-L2 distance.
1689 static void batchDistL2Sqr_8u32f(const uchar* src1, const uchar* src2, size_t step2,
1690 int nvecs, int len, float* dist, const uchar* mask)
1692 batchDistL2Sqr_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
// Type-concrete wrapper: uchar input, float L2 distance.
1695 static void batchDistL2_8u32f(const uchar* src1, const uchar* src2, size_t step2,
1696 int nvecs, int len, float* dist, const uchar* mask)
1698 batchDistL2_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
// Type-concrete wrapper: float input, float L1 distance.
1701 static void batchDistL1_32f(const float* src1, const float* src2, size_t step2,
1702 int nvecs, int len, float* dist, const uchar* mask)
1704 batchDistL1_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
// Type-concrete wrapper: float input, float squared-L2 distance.
1707 static void batchDistL2Sqr_32f(const float* src1, const float* src2, size_t step2,
1708 int nvecs, int len, float* dist, const uchar* mask)
1710 batchDistL2Sqr_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
// Type-concrete wrapper: float input, float L2 distance.
1713 static void batchDistL2_32f(const float* src1, const float* src2, size_t step2,
1714 int nvecs, int len, float* dist, const uchar* mask)
1716 batchDistL2_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
// Uniform function-pointer type for all batchDist* kernels; the concrete
// wrappers above are cast to it so batchDistance() can dispatch through a
// single pointer regardless of element/distance types.
1719 typedef void (*BatchDistFunc)(const uchar* src1, const uchar* src2, size_t step2,
1720 int nvecs, int len, uchar* dist, const uchar* mask);
// Parallel body for cv::batchDistance: for each src1 row in its range it
// computes the distances to every src2 row via func and, when K > 0,
// merges them into that row's running list of the K smallest distances
// (dist) and their indices (nidx) by sorted insertion.
// NOTE(review): member declarations and several statements are elided from
// this chunk; comments describe the visible logic only.
1723 struct BatchDistInvoker : public ParallelLoopBody
1725 BatchDistInvoker( const Mat& _src1, const Mat& _src2,
1726 Mat& _dist, Mat& _nidx, int _K,
1727 const Mat& _mask, int _update,
1728 BatchDistFunc _func)
1740 void operator()(const Range& range) const
// Scratch buffer holding one distance per src2 row, used only for the
// K-NN (K > 0) path.
1742 AutoBuffer<int> buf(src2->rows);
1745 for( int i = range.start; i < range.end; i++ )
// Compute all src2 distances for query row i: into the scratch buffer when
// K-NN selection follows, directly into the output row otherwise. The mask
// row is passed through only when a mask was provided.
1747 func(src1->ptr(i), src2->ptr(), src2->step, src2->rows, src2->cols,
1748 K > 0 ? (uchar*)bufptr : dist->ptr(i), mask->data ? mask->ptr(i) : 0);
1752 int* nidxptr = nidx->ptr<int>(i);
1753 // since positive float's can be compared just like int's,
1754 // we handle both CV_32S and CV_32F cases with a single branch
1755 int* distptr = (int*)dist->ptr(i);
1759 for( j = 0; j < src2->rows; j++ )
// Candidate beats the current K-th best: shift worse entries right and
// insert it at its sorted position.
1762 if( d < distptr[K-1] )
1764 for( k = K-2; k >= 0 && distptr[k] > d; k-- )
1766 nidxptr[k+1] = nidxptr[k];
1767 distptr[k+1] = distptr[k];
// update offsets the stored index (used when accumulating over several calls).
1769 nidxptr[k+1] = j + update;
// Computes, for every row (vector) of src1, its distance to the rows of
// src2 under normType. With K > 0 it keeps only the K nearest src2 rows
// per src1 row (distances in _dist, indices in _nidx); with K == 0 it
// writes the full src1.rows x src2.rows distance matrix. crosscheck keeps
// only mutual nearest-neighbour matches.
// NOTE(review): lines are elided from this chunk (braces and some guards
// are missing); comments describe the visible logic only.
1789 void cv::batchDistance( InputArray _src1, InputArray _src2,
1790 OutputArray _dist, int dtype, OutputArray _nidx,
1791 int normType, int K, InputArray _mask,
1792 int update, bool crosscheck )
1794 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
1795 int type = src1.type();
// Inputs must be same-type CV_8U or CV_32F matrices with equal vector length.
1796 CV_Assert( type == src2.type() && src1.cols == src2.cols &&
1797 (type == CV_32F || type == CV_8U));
// An index output is required exactly when K-NN selection is requested.
1798 CV_Assert( _nidx.needed() == (K > 0) );
// Default output depth: integer for the Hamming norms, float otherwise.
// (This assignment is presumably guarded by a dtype == -1 check on a line
// not shown here — confirm against the full source.)
1802 dtype = normType == NORM_HAMMING || normType == NORM_HAMMING2 ? CV_32S : CV_32F;
1804 CV_Assert( (type == CV_8U && dtype == CV_32S) || dtype == CV_32F);
// Cannot keep more neighbours than there are reference vectors.
1806 K = std::min(K, src2.rows);
1808 _dist.create(src1.rows, (K > 0 ? K : src2.rows), dtype);
1809 Mat dist = _dist.getMat(), nidx;
1810 if( _nidx.needed() )
1812 _nidx.create(dist.size(), CV_32S);
1813 nidx = _nidx.getMat();
// Fresh K-NN run: prime the running state with "infinite" distances and
// invalid (-1) indices so any real match replaces them.
1816 if( update == 0 && K > 0 )
1818 dist = Scalar::all(dtype == CV_32S ? (double)INT_MAX : (double)FLT_MAX);
1819 nidx = Scalar::all(-1);
// Crosscheck is only implemented for the simplest configuration.
1824 CV_Assert( K == 1 && update == 0 && mask.empty() );
// Run the matching in the reverse direction (src2 -> src1) first.
1826 batchDistance(src2, src1, tdist, dtype, tidx, normType, K, mask, 0, false);
1828 // if an idx-th element from src1 appeared to be the nearest to i-th element of src2,
1829 // we update the minimum mutual distance between idx-th element of src1 and the whole src2 set.
1830 // As a result, if nidx[idx] = i*, it means that idx-th element of src1 is the nearest
1831 // to i*-th element of src2 and i*-th element of src2 is the closest to idx-th element of src1.
1832 // If nidx[idx] = -1, it means that there is no such ideal couple for it in src2.
1833 // This O(N) procedure is called cross-check and it helps to eliminate some false matches.
1834 if( dtype == CV_32S )
1836 for( int i = 0; i < tdist.rows; i++ )
1838 int idx = tidx.at<int>(i);
1839 int d = tdist.at<int>(i), d0 = dist.at<int>(idx);
1842 dist.at<int>(idx) = d;
1843 nidx.at<int>(idx) = i + update;
// Same cross-check bookkeeping for the float distance type.
1849 for( int i = 0; i < tdist.rows; i++ )
1851 int idx = tidx.at<int>(i);
1852 float d = tdist.at<float>(i), d0 = dist.at<float>(idx);
1855 dist.at<float>(idx) = d;
1856 nidx.at<int>(idx) = i + update;
// Select the concrete kernel for the (input type, output type, norm) combo.
1863 BatchDistFunc func = 0;
1866 if( normType == NORM_L1 && dtype == CV_32S )
1867 func = (BatchDistFunc)batchDistL1_8u32s;
1868 else if( normType == NORM_L1 && dtype == CV_32F )
1869 func = (BatchDistFunc)batchDistL1_8u32f;
1870 else if( normType == NORM_L2SQR && dtype == CV_32S )
1871 func = (BatchDistFunc)batchDistL2Sqr_8u32s;
1872 else if( normType == NORM_L2SQR && dtype == CV_32F )
1873 func = (BatchDistFunc)batchDistL2Sqr_8u32f;
1874 else if( normType == NORM_L2 && dtype == CV_32F )
1875 func = (BatchDistFunc)batchDistL2_8u32f;
1876 else if( normType == NORM_HAMMING && dtype == CV_32S )
1877 func = (BatchDistFunc)batchDistHamming;
1878 else if( normType == NORM_HAMMING2 && dtype == CV_32S )
1879 func = (BatchDistFunc)batchDistHamming2;
1881 else if( type == CV_32F && dtype == CV_32F )
1883 if( normType == NORM_L1 )
1884 func = (BatchDistFunc)batchDistL1_32f;
1885 else if( normType == NORM_L2SQR )
1886 func = (BatchDistFunc)batchDistL2Sqr_32f;
1887 else if( normType == NORM_L2 )
1888 func = (BatchDistFunc)batchDistL2_32f;
// No kernel matched: reject the parameter combination explicitly.
1892 CV_Error_(CV_StsUnsupportedFormat,
1893 ("The combination of type=%d, dtype=%d and normType=%d is not supported",
1894 type, dtype, normType));
// Distribute the per-row work across threads; rows are independent.
1896 parallel_for_(Range(0, src1.rows),
1897 BatchDistInvoker(src1, src2, dist, nidx, K, mask, update, func));
// Collects the coordinates of the non-zero pixels of a CV_8UC1 matrix into
// an n x 1 CV_32SC2 output, one Point(x, y) per non-zero pixel, in row-major
// scan order. NOTE(review): the per-pixel non-zero test (and the release of
// a non-continuous pre-existing output) sit on lines elided from this chunk.
1901 void cv::findNonZero( InputArray _src, OutputArray _idx )
1903 Mat src = _src.getMat();
1904 CV_Assert( src.type() == CV_8UC1 );
// Count first so the output can be allocated exactly once.
1905 int n = countNonZero(src);
// A non-continuous user-provided Mat cannot hold the flat result.
1906 if( _idx.kind() == _InputArray::MAT && !_idx.getMatRef().isContinuous() )
1908 _idx.create(n, 1, CV_32SC2);
1909 Mat idx = _idx.getMat();
1910 CV_Assert(idx.isContinuous());
1911 Point* idx_ptr = (Point*)idx.data;
1913 for( int i = 0; i < src.rows; i++ )
1915 const uchar* bin_ptr = src.ptr(i);
1916 for( int j = 0; j < src.cols; j++ )
// Point stores (column, row), i.e. (x, y).
1918 *idx_ptr++ = Point(j, i);
// Legacy C-API wrapper around cv::sum. If the input is an IplImage with a
// channel of interest (COI) selected, only that channel's sum is returned
// (moved into component 0 of the scalar).
1923 CV_IMPL CvScalar cvSum( const CvArr* srcarr )
1925 cv::Scalar sum = cv::sum(cv::cvarrToMat(srcarr, false, true, 1));
1926 if( CV_IS_IMAGE(srcarr) )
1928 int coi = cvGetImageCOI((IplImage*)srcarr);
// COI is 1-based; only channels 1..4 are valid.
1931 CV_Assert( 0 < coi && coi <= 4 );
1932 sum = cv::Scalar(sum[coi-1]);
// Legacy C-API wrapper around cv::countNonZero. Multi-channel inputs are
// reduced to their channel of interest first, since countNonZero requires a
// single-channel array.
1938 CV_IMPL int cvCountNonZero( const CvArr* imgarr )
1940 cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
1941 if( img.channels() > 1 )
1942 cv::extractImageCOI(imgarr, img);
1943 return countNonZero(img);
// Legacy C-API wrapper around cv::mean (optionally masked). If the input is
// an IplImage with a COI selected, only that channel's mean is returned in
// component 0 of the scalar.
1948 cvAvg( const void* imgarr, const void* maskarr )
1950 cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
1951 cv::Scalar mean = !maskarr ? cv::mean(img) : cv::mean(img, cv::cvarrToMat(maskarr));
1952 if( CV_IS_IMAGE(imgarr) )
1954 int coi = cvGetImageCOI((IplImage*)imgarr);
// COI is 1-based; only channels 1..4 are valid.
1957 CV_Assert( 0 < coi && coi <= 4 );
1958 mean = cv::Scalar(mean[coi-1]);
// Legacy C-API wrapper around cv::meanStdDev: writes the (optionally
// masked) per-channel mean and standard deviation to the caller-provided
// CvScalar pointers, collapsing to the COI channel for IplImages with one
// selected.
1966 cvAvgSdv( const CvArr* imgarr, CvScalar* _mean, CvScalar* _sdv, const void* maskarr )
1968 cv::Scalar mean, sdv;
1972 mask = cv::cvarrToMat(maskarr);
1974 cv::meanStdDev(cv::cvarrToMat(imgarr, false, true, 1), mean, sdv, mask );
1976 if( CV_IS_IMAGE(imgarr) )
1978 int coi = cvGetImageCOI((IplImage*)imgarr);
// COI is 1-based; only channels 1..4 are valid.
1981 CV_Assert( 0 < coi && coi <= 4 );
1982 mean = cv::Scalar(mean[coi-1]);
1983 sdv = cv::Scalar(sdv[coi-1]);
// Copy results out through the C-style output pointers (the null checks
// guarding these writes sit on lines elided from this chunk).
1988 *(cv::Scalar*)_mean = mean;
1990 *(cv::Scalar*)_sdv = sdv;
// Legacy C-API wrapper around cv::minMaxLoc. Multi-channel inputs are
// reduced to their channel of interest first; min/max values and locations
// are written through the optional output pointers.
1995 cvMinMaxLoc( const void* imgarr, double* _minVal, double* _maxVal,
1996 CvPoint* _minLoc, CvPoint* _maxLoc, const void* maskarr )
1998 cv::Mat mask, img = cv::cvarrToMat(imgarr, false, true, 1);
2000 mask = cv::cvarrToMat(maskarr);
2001 if( img.channels() > 1 )
2002 cv::extractImageCOI(imgarr, img);
2004 cv::minMaxLoc( img, _minVal, _maxVal,
2005 (cv::Point*)_minLoc, (cv::Point*)_maxLoc, mask );
2010 cvNorm( const void* imgA, const void* imgB, int normType, const void* maskarr )
2019 a = cv::cvarrToMat(imgA, false, true, 1);
2021 mask = cv::cvarrToMat(maskarr);
2023 if( a.channels() > 1 && CV_IS_IMAGE(imgA) && cvGetImageCOI((const IplImage*)imgA) > 0 )
2024 cv::extractImageCOI(imgA, a);
2027 return !maskarr ? cv::norm(a, normType) : cv::norm(a, normType, mask);
2029 cv::Mat b = cv::cvarrToMat(imgB, false, true, 1);
2030 if( b.channels() > 1 && CV_IS_IMAGE(imgB) && cvGetImageCOI((const IplImage*)imgB) > 0 )
2031 cv::extractImageCOI(imgB, b);
2033 return !maskarr ? cv::norm(a, b, normType) : cv::norm(a, b, normType, mask);