1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "precomp.hpp"
49 template<typename T> static inline Scalar rawToScalar(const T& v)
52 typedef typename DataType<T>::channel_type T1;
53 int i, n = DataType<T>::channels;
54 for( i = 0; i < n; i++ )
55 s.val[i] = ((T1*)&v)[i];
59 /****************************************************************************************\
61 \****************************************************************************************/
63 template<typename T, typename ST>
64 static int sum_(const T* src0, const uchar* mask, ST* dst, int len, int cn )
75 #if CV_ENABLE_UNROLLED
76 for(; i <= len - 4; i += 4, src += cn*4 )
77 s0 += src[0] + src[cn] + src[cn*2] + src[cn*3];
79 for( ; i < len; i++, src += cn )
85 ST s0 = dst[0], s1 = dst[1];
86 for( i = 0; i < len; i++, src += cn )
96 ST s0 = dst[0], s1 = dst[1], s2 = dst[2];
97 for( i = 0; i < len; i++, src += cn )
108 for( ; k < cn; k += 4 )
111 ST s0 = dst[k], s1 = dst[k+1], s2 = dst[k+2], s3 = dst[k+3];
112 for( i = 0; i < len; i++, src += cn )
114 s0 += src[0]; s1 += src[1];
115 s2 += src[2]; s3 += src[3];
129 for( i = 0; i < len; i++ )
139 ST s0 = dst[0], s1 = dst[1], s2 = dst[2];
140 for( i = 0; i < len; i++, src += 3 )
154 for( i = 0; i < len; i++, src += cn )
158 #if CV_ENABLE_UNROLLED
159 for( ; k <= cn - 4; k += 4 )
162 s0 = dst[k] + src[k];
163 s1 = dst[k+1] + src[k+1];
164 dst[k] = s0; dst[k+1] = s1;
165 s0 = dst[k+2] + src[k+2];
166 s1 = dst[k+3] + src[k+3];
167 dst[k+2] = s0; dst[k+3] = s1;
179 static int sum8u( const uchar* src, const uchar* mask, int* dst, int len, int cn )
180 { return sum_(src, mask, dst, len, cn); }
182 static int sum8s( const schar* src, const uchar* mask, int* dst, int len, int cn )
183 { return sum_(src, mask, dst, len, cn); }
185 static int sum16u( const ushort* src, const uchar* mask, int* dst, int len, int cn )
186 { return sum_(src, mask, dst, len, cn); }
188 static int sum16s( const short* src, const uchar* mask, int* dst, int len, int cn )
189 { return sum_(src, mask, dst, len, cn); }
191 static int sum32s( const int* src, const uchar* mask, double* dst, int len, int cn )
192 { return sum_(src, mask, dst, len, cn); }
194 static int sum32f( const float* src, const uchar* mask, double* dst, int len, int cn )
195 { return sum_(src, mask, dst, len, cn); }
197 static int sum64f( const double* src, const uchar* mask, double* dst, int len, int cn )
198 { return sum_(src, mask, dst, len, cn); }
200 typedef int (*SumFunc)(const uchar*, const uchar* mask, uchar*, int, int);
202 static SumFunc sumTab[] =
204 (SumFunc)GET_OPTIMIZED(sum8u), (SumFunc)sum8s,
205 (SumFunc)sum16u, (SumFunc)sum16s,
207 (SumFunc)GET_OPTIMIZED(sum32f), (SumFunc)sum64f,
212 static int countNonZero_(const T* src, int len )
215 #if CV_ENABLE_UNROLLED
216 for(; i <= len - 4; i += 4 )
217 nz += (src[i] != 0) + (src[i+1] != 0) + (src[i+2] != 0) + (src[i+3] != 0);
219 for( ; i < len; i++ )
224 static int countNonZero8u( const uchar* src, int len )
225 { return countNonZero_(src, len); }
227 static int countNonZero16u( const ushort* src, int len )
228 { return countNonZero_(src, len); }
230 static int countNonZero32s( const int* src, int len )
231 { return countNonZero_(src, len); }
233 static int countNonZero32f( const float* src, int len )
234 { return countNonZero_(src, len); }
236 static int countNonZero64f( const double* src, int len )
237 { return countNonZero_(src, len); }
239 typedef int (*CountNonZeroFunc)(const uchar*, int);
241 static CountNonZeroFunc countNonZeroTab[] =
243 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u),
244 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u),
245 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32s), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32f),
246 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero64f), 0
250 template<typename T, typename ST, typename SQT>
251 static int sumsqr_(const T* src0, const uchar* mask, ST* sum, SQT* sqsum, int len, int cn )
264 for( i = 0; i < len; i++, src += cn )
267 s0 += v; sq0 += (SQT)v*v;
274 ST s0 = sum[0], s1 = sum[1];
275 SQT sq0 = sqsum[0], sq1 = sqsum[1];
276 for( i = 0; i < len; i++, src += cn )
278 T v0 = src[0], v1 = src[1];
279 s0 += v0; sq0 += (SQT)v0*v0;
280 s1 += v1; sq1 += (SQT)v1*v1;
282 sum[0] = s0; sum[1] = s1;
283 sqsum[0] = sq0; sqsum[1] = sq1;
287 ST s0 = sum[0], s1 = sum[1], s2 = sum[2];
288 SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
289 for( i = 0; i < len; i++, src += cn )
291 T v0 = src[0], v1 = src[1], v2 = src[2];
292 s0 += v0; sq0 += (SQT)v0*v0;
293 s1 += v1; sq1 += (SQT)v1*v1;
294 s2 += v2; sq2 += (SQT)v2*v2;
296 sum[0] = s0; sum[1] = s1; sum[2] = s2;
297 sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
300 for( ; k < cn; k += 4 )
303 ST s0 = sum[k], s1 = sum[k+1], s2 = sum[k+2], s3 = sum[k+3];
304 SQT sq0 = sqsum[k], sq1 = sqsum[k+1], sq2 = sqsum[k+2], sq3 = sqsum[k+3];
305 for( i = 0; i < len; i++, src += cn )
308 v0 = src[0], v1 = src[1];
309 s0 += v0; sq0 += (SQT)v0*v0;
310 s1 += v1; sq1 += (SQT)v1*v1;
311 v0 = src[2], v1 = src[3];
312 s2 += v0; sq2 += (SQT)v0*v0;
313 s3 += v1; sq3 += (SQT)v1*v1;
315 sum[k] = s0; sum[k+1] = s1;
316 sum[k+2] = s2; sum[k+3] = s3;
317 sqsum[k] = sq0; sqsum[k+1] = sq1;
318 sqsum[k+2] = sq2; sqsum[k+3] = sq3;
329 for( i = 0; i < len; i++ )
333 s0 += v; sq0 += (SQT)v*v;
341 ST s0 = sum[0], s1 = sum[1], s2 = sum[2];
342 SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
343 for( i = 0; i < len; i++, src += 3 )
346 T v0 = src[0], v1 = src[1], v2 = src[2];
347 s0 += v0; sq0 += (SQT)v0*v0;
348 s1 += v1; sq1 += (SQT)v1*v1;
349 s2 += v2; sq2 += (SQT)v2*v2;
352 sum[0] = s0; sum[1] = s1; sum[2] = s2;
353 sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
357 for( i = 0; i < len; i++, src += cn )
360 for( int k = 0; k < cn; k++ )
364 SQT sq = sqsum[k] + (SQT)v*v;
365 sum[k] = s; sqsum[k] = sq;
374 static int sqsum8u( const uchar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
375 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
377 static int sqsum8s( const schar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
378 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
380 static int sqsum16u( const ushort* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
381 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
383 static int sqsum16s( const short* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
384 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
386 static int sqsum32s( const int* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
387 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
389 static int sqsum32f( const float* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
390 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
392 static int sqsum64f( const double* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
393 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
395 typedef int (*SumSqrFunc)(const uchar*, const uchar* mask, uchar*, uchar*, int, int);
397 static SumSqrFunc sumSqrTab[] =
399 (SumSqrFunc)GET_OPTIMIZED(sqsum8u), (SumSqrFunc)sqsum8s, (SumSqrFunc)sqsum16u, (SumSqrFunc)sqsum16s,
400 (SumSqrFunc)sqsum32s, (SumSqrFunc)GET_OPTIMIZED(sqsum32f), (SumSqrFunc)sqsum64f, 0
405 cv::Scalar cv::sum( InputArray _src )
407 Mat src = _src.getMat();
408 int k, cn = src.channels(), depth = src.depth();
409 SumFunc func = sumTab[depth];
411 CV_Assert( cn <= 4 && func != 0 );
413 const Mat* arrays[] = {&src, 0};
415 NAryMatIterator it(arrays, ptrs);
417 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
419 AutoBuffer<int> _buf;
420 int* buf = (int*)&s[0];
422 bool blockSum = depth < CV_32S;
426 intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
427 blockSize = std::min(blockSize, intSumBlockSize);
431 for( k = 0; k < cn; k++ )
433 esz = src.elemSize();
436 for( size_t i = 0; i < it.nplanes; i++, ++it )
438 for( j = 0; j < total; j += blockSize )
440 int bsz = std::min(total - j, blockSize);
441 func( ptrs[0], 0, (uchar*)buf, bsz, cn );
443 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
445 for( k = 0; k < cn; k++ )
458 int cv::countNonZero( InputArray _src )
460 Mat src = _src.getMat();
461 CountNonZeroFunc func = countNonZeroTab[src.depth()];
463 CV_Assert( src.channels() == 1 && func != 0 );
465 const Mat* arrays[] = {&src, 0};
467 NAryMatIterator it(arrays, ptrs);
468 int total = (int)it.size, nz = 0;
470 for( size_t i = 0; i < it.nplanes; i++, ++it )
471 nz += func( ptrs[0], total );
476 cv::Scalar cv::mean( InputArray _src, InputArray _mask )
478 Mat src = _src.getMat(), mask = _mask.getMat();
479 CV_Assert( mask.empty() || mask.type() == CV_8U );
481 int k, cn = src.channels(), depth = src.depth();
482 SumFunc func = sumTab[depth];
484 CV_Assert( cn <= 4 && func != 0 );
486 const Mat* arrays[] = {&src, &mask, 0};
488 NAryMatIterator it(arrays, ptrs);
490 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
492 AutoBuffer<int> _buf;
493 int* buf = (int*)&s[0];
494 bool blockSum = depth <= CV_16S;
495 size_t esz = 0, nz0 = 0;
499 intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
500 blockSize = std::min(blockSize, intSumBlockSize);
504 for( k = 0; k < cn; k++ )
506 esz = src.elemSize();
509 for( size_t i = 0; i < it.nplanes; i++, ++it )
511 for( j = 0; j < total; j += blockSize )
513 int bsz = std::min(total - j, blockSize);
514 int nz = func( ptrs[0], ptrs[1], (uchar*)buf, bsz, cn );
517 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
519 for( k = 0; k < cn; k++ )
531 return s*(nz0 ? 1./nz0 : 0);
535 void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
537 Mat src = _src.getMat(), mask = _mask.getMat();
538 CV_Assert( mask.empty() || mask.type() == CV_8U );
540 int k, cn = src.channels(), depth = src.depth();
541 SumSqrFunc func = sumSqrTab[depth];
543 CV_Assert( func != 0 );
545 const Mat* arrays[] = {&src, &mask, 0};
547 NAryMatIterator it(arrays, ptrs);
548 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
549 int j, count = 0, nz0 = 0;
550 AutoBuffer<double> _buf(cn*4);
551 double *s = (double*)_buf, *sq = s + cn;
552 int *sbuf = (int*)s, *sqbuf = (int*)sq;
553 bool blockSum = depth <= CV_16S, blockSqSum = depth <= CV_8S;
556 for( k = 0; k < cn; k++ )
561 intSumBlockSize = 1 << 15;
562 blockSize = std::min(blockSize, intSumBlockSize);
563 sbuf = (int*)(sq + cn);
566 for( k = 0; k < cn; k++ )
567 sbuf[k] = sqbuf[k] = 0;
568 esz = src.elemSize();
571 for( size_t i = 0; i < it.nplanes; i++, ++it )
573 for( j = 0; j < total; j += blockSize )
575 int bsz = std::min(total - j, blockSize);
576 int nz = func( ptrs[0], ptrs[1], (uchar*)sbuf, (uchar*)sqbuf, bsz, cn );
579 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
581 for( k = 0; k < cn; k++ )
588 for( k = 0; k < cn; k++ )
602 double scale = nz0 ? 1./nz0 : 0.;
603 for( k = 0; k < cn; k++ )
606 sq[k] = std::sqrt(std::max(sq[k]*scale - s[k]*s[k], 0.));
609 for( j = 0; j < 2; j++ )
611 const double* sptr = j == 0 ? s : sq;
612 _OutputArray _dst = j == 0 ? _mean : _sdv;
616 if( !_dst.fixedSize() )
617 _dst.create(cn, 1, CV_64F, -1, true);
618 Mat dst = _dst.getMat();
619 int dcn = (int)dst.total();
620 CV_Assert( dst.type() == CV_64F && dst.isContinuous() &&
621 (dst.cols == 1 || dst.rows == 1) && dcn >= cn );
622 double* dptr = dst.ptr<double>();
623 for( k = 0; k < cn; k++ )
625 for( ; k < dcn; k++ )
630 /****************************************************************************************\
632 \****************************************************************************************/
637 template<typename T, typename WT> static void
638 minMaxIdx_( const T* src, const uchar* mask, WT* _minVal, WT* _maxVal,
639 size_t* _minIdx, size_t* _maxIdx, int len, size_t startIdx )
641 WT minVal = *_minVal, maxVal = *_maxVal;
642 size_t minIdx = *_minIdx, maxIdx = *_maxIdx;
646 for( int i = 0; i < len; i++ )
652 minIdx = startIdx + i;
657 maxIdx = startIdx + i;
663 for( int i = 0; i < len; i++ )
666 if( mask[i] && val < minVal )
669 minIdx = startIdx + i;
671 if( mask[i] && val > maxVal )
674 maxIdx = startIdx + i;
685 static void minMaxIdx_8u(const uchar* src, const uchar* mask, int* minval, int* maxval,
686 size_t* minidx, size_t* maxidx, int len, size_t startidx )
687 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
689 static void minMaxIdx_8s(const schar* src, const uchar* mask, int* minval, int* maxval,
690 size_t* minidx, size_t* maxidx, int len, size_t startidx )
691 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
693 static void minMaxIdx_16u(const ushort* src, const uchar* mask, int* minval, int* maxval,
694 size_t* minidx, size_t* maxidx, int len, size_t startidx )
695 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
697 static void minMaxIdx_16s(const short* src, const uchar* mask, int* minval, int* maxval,
698 size_t* minidx, size_t* maxidx, int len, size_t startidx )
699 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
701 static void minMaxIdx_32s(const int* src, const uchar* mask, int* minval, int* maxval,
702 size_t* minidx, size_t* maxidx, int len, size_t startidx )
703 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
705 static void minMaxIdx_32f(const float* src, const uchar* mask, float* minval, float* maxval,
706 size_t* minidx, size_t* maxidx, int len, size_t startidx )
707 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
709 static void minMaxIdx_64f(const double* src, const uchar* mask, double* minval, double* maxval,
710 size_t* minidx, size_t* maxidx, int len, size_t startidx )
711 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
713 typedef void (*MinMaxIdxFunc)(const uchar*, const uchar*, int*, int*, size_t*, size_t*, int, size_t);
715 static MinMaxIdxFunc minmaxTab[] =
717 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8s),
718 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16s),
719 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32s),
720 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32f), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_64f),
724 static void ofs2idx(const Mat& a, size_t ofs, int* idx)
730 for( i = d-1; i >= 0; i-- )
733 idx[i] = (int)(ofs % sz);
739 for( i = d-1; i >= 0; i-- )
746 void cv::minMaxIdx(InputArray _src, double* minVal,
747 double* maxVal, int* minIdx, int* maxIdx,
750 Mat src = _src.getMat(), mask = _mask.getMat();
751 int depth = src.depth(), cn = src.channels();
753 CV_Assert( (cn == 1 && (mask.empty() || mask.type() == CV_8U)) ||
754 (cn >= 1 && mask.empty() && !minIdx && !maxIdx) );
755 MinMaxIdxFunc func = minmaxTab[depth];
756 CV_Assert( func != 0 );
758 const Mat* arrays[] = {&src, &mask, 0};
760 NAryMatIterator it(arrays, ptrs);
762 size_t minidx = 0, maxidx = 0;
763 int iminval = INT_MAX, imaxval = INT_MIN;
764 float fminval = FLT_MAX, fmaxval = -FLT_MAX;
765 double dminval = DBL_MAX, dmaxval = -DBL_MAX;
767 int *minval = &iminval, *maxval = &imaxval;
768 int planeSize = (int)it.size*cn;
770 if( depth == CV_32F )
771 minval = (int*)&fminval, maxval = (int*)&fmaxval;
772 else if( depth == CV_64F )
773 minval = (int*)&dminval, maxval = (int*)&dmaxval;
775 for( size_t i = 0; i < it.nplanes; i++, ++it, startidx += planeSize )
776 func( ptrs[0], ptrs[1], minval, maxval, &minidx, &maxidx, planeSize, startidx );
779 dminval = dmaxval = 0;
780 else if( depth == CV_32F )
781 dminval = fminval, dmaxval = fmaxval;
782 else if( depth <= CV_32S )
783 dminval = iminval, dmaxval = imaxval;
791 ofs2idx(src, minidx, minIdx);
793 ofs2idx(src, maxidx, maxIdx);
796 void cv::minMaxLoc( InputArray _img, double* minVal, double* maxVal,
797 Point* minLoc, Point* maxLoc, InputArray mask )
799 Mat img = _img.getMat();
800 CV_Assert(img.dims <= 2);
802 minMaxIdx(_img, minVal, maxVal, (int*)minLoc, (int*)maxLoc, mask);
804 std::swap(minLoc->x, minLoc->y);
806 std::swap(maxLoc->x, maxLoc->y);
809 /****************************************************************************************\
811 \****************************************************************************************/
816 float normL2Sqr_(const float* a, const float* b, int n)
818 int j = 0; float d = 0.f;
822 float CV_DECL_ALIGNED(16) buf[4];
823 __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
825 for( ; j <= n - 8; j += 8 )
827 __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
828 __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
829 d0 = _mm_add_ps(d0, _mm_mul_ps(t0, t0));
830 d1 = _mm_add_ps(d1, _mm_mul_ps(t1, t1));
832 _mm_store_ps(buf, _mm_add_ps(d0, d1));
833 d = buf[0] + buf[1] + buf[2] + buf[3];
837 //vz why do we need unroll here? no sse = no need to unroll
839 for( ; j <= n - 4; j += 4 )
841 float t0 = a[j] - b[j], t1 = a[j+1] - b[j+1], t2 = a[j+2] - b[j+2], t3 = a[j+3] - b[j+3];
842 d += t0*t0 + t1*t1 + t2*t2 + t3*t3;
848 float t = a[j] - b[j];
855 float normL1_(const float* a, const float* b, int n)
857 int j = 0; float d = 0.f;
861 float CV_DECL_ALIGNED(16) buf[4];
862 static const int CV_DECL_ALIGNED(16) absbuf[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
863 __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
864 __m128 absmask = _mm_load_ps((const float*)absbuf);
866 for( ; j <= n - 8; j += 8 )
868 __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
869 __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
870 d0 = _mm_add_ps(d0, _mm_and_ps(t0, absmask));
871 d1 = _mm_add_ps(d1, _mm_and_ps(t1, absmask));
873 _mm_store_ps(buf, _mm_add_ps(d0, d1));
874 d = buf[0] + buf[1] + buf[2] + buf[3];
878 //vz no need to unroll here - if no sse
880 for( ; j <= n - 4; j += 4 )
882 d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
883 std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
888 d += std::abs(a[j] - b[j]);
892 int normL1_(const uchar* a, const uchar* b, int n)
898 __m128i d0 = _mm_setzero_si128();
900 for( ; j <= n - 16; j += 16 )
902 __m128i t0 = _mm_loadu_si128((const __m128i*)(a + j));
903 __m128i t1 = _mm_loadu_si128((const __m128i*)(b + j));
905 d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
908 for( ; j <= n - 4; j += 4 )
910 __m128i t0 = _mm_cvtsi32_si128(*(const int*)(a + j));
911 __m128i t1 = _mm_cvtsi32_si128(*(const int*)(b + j));
913 d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
915 d = _mm_cvtsi128_si32(_mm_add_epi32(d0, _mm_unpackhi_epi64(d0, d0)));
919 //vz why do we need unroll here? no sse = no unroll
921 for( ; j <= n - 4; j += 4 )
923 d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
924 std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
928 d += std::abs(a[j] - b[j]);
932 static const uchar popCountTable[] =
934 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
935 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
936 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
937 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
938 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
939 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
940 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
941 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
944 static const uchar popCountTable2[] =
946 0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
947 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
948 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
949 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
950 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
951 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
952 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
953 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
956 static const uchar popCountTable4[] =
958 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
959 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
960 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
961 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
962 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
963 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
964 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
965 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
968 int normHamming(const uchar* a, const uchar* b, int n)
970 int i = 0, result = 0;
972 if (CPU_HAS_NEON_FEATURE)
974 uint32x4_t bits = vmovq_n_u32(0);
975 for (; i <= n - 16; i += 16) {
976 uint8x16_t A_vec = vld1q_u8 (a + i);
977 uint8x16_t B_vec = vld1q_u8 (b + i);
978 uint8x16_t AxorB = veorq_u8 (A_vec, B_vec);
979 uint8x16_t bitsSet = vcntq_u8 (AxorB);
980 uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
981 uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
982 bits = vaddq_u32(bits, bitSet4);
984 uint64x2_t bitSet2 = vpaddlq_u32 (bits);
985 result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
986 result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
990 for( ; i <= n - 4; i += 4 )
991 result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] +
992 popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]];
994 result += popCountTable[a[i] ^ b[i]];
998 int normHamming(const uchar* a, const uchar* b, int n, int cellSize)
1001 return normHamming(a, b, n);
1002 const uchar* tab = 0;
1004 tab = popCountTable2;
1005 else if( cellSize == 4 )
1006 tab = popCountTable4;
1008 CV_Error( CV_StsBadSize, "bad cell size (not 1, 2 or 4) in normHamming" );
1009 int i = 0, result = 0;
1010 #if CV_ENABLE_UNROLLED
1011 for( ; i <= n - 4; i += 4 )
1012 result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] +
1013 tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]];
1016 result += tab[a[i] ^ b[i]];
1021 template<typename T, typename ST> int
1022 normInf_(const T* src, const uchar* mask, ST* _result, int len, int cn)
1024 ST result = *_result;
1027 result = std::max(result, normInf<T, ST>(src, len*cn));
1031 for( int i = 0; i < len; i++, src += cn )
1034 for( int k = 0; k < cn; k++ )
1035 result = std::max(result, ST(fast_abs(src[k])));
1042 template<typename T, typename ST> int
1043 normL1_(const T* src, const uchar* mask, ST* _result, int len, int cn)
1045 ST result = *_result;
1048 result += normL1<T, ST>(src, len*cn);
1052 for( int i = 0; i < len; i++, src += cn )
1055 for( int k = 0; k < cn; k++ )
1056 result += fast_abs(src[k]);
1063 template<typename T, typename ST> int
1064 normL2_(const T* src, const uchar* mask, ST* _result, int len, int cn)
1066 ST result = *_result;
1069 result += normL2Sqr<T, ST>(src, len*cn);
1073 for( int i = 0; i < len; i++, src += cn )
1076 for( int k = 0; k < cn; k++ )
1087 template<typename T, typename ST> int
1088 normDiffInf_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
1090 ST result = *_result;
1093 result = std::max(result, normInf<T, ST>(src1, src2, len*cn));
1097 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
1100 for( int k = 0; k < cn; k++ )
1101 result = std::max(result, (ST)std::abs(src1[k] - src2[k]));
1108 template<typename T, typename ST> int
1109 normDiffL1_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
1111 ST result = *_result;
1114 result += normL1<T, ST>(src1, src2, len*cn);
1118 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
1121 for( int k = 0; k < cn; k++ )
1122 result += std::abs(src1[k] - src2[k]);
1129 template<typename T, typename ST> int
1130 normDiffL2_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
1132 ST result = *_result;
1135 result += normL2Sqr<T, ST>(src1, src2, len*cn);
1139 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
1142 for( int k = 0; k < cn; k++ )
1144 ST v = src1[k] - src2[k];
// Expands to a pair of non-template wrapper kernels for one norm / element type:
//   norm<L>_<suffix>     - norm of a single (optionally masked) array,
//   normDiff<L>_<suffix> - norm of the element-wise difference of two arrays.
// L is the norm tag (Inf, L1, L2), `type` the source element type and `ntype`
// the accumulator/result type; both wrappers forward to the corresponding
// norm##L##_ / normDiff##L##_ templates defined above.
// NOTE: comments cannot go between the '\'-continued lines below.
#define CV_DEF_NORM_FUNC(L, suffix, type, ntype) \
static int norm##L##_##suffix(const type* src, const uchar* mask, ntype* r, int len, int cn) \
{ return norm##L##_(src, mask, r, len, cn); } \
static int normDiff##L##_##suffix(const type* src1, const type* src2, \
    const uchar* mask, ntype* r, int len, int cn) \
{ return normDiff##L##_(src1, src2, mask, r, (int)len, cn); }
// Instantiates CV_DEF_NORM_FUNC for all three norms (Inf, L1, L2) of one
// element type, allowing a distinct accumulator type per norm.
#define CV_DEF_NORM_ALL(suffix, type, inftype, l1type, l2type) \
CV_DEF_NORM_FUNC(Inf, suffix, type, inftype) \
CV_DEF_NORM_FUNC(L1, suffix, type, l1type) \
CV_DEF_NORM_FUNC(L2, suffix, type, l2type)
// Generate norm/normDiff kernels for every supported depth.
// Arguments: (suffix, element type, Inf accum, L1 accum, L2 accum) —
// wider accumulator types (double) are used for the larger element types.
CV_DEF_NORM_ALL(8u, uchar, int, int, int)
CV_DEF_NORM_ALL(8s, schar, int, int, int)
CV_DEF_NORM_ALL(16u, ushort, int, int, double)
CV_DEF_NORM_ALL(16s, short, int, int, double)
CV_DEF_NORM_ALL(32s, int, int, double, double)
CV_DEF_NORM_ALL(32f, float, float, double, double)
CV_DEF_NORM_ALL(64f, double, double, double, double)
1175 typedef int (*NormFunc)(const uchar*, const uchar*, uchar*, int, int);
1176 typedef int (*NormDiffFunc)(const uchar*, const uchar*, const uchar*, uchar*, int, int);
1178 static NormFunc normTab[3][8] =
1181 (NormFunc)GET_OPTIMIZED(normInf_8u), (NormFunc)normInf_8s, (NormFunc)normInf_16u, (NormFunc)normInf_16s,
1182 (NormFunc)normInf_32s, (NormFunc)GET_OPTIMIZED(normInf_32f), (NormFunc)normInf_64f, 0
1185 (NormFunc)GET_OPTIMIZED(normL1_8u), (NormFunc)normL1_8s, (NormFunc)normL1_16u, (NormFunc)normL1_16s,
1186 (NormFunc)normL1_32s, (NormFunc)GET_OPTIMIZED(normL1_32f), (NormFunc)normL1_64f, 0
1189 (NormFunc)GET_OPTIMIZED(normL2_8u), (NormFunc)normL2_8s, (NormFunc)normL2_16u, (NormFunc)normL2_16s,
1190 (NormFunc)normL2_32s, (NormFunc)GET_OPTIMIZED(normL2_32f), (NormFunc)normL2_64f, 0
1194 static NormDiffFunc normDiffTab[3][8] =
1197 (NormDiffFunc)GET_OPTIMIZED(normDiffInf_8u), (NormDiffFunc)normDiffInf_8s,
1198 (NormDiffFunc)normDiffInf_16u, (NormDiffFunc)normDiffInf_16s,
1199 (NormDiffFunc)normDiffInf_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffInf_32f),
1200 (NormDiffFunc)normDiffInf_64f, 0
1203 (NormDiffFunc)GET_OPTIMIZED(normDiffL1_8u), (NormDiffFunc)normDiffL1_8s,
1204 (NormDiffFunc)normDiffL1_16u, (NormDiffFunc)normDiffL1_16s,
1205 (NormDiffFunc)normDiffL1_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL1_32f),
1206 (NormDiffFunc)normDiffL1_64f, 0
1209 (NormDiffFunc)GET_OPTIMIZED(normDiffL2_8u), (NormDiffFunc)normDiffL2_8s,
1210 (NormDiffFunc)normDiffL2_16u, (NormDiffFunc)normDiffL2_16s,
1211 (NormDiffFunc)normDiffL2_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL2_32f),
1212 (NormDiffFunc)normDiffL2_64f, 0
1218 double cv::norm( InputArray _src, int normType, InputArray _mask )
1220 Mat src = _src.getMat(), mask = _mask.getMat();
1221 int depth = src.depth(), cn = src.channels();
1224 CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );
1226 if( depth == CV_32F && src.isContinuous() && mask.empty() )
1228 size_t len = src.total()*cn;
1229 if( len == (size_t)(int)len )
1231 const float* data = src.ptr<float>();
1233 if( normType == NORM_L2 )
1236 GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
1237 return std::sqrt(result);
1239 if( normType == NORM_L1 )
1242 GET_OPTIMIZED(normL1_32f)(data, 0, &result, (int)len, 1);
1247 GET_OPTIMIZED(normInf_32f)(data, 0, &result, (int)len, 1);
1254 CV_Assert( mask.empty() || mask.type() == CV_8U );
1256 NormFunc func = normTab[normType >> 1][depth];
1257 CV_Assert( func != 0 );
1259 const Mat* arrays[] = {&src, &mask, 0};
1269 NAryMatIterator it(arrays, ptrs);
1270 int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
1271 bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
1272 (normType == NORM_L2 && depth <= CV_8S);
1274 int *ibuf = &result.i;
1279 intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
1280 blockSize = std::min(blockSize, intSumBlockSize);
1282 esz = src.elemSize();
1285 for( size_t i = 0; i < it.nplanes; i++, ++it )
1287 for( j = 0; j < total; j += blockSize )
1289 int bsz = std::min(total - j, blockSize);
1290 func( ptrs[0], ptrs[1], (uchar*)ibuf, bsz, cn );
1292 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
1304 if( normType == NORM_INF )
1306 if( depth == CV_64F )
1308 else if( depth == CV_32F )
1309 result.d = result.f;
1311 result.d = result.i;
1313 else if( normType == NORM_L2 )
1314 result.d = std::sqrt(result.d);
// cv::norm overload for two arrays: the norm of the element-wise difference
// between src1 and src2, optionally restricted by an 8-bit mask.
// NOTE(review): several lines of the original are elided in this view
// (braces, the `result` accumulator declaration, some guards); the comments
// below describe only the visible code.
1320 double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
// CV_RELATIVE: normalize the difference norm by ||src2||; DBL_EPSILON guards
// against division by zero when src2 is all zeros.
1322 if( normType & CV_RELATIVE )
1323 return norm(_src1, _src2, normType & ~CV_RELATIVE, _mask)/(norm(_src2, normType, _mask) + DBL_EPSILON);
1325 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
1326 int depth = src1.depth(), cn = src1.channels();
// Both arrays must match in size and type; only INF/L1/L2 are supported here.
1328 CV_Assert( src1.size == src2.size && src1.type() == src2.type() );
1331 CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );
// Fast path: contiguous, unmasked 32f data goes straight to the optimized
// flat-array kernels, provided the total element count fits in an int.
1333 if( src1.depth() == CV_32F && src1.isContinuous() && src2.isContinuous() && mask.empty() )
1335 size_t len = src1.total()*src1.channels();
1336 if( len == (size_t)(int)len )
1338 const float* data1 = src1.ptr<float>();
1339 const float* data2 = src2.ptr<float>();
1341 if( normType == NORM_L2 )
1344 GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
// The L2 kernel accumulates the squared norm; take the root before returning.
1345 return std::sqrt(result);
1347 if( normType == NORM_L1 )
1350 GET_OPTIMIZED(normDiffL1_32f)(data1, data2, 0, &result, (int)len, 1);
1355 GET_OPTIMIZED(normDiffInf_32f)(data1, data2, 0, &result, (int)len, 1);
// General path: select a per-depth kernel from the dispatch table, indexed by
// normType >> 1 (the visible fast path shows only INF/L1/L2 reach this point).
1361 CV_Assert( mask.empty() || mask.type() == CV_8U );
1363 NormDiffFunc func = normDiffTab[normType >> 1][depth];
1364 CV_Assert( func != 0 );
// NAryMatIterator walks src1/src2/mask plane-by-plane in lockstep, so
// non-continuous and multi-dimensional arrays are handled uniformly.
1366 const Mat* arrays[] = {&src1, &src2, &mask, 0};
1377 NAryMatIterator it(arrays, ptrs);
1378 int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
// For small integer depths, L1/L2 sums are accumulated in `result.u` and
// flushed in blocks — presumably to keep the 32-bit partial sums from
// overflowing (flush code elided in this view).
1379 bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
1380 (normType == NORM_L2 && depth <= CV_8S);
1382 unsigned *ibuf = &result.u;
// Block-size constants bound the worst-case per-block sum for each depth.
1387 intSumBlockSize = normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15);
1388 blockSize = std::min(blockSize, intSumBlockSize);
1390 esz = src1.elemSize();
// Main accumulation loop over all planes, processed in blockSize chunks.
1393 for( size_t i = 0; i < it.nplanes; i++, ++it )
1395 for( j = 0; j < total; j += blockSize )
1397 int bsz = std::min(total - j, blockSize);
1398 func( ptrs[0], ptrs[1], ptrs[2], (uchar*)ibuf, bsz, cn );
// Flush the integer accumulator at block boundaries and at the very end.
1400 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
// Widen the result into `result.d` according to how it was accumulated
// (double stays as-is; float and unsigned branches convert).
1413 if( normType == NORM_INF )
1415 if( depth == CV_64F )
1417 else if( depth == CV_32F )
1418 result.d = result.f;
1420 result.d = result.u;
// L2 kernels accumulate the squared norm; convert to the actual L2 norm.
1422 else if( normType == NORM_L2 )
1423 result.d = std::sqrt(result.d);
1429 ///////////////////////////////////// batch distance ///////////////////////////////////////
// Computes the L1 distance from one query vector src1 (length `len`) to each
// of `nvecs` rows of src2. Masked-out rows receive the maximum representable
// distance so they can never be selected as nearest neighbours.
1434 template<typename _Tp, typename _Rt>
1435 void batchDistL1_(const _Tp* src1, const _Tp* src2, size_t step2,
1436 int nvecs, int len, _Rt* dist, const uchar* mask)
// step2 is converted from a byte stride (presumably Mat::step) to elements.
1438 step2 /= sizeof(src2[0]);
// Unmasked branch (the if(!mask) guard is elided in this view).
1441 for( int i = 0; i < nvecs; i++ )
1442 dist[i] = normL1<_Tp, _Rt>(src1, src2 + step2*i, len);
// Masked branch: rows with mask[i] == 0 get the sentinel "worst" distance.
1446 _Rt val0 = std::numeric_limits<_Rt>::max();
1447 for( int i = 0; i < nvecs; i++ )
1448 dist[i] = mask[i] ? normL1<_Tp, _Rt>(src1, src2 + step2*i, len) : val0;
// Computes the *squared* L2 distance from query vector src1 to each of the
// `nvecs` rows of src2; masked-out rows get the maximum sentinel distance.
1452 template<typename _Tp, typename _Rt>
1453 void batchDistL2Sqr_(const _Tp* src1, const _Tp* src2, size_t step2,
1454 int nvecs, int len, _Rt* dist, const uchar* mask)
// Byte stride -> element stride (presumably Mat::step input).
1456 step2 /= sizeof(src2[0]);
// Unmasked branch (the if(!mask) guard is elided in this view).
1459 for( int i = 0; i < nvecs; i++ )
1460 dist[i] = normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len);
// Masked branch: skipped rows never win a nearest-neighbour comparison.
1464 _Rt val0 = std::numeric_limits<_Rt>::max();
1465 for( int i = 0; i < nvecs; i++ )
1466 dist[i] = mask[i] ? normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len) : val0;
// Computes the (true, square-rooted) L2 distance from query vector src1 to
// each of the `nvecs` rows of src2; masked-out rows get the sentinel maximum.
1470 template<typename _Tp, typename _Rt>
1471 void batchDistL2_(const _Tp* src1, const _Tp* src2, size_t step2,
1472 int nvecs, int len, _Rt* dist, const uchar* mask)
// Byte stride -> element stride (presumably Mat::step input).
1474 step2 /= sizeof(src2[0]);
// Unmasked branch (the if(!mask) guard is elided in this view).
1477 for( int i = 0; i < nvecs; i++ )
1478 dist[i] = std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len));
// Masked branch: rows with mask[i] == 0 get the "worst" distance.
1482 _Rt val0 = std::numeric_limits<_Rt>::max();
1483 for( int i = 0; i < nvecs; i++ )
1484 dist[i] = mask[i] ? std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len)) : val0;
// Computes the Hamming distance (bit-level) from query descriptor src1 to
// each of the `nvecs` rows of src2, writing int distances into `dist`.
1488 static void batchDistHamming(const uchar* src1, const uchar* src2, size_t step2,
1489 int nvecs, int len, int* dist, const uchar* mask)
// step2 /= sizeof(uchar) is a no-op but kept for symmetry with the templates.
1491 step2 /= sizeof(src2[0]);
// Unmasked branch (the if(!mask) guard is elided in this view).
1494 for( int i = 0; i < nvecs; i++ )
1495 dist[i] = normHamming(src1, src2 + step2*i, len);
// Masked branch: val0 (declared on a line elided from this view, presumably
// INT_MAX) marks rows excluded by the mask.
1500 for( int i = 0; i < nvecs; i++ )
1501 dist[i] = mask[i] ? normHamming(src1, src2 + step2*i, len) : val0;
// Same as batchDistHamming, but uses the 2-bit-cell variant of normHamming
// (the trailing `2` argument), i.e. the NORM_HAMMING2 distance.
1505 static void batchDistHamming2(const uchar* src1, const uchar* src2, size_t step2,
1506 int nvecs, int len, int* dist, const uchar* mask)
// No-op for uchar data; kept for symmetry with the typed templates.
1508 step2 /= sizeof(src2[0]);
// Unmasked branch (the if(!mask) guard is elided in this view).
1511 for( int i = 0; i < nvecs; i++ )
1512 dist[i] = normHamming(src1, src2 + step2*i, len, 2);
// Masked branch: val0 (declaration elided from this view) is the sentinel
// distance for rows excluded by the mask.
1517 for( int i = 0; i < nvecs; i++ )
1518 dist[i] = mask[i] ? normHamming(src1, src2 + step2*i, len, 2) : val0;
1522 static void batchDistL1_8u32s(const uchar* src1, const uchar* src2, size_t step2,
1523 int nvecs, int len, int* dist, const uchar* mask)
1525 batchDistL1_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask);
1528 static void batchDistL1_8u32f(const uchar* src1, const uchar* src2, size_t step2,
1529 int nvecs, int len, float* dist, const uchar* mask)
1531 batchDistL1_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
1534 static void batchDistL2Sqr_8u32s(const uchar* src1, const uchar* src2, size_t step2,
1535 int nvecs, int len, int* dist, const uchar* mask)
1537 batchDistL2Sqr_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask);
1540 static void batchDistL2Sqr_8u32f(const uchar* src1, const uchar* src2, size_t step2,
1541 int nvecs, int len, float* dist, const uchar* mask)
1543 batchDistL2Sqr_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
1546 static void batchDistL2_8u32f(const uchar* src1, const uchar* src2, size_t step2,
1547 int nvecs, int len, float* dist, const uchar* mask)
1549 batchDistL2_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
1552 static void batchDistL1_32f(const float* src1, const float* src2, size_t step2,
1553 int nvecs, int len, float* dist, const uchar* mask)
1555 batchDistL1_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
1558 static void batchDistL2Sqr_32f(const float* src1, const float* src2, size_t step2,
1559 int nvecs, int len, float* dist, const uchar* mask)
1561 batchDistL2Sqr_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
1564 static void batchDistL2_32f(const float* src1, const float* src2, size_t step2,
1565 int nvecs, int len, float* dist, const uchar* mask)
1567 batchDistL2_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
// Uniform kernel signature for all batch-distance variants: element and
// result types are erased to uchar* so a single function pointer can be
// selected at runtime and invoked generically.
1570 typedef void (*BatchDistFunc)(const uchar* src1, const uchar* src2, size_t step2,
1571 int nvecs, int len, uchar* dist, const uchar* mask);
// Parallel-for body: for each query row i of src1 in the given range, compute
// the distances to all rows of src2 and, when K > 0, maintain the K nearest
// neighbours (sorted distances + matching indices) by sorted insertion.
// NOTE(review): member declarations and several statements of this struct are
// elided in this view; comments describe only the visible logic.
1574 struct BatchDistInvoker
1576 BatchDistInvoker( const Mat& _src1, const Mat& _src2,
1577 Mat& _dist, Mat& _nidx, int _K,
1578 const Mat& _mask, int _update,
1579 BatchDistFunc _func)
1591 void operator()(const BlockedRange& range) const
// Per-thread scratch row of raw distances, used only when K-NN selection runs.
1593 AutoBuffer<int> buf(src2->rows);
1596 for( int i = range.begin(); i < range.end(); i++ )
// Distances from query row i to every row of src2: written directly into the
// output when K == 0, otherwise into the scratch buffer for selection.
1598 func(src1->ptr(i), src2->ptr(), src2->step, src2->rows, src2->cols,
1599 K > 0 ? (uchar*)bufptr : dist->ptr(i), mask->data ? mask->ptr(i) : 0);
1603 int* nidxptr = nidx->ptr<int>(i);
1604 // since positive float's can be compared just like int's,
1605 // we handle both CV_32S and CV_32F cases with a single branch
1606 int* distptr = (int*)dist->ptr(i);
// k0 counts the already-filled neighbour slots (first slot holding idx < 0).
1609 for( k0 = 0; k0 < K; k0++ )
1610 if( nidxptr[k0] < 0 )
1612 k0_ = std::max(k0, 1);
1614 for( j = 0; j < src2->rows; j++ )
// Sorted insertion: if candidate d beats the current worst kept neighbour,
// shift worse entries down one slot and drop d into place.
1617 if( d < distptr[k0_-1] )
1619 for( k = std::min(k0-1, K-2); k >= 0 && distptr[k] > d; k-- )
1621 nidxptr[k+1] = nidxptr[k];
1622 distptr[k+1] = distptr[k];
// `update` offsets stored indices (used for incremental / chunked matching).
1624 nidxptr[k+1] = j + update;
1626 k0_ = k0 = std::min(k0 + 1, K);
// Computes distances between every row of src1 (queries) and every row of
// src2 (train set) under the given norm. With K > 0 the K nearest neighbours
// per query are kept in _dist/_nidx; with crosscheck only mutually-nearest
// pairs survive. NOTE(review): parts of the body (guards, temporaries such as
// tdist/tidx declarations) are elided in this view.
1645 void cv::batchDistance( InputArray _src1, InputArray _src2,
1646 OutputArray _dist, int dtype, OutputArray _nidx,
1647 int normType, int K, InputArray _mask,
1648 int update, bool crosscheck )
1650 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
1651 int type = src1.type();
// Only 8U and 32F inputs are supported, and both sets must agree in
// type and descriptor length (column count).
1652 CV_Assert( type == src2.type() && src1.cols == src2.cols &&
1653 (type == CV_32F || type == CV_8U));
// The index output is produced exactly when K-NN selection is requested.
1654 CV_Assert( _nidx.needed() == (K > 0) );
// Default output depth: int for Hamming norms, float otherwise.
1658 dtype = normType == NORM_HAMMING || normType == NORM_HAMMING2 ? CV_32S : CV_32F;
1660 CV_Assert( (type == CV_8U && dtype == CV_32S) || dtype == CV_32F);
// Cannot keep more neighbours than there are train rows.
1662 K = std::min(K, src2.rows);
// One row per query; K columns when selecting, else one per train row.
1664 _dist.create(src1.rows, (K > 0 ? K : src2.rows), dtype);
1665 Mat dist = _dist.getMat(), nidx;
1666 if( _nidx.needed() )
1668 _nidx.create(dist.size(), CV_32S);
1669 nidx = _nidx.getMat();
// Fresh (non-incremental) K-NN run: distances start at "infinity" and
// indices at -1 ("no match yet").
1672 if( update == 0 && K > 0 )
1674 dist = Scalar::all(dtype == CV_32S ? (double)INT_MAX : (double)FLT_MAX);
1675 nidx = Scalar::all(-1);
// Cross-check mode: only supported for single-neighbour, unmasked, fresh
// runs; the search is re-run in the opposite direction (src2 -> src1).
1680 CV_Assert( K == 1 && update == 0 && mask.empty() );
1682 batchDistance(src2, src1, tdist, dtype, tidx, normType, K, mask, 0, false);
1684 // if an idx-th element from src1 appeared to be the nearest to i-th element of src2,
1685 // we update the minimum mutual distance between idx-th element of src1 and the whole src2 set.
1686 // As a result, if nidx[idx] = i*, it means that idx-th element of src1 is the nearest
1687 // to i*-th element of src2 and i*-th element of src2 is the closest to idx-th element of src1.
1688 // If nidx[idx] = -1, it means that there is no such ideal couple for it in src2.
1689 // This O(N) procedure is called cross-check and it helps to eliminate some false matches.
1690 if( dtype == CV_32S )
1692 for( int i = 0; i < tdist.rows; i++ )
1694 int idx = tidx.at<int>(i);
1695 int d = tdist.at<int>(i), d0 = dist.at<int>(idx);
// Record the mutual match (the guarding comparison between d and d0 is
// elided from this view).
1698 dist.at<int>(idx) = d0;
1699 nidx.at<int>(idx) = i + update;
// Same cross-check bookkeeping for the float distance type.
1705 for( int i = 0; i < tdist.rows; i++ )
1707 int idx = tidx.at<int>(i);
1708 float d = tdist.at<float>(i), d0 = dist.at<float>(idx);
1711 dist.at<float>(idx) = d0;
1712 nidx.at<int>(idx) = i + update;
// Select the concrete kernel for the (input type, output type, norm) triple.
1719 BatchDistFunc func = 0;
1722 if( normType == NORM_L1 && dtype == CV_32S )
1723 func = (BatchDistFunc)batchDistL1_8u32s;
1724 else if( normType == NORM_L1 && dtype == CV_32F )
1725 func = (BatchDistFunc)batchDistL1_8u32f;
1726 else if( normType == NORM_L2SQR && dtype == CV_32S )
1727 func = (BatchDistFunc)batchDistL2Sqr_8u32s;
1728 else if( normType == NORM_L2SQR && dtype == CV_32F )
1729 func = (BatchDistFunc)batchDistL2Sqr_8u32f;
1730 else if( normType == NORM_L2 && dtype == CV_32F )
1731 func = (BatchDistFunc)batchDistL2_8u32f;
1732 else if( normType == NORM_HAMMING && dtype == CV_32S )
1733 func = (BatchDistFunc)batchDistHamming;
1734 else if( normType == NORM_HAMMING2 && dtype == CV_32S )
1735 func = (BatchDistFunc)batchDistHamming2;
1737 else if( type == CV_32F && dtype == CV_32F )
1739 if( normType == NORM_L1 )
1740 func = (BatchDistFunc)batchDistL1_32f;
1741 else if( normType == NORM_L2SQR )
1742 func = (BatchDistFunc)batchDistL2Sqr_32f;
1743 else if( normType == NORM_L2 )
1744 func = (BatchDistFunc)batchDistL2_32f;
// No kernel matched: report the unsupported combination explicitly.
1748 CV_Error_(CV_StsUnsupportedFormat,
1749 ("The combination of type=%d, dtype=%d and normType=%d is not supported",
1750 type, dtype, normType));
// Split the query rows across threads; each thread runs BatchDistInvoker.
1752 parallel_for(BlockedRange(0, src1.rows),
1753 BatchDistInvoker(src1, src2, dist, nidx, K, mask, update, func));
// Legacy C-API wrapper around cv::sum. For IplImage inputs with a channel of
// interest (COI) selected, the result is collapsed to that single channel's
// sum (the coi > 0 guard appears to be elided from this view).
1757 CV_IMPL CvScalar cvSum( const CvArr* srcarr )
1759 cv::Scalar sum = cv::sum(cv::cvarrToMat(srcarr, false, true, 1));
1760 if( CV_IS_IMAGE(srcarr) )
1762 int coi = cvGetImageCOI((IplImage*)srcarr);
1765 CV_Assert( 0 < coi && coi <= 4 );
// Keep only the selected channel's sum in component 0.
1766 sum = cv::Scalar(sum[coi-1]);
// Legacy C-API wrapper around cv::countNonZero. Multi-channel inputs are
// reduced to the selected channel of interest first (countNonZero itself
// requires a single-channel array).
1772 CV_IMPL int cvCountNonZero( const CvArr* imgarr )
1774 cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
1775 if( img.channels() > 1 )
1776 cv::extractImageCOI(imgarr, img);
1777 return countNonZero(img);
// Legacy C-API wrapper around cv::mean (optionally masked). For IplImage
// inputs with a COI selected, only that channel's mean is reported
// (the coi > 0 guard appears to be elided from this view).
1784 cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
1785 cv::Scalar mean = !maskarr ? cv::mean(img) : cv::mean(img, cv::cvarrToMat(maskarr));
1786 if( CV_IS_IMAGE(imgarr) )
1788 int coi = cvGetImageCOI((IplImage*)imgarr);
1791 CV_Assert( 0 < coi && coi <= 4 );
// Collapse the result to the selected channel in component 0.
1792 mean = cv::Scalar(mean[coi-1]);
// Legacy C-API wrapper around cv::meanStdDev: computes per-channel mean and
// standard deviation, optionally masked, and stores them into the caller's
// CvScalar out-parameters (null checks on _mean/_sdv elided from this view).
1800 cvAvgSdv( const CvArr* imgarr, CvScalar* _mean, CvScalar* _sdv, const void* maskarr )
1802 cv::Scalar mean, sdv;
1806 mask = cv::cvarrToMat(maskarr);
1808 cv::meanStdDev(cv::cvarrToMat(imgarr, false, true, 1), mean, sdv, mask );
// For IplImage inputs with a COI selected, report only that channel
// (the coi > 0 guard appears to be elided from this view).
1810 if( CV_IS_IMAGE(imgarr) )
1812 int coi = cvGetImageCOI((IplImage*)imgarr);
1815 CV_Assert( 0 < coi && coi <= 4 );
1816 mean = cv::Scalar(mean[coi-1]);
1817 sdv = cv::Scalar(sdv[coi-1]);
// Write results back through the C-API out-pointers.
1822 *(cv::Scalar*)_mean = mean;
1824 *(cv::Scalar*)_sdv = sdv;
// Legacy C-API wrapper around cv::minMaxLoc. Multi-channel inputs are reduced
// to the channel of interest first, since minMaxLoc needs a single channel.
// Any of the min/max value/location out-pointers may be null.
1829 cvMinMaxLoc( const void* imgarr, double* _minVal, double* _maxVal,
1830 CvPoint* _minLoc, CvPoint* _maxLoc, const void* maskarr )
1832 cv::Mat mask, img = cv::cvarrToMat(imgarr, false, true, 1);
// The maskarr null-check guarding this conversion is elided from this view.
1834 mask = cv::cvarrToMat(maskarr);
1835 if( img.channels() > 1 )
1836 cv::extractImageCOI(imgarr, img);
// CvPoint and cv::Point are layout-compatible, hence the direct casts.
1838 cv::minMaxLoc( img, _minVal, _maxVal,
1839 (cv::Point*)_minLoc, (cv::Point*)_maxLoc, mask );
// Legacy C-API wrapper around cv::norm. With imgB == NULL (guard elided from
// this view) it returns the norm of imgA alone; otherwise the norm of the
// difference between imgA and imgB. COI-selected images are reduced to the
// selected channel before computing.
1844 cvNorm( const void* imgA, const void* imgB, int normType, const void* maskarr )
1853 a = cv::cvarrToMat(imgA, false, true, 1);
// The maskarr null-check guarding this conversion is elided from this view.
1855 mask = cv::cvarrToMat(maskarr);
1857 if( a.channels() > 1 && CV_IS_IMAGE(imgA) && cvGetImageCOI((const IplImage*)imgA) > 0 )
1858 cv::extractImageCOI(imgA, a);
// Single-array norm path.
1861 return !maskarr ? cv::norm(a, normType) : cv::norm(a, normType, mask);
// Two-array (difference-norm) path.
1863 cv::Mat b = cv::cvarrToMat(imgB, false, true, 1);
1864 if( b.channels() > 1 && CV_IS_IMAGE(imgB) && cvGetImageCOI((const IplImage*)imgB) > 0 )
1865 cv::extractImageCOI(imgB, b);
1867 return !maskarr ? cv::norm(a, b, normType) : cv::norm(a, b, normType, mask);