1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "precomp.hpp"
// Expands a raw multi-channel pixel value 'v' into a cv::Scalar, copying one
// channel value per Scalar slot.
// NOTE(review): this excerpt is missing lines (the embedded original line
// numbers jump 49->52, 55->59), so the opening brace, the declaration of 's',
// and the 'return s;' are not visible here.
49 template<typename T> static inline Scalar rawToScalar(const T& v)
52 typedef typename DataType<T>::channel_type T1;
53 int i, n = DataType<T>::channels;
54 for( i = 0; i < n; i++ )
55 s.val[i] = ((T1*)&v)[i];
59 /****************************************************************************************\
61 \****************************************************************************************/
// Generic per-channel sum kernel: accumulates 'len' pixels of 'cn' channels
// from src0 into dst[0..cn-1] (accumulator type ST), optionally restricted by
// a uchar mask. Specialized fast paths exist for cn == 1, 2, 3 and multiples
// of 4; the trailing section handles the masked case.
// NOTE(review): many lines are elided in this excerpt (branch headers, masked
// 'if( mask[i] )' tests, closing braces, and the return statement are not
// visible) — the visible lines are the unrolled accumulation cores only.
63 template<typename T, typename ST>
64 static int sum_(const T* src0, const uchar* mask, ST* dst, int len, int cn )
// Single-channel path: 4x manual unrolling when CV_ENABLE_UNROLLED is set.
75 #if CV_ENABLE_UNROLLED
76 for(; i <= len - 4; i += 4, src += cn*4 )
77 s0 += src[0] + src[cn] + src[cn*2] + src[cn*3];
79 for( ; i < len; i++, src += cn )
// Two-channel path: independent accumulators s0/s1 avoid a dependency chain.
85 ST s0 = dst[0], s1 = dst[1];
86 for( i = 0; i < len; i++, src += cn )
// Three-channel path.
96 ST s0 = dst[0], s1 = dst[1], s2 = dst[2];
97 for( i = 0; i < len; i++, src += cn )
// General path: process channels four at a time.
108 for( ; k < cn; k += 4 )
111 ST s0 = dst[k], s1 = dst[k+1], s2 = dst[k+2], s3 = dst[k+3];
112 for( i = 0; i < len; i++, src += cn )
114 s0 += src[0]; s1 += src[1];
115 s2 += src[2]; s3 += src[3];
// Masked path: only pixels with a non-zero mask byte are accumulated.
129 for( i = 0; i < len; i++ )
139 ST s0 = dst[0], s1 = dst[1], s2 = dst[2];
140 for( i = 0; i < len; i++, src += 3 )
154 for( i = 0; i < len; i++, src += cn )
158 #if CV_ENABLE_UNROLLED
159 for( ; k <= cn - 4; k += 4 )
162 s0 = dst[k] + src[k];
163 s1 = dst[k+1] + src[k+1];
164 dst[k] = s0; dst[k+1] = s1;
165 s0 = dst[k+2] + src[k+2];
166 s1 = dst[k+3] + src[k+3];
167 dst[k+2] = s0; dst[k+3] = s1;
// Type-specific instantiations of sum_ for each supported depth. Integer
// sources up to 16 bits accumulate into int; 32-bit and floating-point
// sources accumulate into double to avoid overflow / precision loss.
179 static int sum8u( const uchar* src, const uchar* mask, int* dst, int len, int cn )
180 { return sum_(src, mask, dst, len, cn); }
182 static int sum8s( const schar* src, const uchar* mask, int* dst, int len, int cn )
183 { return sum_(src, mask, dst, len, cn); }
185 static int sum16u( const ushort* src, const uchar* mask, int* dst, int len, int cn )
186 { return sum_(src, mask, dst, len, cn); }
188 static int sum16s( const short* src, const uchar* mask, int* dst, int len, int cn )
189 { return sum_(src, mask, dst, len, cn); }
191 static int sum32s( const int* src, const uchar* mask, double* dst, int len, int cn )
192 { return sum_(src, mask, dst, len, cn); }
194 static int sum32f( const float* src, const uchar* mask, double* dst, int len, int cn )
195 { return sum_(src, mask, dst, len, cn); }
196 static int sum64f( const double* src, const uchar* mask, double* dst, int len, int cn )
197 { return sum_(src, mask, dst, len, cn); }
// Depth-indexed dispatch table for the sum kernels (indexed by CV_8U..CV_64F).
// NOTE(review): lines are elided here — the opening brace and the
// sum32s/table-terminator entries are not visible in this excerpt.
200 typedef int (*SumFunc)(const uchar*, const uchar* mask, uchar*, int, int);
202 static SumFunc sumTab[] =
204 (SumFunc)GET_OPTIMIZED(sum8u), (SumFunc)sum8s,
205 (SumFunc)sum16u, (SumFunc)sum16s,
207 (SumFunc)GET_OPTIMIZED(sum32f), (SumFunc)sum64f,
// Generic kernel: counts elements of src[0..len) that compare unequal to 0,
// with optional 4x unrolling.
// NOTE(review): declaration of the counter and the return are elided here.
212 static int countNonZero_(const T* src, int len )
215 #if CV_ENABLE_UNROLLED
216 for(; i <= len - 4; i += 4 )
217 nz += (src[i] != 0) + (src[i+1] != 0) + (src[i+2] != 0) + (src[i+3] != 0);
219 for( ; i < len; i++ )
// 8-bit specialization of countNonZero with an SSE2 fast path: compares 16
// bytes at a time against zero, converts the comparison mask to a 16-bit
// integer with _mm_movemask_epi8, and looks the zero-count up in a
// lazily-initialized 256-entry table.
// NOTE(review): the SSE feature guard, table-initialization control flow and
// the scalar counting body are elided in this excerpt.
224 static int countNonZero8u( const uchar* src, int len )
230 __m128i pattern = _mm_setzero_si128 ();
231 static uchar tab[256];
232 static volatile bool initialized = false;
// The table stores the INVERSE popcount: each set bit in the movemask means
// the corresponding source byte WAS zero, so tab[j] counts the zero bits of j.
235 // we compute inverse popcount table,
236 // since we pass (img[x] == 0) mask as index in the table.
237 for( int j = 0; j < 256; j++ )
240 for( int mask = 1; mask < 256; mask += mask )
241 val += (j & mask) == 0;
247 for (; i<=len-16; i+=16)
249 __m128i r0 = _mm_loadu_si128((const __m128i*)(src+i));
250 int val = _mm_movemask_epi8(_mm_cmpeq_epi8(r0, pattern));
251 nz += tab[val & 255] + tab[val >> 8];
// Scalar tail for the remaining (< 16) bytes.
255 for( ; i < len; i++ )
// Type-specific instantiations of the generic countNonZero_ kernel.
// 16-bit counting is shared between ushort and short via the same function
// (see the dispatch table below) since "!= 0" is sign-agnostic.
260 static int countNonZero16u( const ushort* src, int len )
261 { return countNonZero_(src, len); }
263 static int countNonZero32s( const int* src, int len )
264 { return countNonZero_(src, len); }
266 static int countNonZero32f( const float* src, int len )
267 { return countNonZero_(src, len); }
269 static int countNonZero64f( const double* src, int len )
270 { return countNonZero_(src, len); }
// Depth-indexed dispatch table. CV_8U/CV_8S deliberately share the 8u kernel
// and CV_16U/CV_16S share the 16u kernel: non-zero testing does not depend on
// signedness. The trailing 0 marks the unsupported user-type slot.
272 typedef int (*CountNonZeroFunc)(const uchar*, int);
274 static CountNonZeroFunc countNonZeroTab[] =
276 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u),
277 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u),
278 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32s), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32f),
279 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero64f), 0
// Combined sum + sum-of-squares kernel used by meanStdDev: accumulates, per
// channel, both sum[k] (type ST) and sqsum[k] (type SQT) over 'len' pixels of
// 'cn' channels, optionally restricted by a mask. Squares are computed as
// (SQT)v*v so that the widening happens before the multiply.
// NOTE(review): the excerpt elides branch headers, the masked 'if( mask[i] )'
// tests, closing braces and the returned pixel count.
283 template<typename T, typename ST, typename SQT>
284 static int sumsqr_(const T* src0, const uchar* mask, ST* sum, SQT* sqsum, int len, int cn )
// Single-channel unmasked path.
297 for( i = 0; i < len; i++, src += cn )
300 s0 += v; sq0 += (SQT)v*v;
// Two-channel unmasked path.
307 ST s0 = sum[0], s1 = sum[1];
308 SQT sq0 = sqsum[0], sq1 = sqsum[1];
309 for( i = 0; i < len; i++, src += cn )
311 T v0 = src[0], v1 = src[1];
312 s0 += v0; sq0 += (SQT)v0*v0;
313 s1 += v1; sq1 += (SQT)v1*v1;
315 sum[0] = s0; sum[1] = s1;
316 sqsum[0] = sq0; sqsum[1] = sq1;
// Three-channel unmasked path.
320 ST s0 = sum[0], s1 = sum[1], s2 = sum[2];
321 SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
322 for( i = 0; i < len; i++, src += cn )
324 T v0 = src[0], v1 = src[1], v2 = src[2];
325 s0 += v0; sq0 += (SQT)v0*v0;
326 s1 += v1; sq1 += (SQT)v1*v1;
327 s2 += v2; sq2 += (SQT)v2*v2;
329 sum[0] = s0; sum[1] = s1; sum[2] = s2;
330 sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
// General unmasked path: channels processed four at a time.
333 for( ; k < cn; k += 4 )
336 ST s0 = sum[k], s1 = sum[k+1], s2 = sum[k+2], s3 = sum[k+3];
337 SQT sq0 = sqsum[k], sq1 = sqsum[k+1], sq2 = sqsum[k+2], sq3 = sqsum[k+3];
338 for( i = 0; i < len; i++, src += cn )
341 v0 = src[0], v1 = src[1];
342 s0 += v0; sq0 += (SQT)v0*v0;
343 s1 += v1; sq1 += (SQT)v1*v1;
344 v0 = src[2], v1 = src[3];
345 s2 += v0; sq2 += (SQT)v0*v0;
346 s3 += v1; sq3 += (SQT)v1*v1;
348 sum[k] = s0; sum[k+1] = s1;
349 sum[k+2] = s2; sum[k+3] = s3;
350 sqsum[k] = sq0; sqsum[k+1] = sq1;
351 sqsum[k+2] = sq2; sqsum[k+3] = sq3;
// Masked paths (elided mask tests): single-channel...
362 for( i = 0; i < len; i++ )
366 s0 += v; sq0 += (SQT)v*v;
// ...three-channel...
374 ST s0 = sum[0], s1 = sum[1], s2 = sum[2];
375 SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
376 for( i = 0; i < len; i++, src += 3 )
379 T v0 = src[0], v1 = src[1], v2 = src[2];
380 s0 += v0; sq0 += (SQT)v0*v0;
381 s1 += v1; sq1 += (SQT)v1*v1;
382 s2 += v2; sq2 += (SQT)v2*v2;
385 sum[0] = s0; sum[1] = s1; sum[2] = s2;
386 sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
// ...and general masked path, one channel at a time.
390 for( i = 0; i < len; i++, src += cn )
393 for( int k = 0; k < cn; k++ )
397 SQT sq = sqsum[k] + (SQT)v*v;
398 sum[k] = s; sqsum[k] = sq;
// Type-specific instantiations of sumsqr_. Note the accumulator widening:
// 8-bit squares fit in int, 16-bit squares need double for sqsum, and
// everything from 32-bit up uses double for both sums.
407 static int sqsum8u( const uchar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
408 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
410 static int sqsum8s( const schar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
411 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
413 static int sqsum16u( const ushort* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
414 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
416 static int sqsum16s( const short* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
417 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
419 static int sqsum32s( const int* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
420 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
422 static int sqsum32f( const float* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
423 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
425 static int sqsum64f( const double* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
426 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
// Depth-indexed dispatch table for the sum+sqsum kernels; trailing 0 marks the
// unsupported slot.
428 typedef int (*SumSqrFunc)(const uchar*, const uchar* mask, uchar*, uchar*, int, int);
430 static SumSqrFunc sumSqrTab[] =
432 (SumSqrFunc)GET_OPTIMIZED(sqsum8u), (SumSqrFunc)sqsum8s, (SumSqrFunc)sqsum16u, (SumSqrFunc)sqsum16s,
433 (SumSqrFunc)sqsum32s, (SumSqrFunc)GET_OPTIMIZED(sqsum32f), (SumSqrFunc)sqsum64f, 0
// cv::sum: sums all pixels of an array per channel and returns a Scalar.
// Tries an IPP fast path for 2D/continuous data first, then falls back to the
// generic sumTab kernel driven by NAryMatIterator. For small integer depths
// the work is chunked ("blockSum") so per-block partial sums fit in int
// accumulators before being flushed into the double Scalar.
// NOTE(review): many control-flow lines (braces, the Scalar declaration, the
// int->Scalar flush and the final return) are elided in this excerpt.
438 cv::Scalar cv::sum( InputArray _src )
440 Mat src = _src.getMat();
441 int k, cn = src.channels(), depth = src.depth();
443 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
// IPP path requires a 2D view: for continuous nD data the total is re-shaped
// into rows x cols.
444 size_t total_size = src.total();
445 int rows = src.size[0], cols = (int)(total_size/rows);
446 if( src.dims == 2 || (src.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
448 IppiSize sz = { cols, rows };
449 int type = src.type();
450 typedef IppStatus (CV_STDCALL* ippiSumFunc)(const void*, int, IppiSize, double *, int);
451 ippiSumFunc ippFunc =
452 type == CV_8UC1 ? (ippiSumFunc)ippiSum_8u_C1R :
453 type == CV_8UC3 ? (ippiSumFunc)ippiSum_8u_C3R :
454 type == CV_8UC4 ? (ippiSumFunc)ippiSum_8u_C4R :
455 type == CV_16UC1 ? (ippiSumFunc)ippiSum_16u_C1R :
456 type == CV_16UC3 ? (ippiSumFunc)ippiSum_16u_C3R :
457 type == CV_16UC4 ? (ippiSumFunc)ippiSum_16u_C4R :
458 type == CV_16SC1 ? (ippiSumFunc)ippiSum_16s_C1R :
459 type == CV_16SC3 ? (ippiSumFunc)ippiSum_16s_C3R :
460 type == CV_16SC4 ? (ippiSumFunc)ippiSum_16s_C4R :
461 type == CV_32FC1 ? (ippiSumFunc)ippiSum_32f_C1R :
462 type == CV_32FC3 ? (ippiSumFunc)ippiSum_32f_C3R :
463 type == CV_32FC4 ? (ippiSumFunc)ippiSum_32f_C4R :
// On IPP success the per-channel results are copied into the Scalar;
// on failure control falls through to the generic path below.
468 if( ippFunc(src.data, src.step[0], sz, res, ippAlgHintAccurate) >= 0 )
471 for( int i = 0; i < cn; i++ )
// Generic path.
481 SumFunc func = sumTab[depth];
483 CV_Assert( cn <= 4 && func != 0 );
485 const Mat* arrays[] = {&src, 0};
487 NAryMatIterator it(arrays, ptrs);
489 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
491 AutoBuffer<int> _buf;
// For depths that sum into int, the Scalar's own storage is reused as the
// int accumulator buffer unless a separate one is needed.
492 int* buf = (int*)&s[0];
494 bool blockSum = depth < CV_32S;
// Block sizes chosen so int accumulators cannot overflow: 2^23 bytes of
// 8-bit data, 2^15 elements of 16-bit data.
498 intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
499 blockSize = std::min(blockSize, intSumBlockSize);
503 for( k = 0; k < cn; k++ )
505 esz = src.elemSize();
508 for( size_t i = 0; i < it.nplanes; i++, ++it )
510 for( j = 0; j < total; j += blockSize )
512 int bsz = std::min(total - j, blockSize);
513 func( ptrs[0], 0, (uchar*)buf, bsz, cn );
// Flush int partial sums into the double Scalar before they can overflow,
// and always at the very end of the iteration.
515 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
517 for( k = 0; k < cn; k++ )
// cv::countNonZero: counts non-zero elements of a single-channel array by
// dispatching on depth and iterating plane-by-plane over possibly
// non-continuous nD data.
// NOTE(review): the ptrs declaration and the final 'return nz;' are elided in
// this excerpt.
530 int cv::countNonZero( InputArray _src )
532 Mat src = _src.getMat();
533 CountNonZeroFunc func = countNonZeroTab[src.depth()];
535 CV_Assert( src.channels() == 1 && func != 0 );
537 const Mat* arrays[] = {&src, 0};
539 NAryMatIterator it(arrays, ptrs);
540 int total = (int)it.size, nz = 0;
542 for( size_t i = 0; i < it.nplanes; i++, ++it )
543 nz += func( ptrs[0], total );
// cv::mean: per-channel mean of an array, optionally restricted by an 8-bit
// mask. Mirrors cv::sum's structure: IPP fast paths (masked C1/C3 variants,
// then unmasked variants) for 2D/continuous data, then the generic sumTab
// kernel with int-block accumulation for small depths; the result is the
// accumulated sum scaled by 1/(number of counted pixels).
// NOTE(review): braces, the Scalar declaration, the flush loop body and the
// masked-C4 handling are elided in this excerpt.
548 cv::Scalar cv::mean( InputArray _src, InputArray _mask )
550 Mat src = _src.getMat(), mask = _mask.getMat();
551 CV_Assert( mask.empty() || mask.type() == CV_8U );
553 int k, cn = src.channels(), depth = src.depth();
555 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
556 size_t total_size = src.total();
557 int rows = src.size[0], cols = (int)(total_size/rows);
558 if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
560 IppiSize sz = { cols, rows };
561 int type = src.type();
// Masked single-channel IPP variant.
564 typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC1)(const void *, int, void *, int, IppiSize, Ipp64f *);
565 ippiMaskMeanFuncC1 ippFuncC1 =
566 type == CV_8UC1 ? (ippiMaskMeanFuncC1)ippiMean_8u_C1MR :
567 type == CV_16UC1 ? (ippiMaskMeanFuncC1)ippiMean_16u_C1MR :
568 type == CV_32FC1 ? (ippiMaskMeanFuncC1)ippiMean_32f_C1MR :
573 if( ippFuncC1(src.data, src.step[0], mask.data, mask.step[0], sz, &res) >= 0 )
// Masked three-channel IPP variant: IPP computes one channel per call
// (channel-of-interest index is 1-based), hence the three calls.
578 typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC3)(const void *, int, void *, int, IppiSize, int, Ipp64f *);
579 ippiMaskMeanFuncC3 ippFuncC3 =
580 type == CV_8UC3 ? (ippiMaskMeanFuncC3)ippiMean_8u_C3CMR :
581 type == CV_16UC3 ? (ippiMaskMeanFuncC3)ippiMean_16u_C3CMR :
582 type == CV_32FC3 ? (ippiMaskMeanFuncC3)ippiMean_32f_C3CMR :
586 Ipp64f res1, res2, res3;
587 if( ippFuncC3(src.data, src.step[0], mask.data, mask.step[0], sz, 1, &res1) >= 0 &&
588 ippFuncC3(src.data, src.step[0], mask.data, mask.step[0], sz, 2, &res2) >= 0 &&
589 ippFuncC3(src.data, src.step[0], mask.data, mask.step[0], sz, 3, &res3) >= 0 )
591 return Scalar(res1, res2, res3);
// Unmasked IPP variants.
597 typedef IppStatus (CV_STDCALL* ippiMeanFunc)(const void*, int, IppiSize, double *, int);
598 ippiMeanFunc ippFunc =
599 type == CV_8UC1 ? (ippiMeanFunc)ippiMean_8u_C1R :
600 type == CV_8UC3 ? (ippiMeanFunc)ippiMean_8u_C3R :
601 type == CV_8UC4 ? (ippiMeanFunc)ippiMean_8u_C4R :
602 type == CV_16UC1 ? (ippiMeanFunc)ippiMean_16u_C1R :
603 type == CV_16UC3 ? (ippiMeanFunc)ippiMean_16u_C3R :
604 type == CV_16UC4 ? (ippiMeanFunc)ippiMean_16u_C4R :
605 type == CV_16SC1 ? (ippiMeanFunc)ippiMean_16s_C1R :
606 type == CV_16SC3 ? (ippiMeanFunc)ippiMean_16s_C3R :
607 type == CV_16SC4 ? (ippiMeanFunc)ippiMean_16s_C4R :
608 type == CV_32FC1 ? (ippiMeanFunc)ippiMean_32f_C1R :
609 type == CV_32FC3 ? (ippiMeanFunc)ippiMean_32f_C3R :
610 type == CV_32FC4 ? (ippiMeanFunc)ippiMean_32f_C4R :
615 if( ippFunc(src.data, src.step[0], sz, res, ippAlgHintAccurate) >= 0 )
618 for( int i = 0; i < cn; i++ )
// Generic path: identical block-accumulation scheme to cv::sum, plus a
// running count (nz0) of mask-selected pixels for the final scaling.
629 SumFunc func = sumTab[depth];
631 CV_Assert( cn <= 4 && func != 0 );
633 const Mat* arrays[] = {&src, &mask, 0};
635 NAryMatIterator it(arrays, ptrs);
637 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
639 AutoBuffer<int> _buf;
640 int* buf = (int*)&s[0];
641 bool blockSum = depth <= CV_16S;
642 size_t esz = 0, nz0 = 0;
646 intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
647 blockSize = std::min(blockSize, intSumBlockSize);
651 for( k = 0; k < cn; k++ )
653 esz = src.elemSize();
656 for( size_t i = 0; i < it.nplanes; i++, ++it )
658 for( j = 0; j < total; j += blockSize )
660 int bsz = std::min(total - j, blockSize);
661 int nz = func( ptrs[0], ptrs[1], (uchar*)buf, bsz, cn );
664 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
666 for( k = 0; k < cn; k++ )
// An empty mask (nz0 == 0) yields a zero Scalar rather than dividing by zero.
678 return s*(nz0 ? 1./nz0 : 0);
// cv::meanStdDev: per-channel mean and standard deviation (optionally masked).
// Accumulates sum and sum-of-squares via sumSqrTab, using int block buffers
// for small depths (flushed into doubles before overflow), then computes
// mean = sum*scale and sdv = sqrt(max(sqsum*scale - mean^2, 0)) — the max()
// guards against tiny negative values from floating-point cancellation.
// Results are written to _mean/_sdv, which are created as cn x 1 CV_64F if
// not fixed-size; extra destination channels beyond cn are zero-filled.
// NOTE(review): several lines (braces, buffer-pointer setup for the
// int-accumulator case, flush bodies) are elided in this excerpt.
682 void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
684 Mat src = _src.getMat(), mask = _mask.getMat();
685 CV_Assert( mask.empty() || mask.type() == CV_8U );
687 int k, cn = src.channels(), depth = src.depth();
688 SumSqrFunc func = sumSqrTab[depth];
690 CV_Assert( func != 0 );
692 const Mat* arrays[] = {&src, &mask, 0};
694 NAryMatIterator it(arrays, ptrs);
695 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
696 int j, count = 0, nz0 = 0;
// _buf holds s (sums), sq (squared sums) and, when block accumulation is on,
// the int staging buffers aliased over the same storage.
697 AutoBuffer<double> _buf(cn*4);
698 double *s = (double*)_buf, *sq = s + cn;
699 int *sbuf = (int*)s, *sqbuf = (int*)sq;
700 bool blockSum = depth <= CV_16S, blockSqSum = depth <= CV_8S;
703 for( k = 0; k < cn; k++ )
708 intSumBlockSize = 1 << 15;
709 blockSize = std::min(blockSize, intSumBlockSize);
710 sbuf = (int*)(sq + cn);
713 for( k = 0; k < cn; k++ )
714 sbuf[k] = sqbuf[k] = 0;
715 esz = src.elemSize();
718 for( size_t i = 0; i < it.nplanes; i++, ++it )
720 for( j = 0; j < total; j += blockSize )
722 int bsz = std::min(total - j, blockSize);
723 int nz = func( ptrs[0], ptrs[1], (uchar*)sbuf, (uchar*)sqbuf, bsz, cn );
726 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
728 for( k = 0; k < cn; k++ )
735 for( k = 0; k < cn; k++ )
// Empty mask => scale 0 => zero mean and deviation.
749 double scale = nz0 ? 1./nz0 : 0.;
750 for( k = 0; k < cn; k++ )
753 sq[k] = std::sqrt(std::max(sq[k]*scale - s[k]*s[k], 0.));
// Write mean (j==0) then stddev (j==1) into the caller's output arrays.
756 for( j = 0; j < 2; j++ )
758 const double* sptr = j == 0 ? s : sq;
759 _OutputArray _dst = j == 0 ? _mean : _sdv;
763 if( !_dst.fixedSize() )
764 _dst.create(cn, 1, CV_64F, -1, true);
765 Mat dst = _dst.getMat();
766 int dcn = (int)dst.total();
767 CV_Assert( dst.type() == CV_64F && dst.isContinuous() &&
768 (dst.cols == 1 || dst.rows == 1) && dcn >= cn );
769 double* dptr = dst.ptr<double>();
770 for( k = 0; k < cn; k++ )
772 for( ; k < dcn; k++ )
777 /****************************************************************************************\
779 \****************************************************************************************/
// Generic min/max search kernel over a flat 'len'-element span. Running
// minimum/maximum (working type WT) and their flat indices are carried in/out
// through pointers so the search continues across planes; startIdx offsets
// local indices into the global flat index space. Has an unmasked path and a
// masked path (mask[i] must be non-zero for the element to participate).
// NOTE(review): braces, value loads and the write-back of results through the
// out-pointers are elided in this excerpt.
784 template<typename T, typename WT> static void
785 minMaxIdx_( const T* src, const uchar* mask, WT* _minVal, WT* _maxVal,
786 size_t* _minIdx, size_t* _maxIdx, int len, size_t startIdx )
788 WT minVal = *_minVal, maxVal = *_maxVal;
789 size_t minIdx = *_minIdx, maxIdx = *_maxIdx;
// Unmasked scan.
793 for( int i = 0; i < len; i++ )
799 minIdx = startIdx + i;
804 maxIdx = startIdx + i;
// Masked scan.
810 for( int i = 0; i < len; i++ )
813 if( mask[i] && val < minVal )
816 minIdx = startIdx + i;
818 if( mask[i] && val > maxVal )
821 maxIdx = startIdx + i;
// Type-specific instantiations of minMaxIdx_. Integer depths up to 32 bits
// track extremes in int; float and double use their own working type to
// preserve precision.
832 static void minMaxIdx_8u(const uchar* src, const uchar* mask, int* minval, int* maxval,
833 size_t* minidx, size_t* maxidx, int len, size_t startidx )
834 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
836 static void minMaxIdx_8s(const schar* src, const uchar* mask, int* minval, int* maxval,
837 size_t* minidx, size_t* maxidx, int len, size_t startidx )
838 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
840 static void minMaxIdx_16u(const ushort* src, const uchar* mask, int* minval, int* maxval,
841 size_t* minidx, size_t* maxidx, int len, size_t startidx )
842 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
844 static void minMaxIdx_16s(const short* src, const uchar* mask, int* minval, int* maxval,
845 size_t* minidx, size_t* maxidx, int len, size_t startidx )
846 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
848 static void minMaxIdx_32s(const int* src, const uchar* mask, int* minval, int* maxval,
849 size_t* minidx, size_t* maxidx, int len, size_t startidx )
850 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
852 static void minMaxIdx_32f(const float* src, const uchar* mask, float* minval, float* maxval,
853 size_t* minidx, size_t* maxidx, int len, size_t startidx )
854 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
856 static void minMaxIdx_64f(const double* src, const uchar* mask, double* minval, double* maxval,
857 size_t* minidx, size_t* maxidx, int len, size_t startidx )
858 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
// Depth-indexed dispatch table for min/max search. The function-pointer type
// erases the per-depth value types; cv::minMaxIdx passes pointers of the
// matching type per depth.
860 typedef void (*MinMaxIdxFunc)(const uchar*, const uchar*, int*, int*, size_t*, size_t*, int, size_t);
862 static MinMaxIdxFunc minmaxTab[] =
864 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8s),
865 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16s),
866 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32s),
867 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32f), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_64f),
// Converts a flat element offset into per-dimension indices of matrix 'a',
// walking dimensions from the last (fastest-varying) to the first.
// NOTE(review): this excerpt elides the 'ofs != 0' branch structure and the
// not-found (-1) fill path that the second visible loop belongs to.
871 static void ofs2idx(const Mat& a, size_t ofs, int* idx)
877 for( i = d-1; i >= 0; i-- )
880 idx[i] = (int)(ofs % sz);
886 for( i = d-1; i >= 0; i-- )
// cv::minMaxIdx: global minimum/maximum (and optionally their nD indices)
// over an array, with optional 8-bit mask. Multi-channel input is allowed
// only without mask and without index output (the array is then treated as a
// flat single-channel buffer of len*cn elements). Per-depth extreme values
// are tracked in int/float/double and converted to double at the end; flat
// offsets are converted to indices via ofs2idx.
// NOTE(review): braces, the startidx declaration, the "no pixels found" test
// and the minVal/maxVal write-backs are elided in this excerpt.
893 void cv::minMaxIdx(InputArray _src, double* minVal,
894 double* maxVal, int* minIdx, int* maxIdx,
897 Mat src = _src.getMat(), mask = _mask.getMat();
898 int depth = src.depth(), cn = src.channels();
900 CV_Assert( (cn == 1 && (mask.empty() || mask.type() == CV_8U)) ||
901 (cn >= 1 && mask.empty() && !minIdx && !maxIdx) );
902 MinMaxIdxFunc func = minmaxTab[depth];
903 CV_Assert( func != 0 );
905 const Mat* arrays[] = {&src, &mask, 0};
907 NAryMatIterator it(arrays, ptrs);
// One extreme-tracking pair per possible working type; 'minval'/'maxval'
// are re-pointed below to whichever pair matches the depth.
909 size_t minidx = 0, maxidx = 0;
910 int iminval = INT_MAX, imaxval = INT_MIN;
911 float fminval = FLT_MAX, fmaxval = -FLT_MAX;
912 double dminval = DBL_MAX, dmaxval = -DBL_MAX;
914 int *minval = &iminval, *maxval = &imaxval;
915 int planeSize = (int)it.size*cn;
917 if( depth == CV_32F )
918 minval = (int*)&fminval, maxval = (int*)&fmaxval;
919 else if( depth == CV_64F )
920 minval = (int*)&dminval, maxval = (int*)&dmaxval;
922 for( size_t i = 0; i < it.nplanes; i++, ++it, startidx += planeSize )
923 func( ptrs[0], ptrs[1], minval, maxval, &minidx, &maxidx, planeSize, startidx );
// Normalize results into doubles regardless of the working type used.
926 dminval = dmaxval = 0;
927 else if( depth == CV_32F )
928 dminval = fminval, dmaxval = fmaxval;
929 else if( depth <= CV_32S )
930 dminval = iminval, dmaxval = imaxval;
938 ofs2idx(src, minidx, minIdx);
940 ofs2idx(src, maxidx, maxIdx);
// cv::minMaxLoc: 2D-only convenience wrapper over minMaxIdx. minMaxIdx fills
// indices as (row, col); Point stores (x, y) = (col, row), hence the swaps.
// NOTE(review): the null-checks guarding the swaps are elided in this excerpt.
943 void cv::minMaxLoc( InputArray _img, double* minVal, double* maxVal,
944 Point* minLoc, Point* maxLoc, InputArray mask )
946 Mat img = _img.getMat();
947 CV_Assert(img.dims <= 2);
949 minMaxIdx(_img, minVal, maxVal, (int*)minLoc, (int*)maxLoc, mask);
951 std::swap(minLoc->x, minLoc->y);
953 std::swap(maxLoc->x, maxLoc->y);
956 /****************************************************************************************\
958 \****************************************************************************************/
// Squared L2 distance between two float vectors of length n, with an SSE
// path (two 4-wide accumulators, 8 elements per iteration, horizontal sum
// via an aligned store) and a 4x-unrolled scalar fallback plus scalar tail.
// NOTE(review): the SSE feature guard, #else/#endif and the 'return d;' are
// elided in this excerpt.
963 float normL2Sqr_(const float* a, const float* b, int n)
965 int j = 0; float d = 0.f;
969 float CV_DECL_ALIGNED(16) buf[4];
970 __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
972 for( ; j <= n - 8; j += 8 )
974 __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
975 __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
976 d0 = _mm_add_ps(d0, _mm_mul_ps(t0, t0));
977 d1 = _mm_add_ps(d1, _mm_mul_ps(t1, t1));
979 _mm_store_ps(buf, _mm_add_ps(d0, d1));
980 d = buf[0] + buf[1] + buf[2] + buf[3];
985 for( ; j <= n - 4; j += 4 )
987 float t0 = a[j] - b[j], t1 = a[j+1] - b[j+1], t2 = a[j+2] - b[j+2], t3 = a[j+3] - b[j+3];
988 d += t0*t0 + t1*t1 + t2*t2 + t3*t3;
994 float t = a[j] - b[j];
// L1 distance between two float vectors of length n. The SSE path computes
// |x| by masking off the IEEE-754 sign bit (AND with 0x7fffffff) — cheaper
// than a branch or fabs call — summing 8 elements per iteration.
// NOTE(review): the SSE feature guard and 'return d;' are elided here.
1001 float normL1_(const float* a, const float* b, int n)
1003 int j = 0; float d = 0.f;
1007 float CV_DECL_ALIGNED(16) buf[4];
1008 static const int CV_DECL_ALIGNED(16) absbuf[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
1009 __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
1010 __m128 absmask = _mm_load_ps((const float*)absbuf);
1012 for( ; j <= n - 8; j += 8 )
1014 __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
1015 __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
1016 d0 = _mm_add_ps(d0, _mm_and_ps(t0, absmask));
1017 d1 = _mm_add_ps(d1, _mm_and_ps(t1, absmask));
1019 _mm_store_ps(buf, _mm_add_ps(d0, d1));
1020 d = buf[0] + buf[1] + buf[2] + buf[3];
// Scalar fallback (4x unrolled) and tail.
1025 for( ; j <= n - 4; j += 4 )
1027 d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
1028 std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
1033 d += std::abs(a[j] - b[j]);
// L1 distance between two uchar vectors of length n. The SSE2 path uses
// _mm_sad_epu8 (sum of absolute differences) on 16 bytes at a time, then on
// 4-byte chunks, and reduces the two 64-bit SAD lanes at the end.
// NOTE(review): the SSE feature guard, the declaration of d and the final
// return are elided in this excerpt.
1037 int normL1_(const uchar* a, const uchar* b, int n)
1043 __m128i d0 = _mm_setzero_si128();
1045 for( ; j <= n - 16; j += 16 )
1047 __m128i t0 = _mm_loadu_si128((const __m128i*)(a + j));
1048 __m128i t1 = _mm_loadu_si128((const __m128i*)(b + j));
1050 d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
1053 for( ; j <= n - 4; j += 4 )
1055 __m128i t0 = _mm_cvtsi32_si128(*(const int*)(a + j));
1056 __m128i t1 = _mm_cvtsi32_si128(*(const int*)(b + j));
1058 d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
// Fold the high 64-bit lane into the low one and extract the total.
1060 d = _mm_cvtsi128_si32(_mm_add_epi32(d0, _mm_unpackhi_epi64(d0, d0)));
// Scalar fallback (4x unrolled) and tail.
1065 for( ; j <= n - 4; j += 4 )
1067 d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
1068 std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
1072 d += std::abs(a[j] - b[j]);
// Lookup tables for Hamming-distance kernels, indexed by a byte value:
// popCountTable  - number of set bits (cell size 1);
// popCountTable2 - number of non-zero 2-bit cells in the byte;
// popCountTable4 - number of non-zero 4-bit cells (nibbles) in the byte.
// NOTE(review): the tables' closing braces are elided in this excerpt.
1076 static const uchar popCountTable[] =
1078 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1079 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1080 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1081 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1082 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1083 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1084 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1085 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
1088 static const uchar popCountTable2[] =
1090 0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
1091 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
1092 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1093 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1094 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1095 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1096 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1097 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
1100 static const uchar popCountTable4[] =
1102 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1103 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1104 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1105 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1106 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1107 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1108 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1109 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
// Population count (number of set bits) of an n-byte buffer — the Hamming
// "norm" of a binary descriptor. Has an ARM NEON path (vcntq_u8 popcount,
// widened and pairwise-accumulated) and a table-driven scalar path.
// NOTE(review): the NEON feature guard, #else/#endif and 'return result;'
// are elided in this excerpt.
1112 static int normHamming(const uchar* a, int n)
1114 int i = 0, result = 0;
1116 uint32x4_t bits = vmovq_n_u32(0);
1117 for (; i <= n - 16; i += 16) {
1118 uint8x16_t A_vec = vld1q_u8 (a + i);
1119 uint8x16_t bitsSet = vcntq_u8 (A_vec);
1120 uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
1121 uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
1122 bits = vaddq_u32(bits, bitSet4);
// Reduce the four 32-bit lanes to two 64-bit lanes and add both halves.
1124 uint64x2_t bitSet2 = vpaddlq_u32 (bits);
1125 result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
1126 result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
// Scalar path: byte-wise popcount via lookup table, 4x unrolled, plus tail.
1128 for( ; i <= n - 4; i += 4 )
1129 result += popCountTable[a[i]] + popCountTable[a[i+1]] +
1130 popCountTable[a[i+2]] + popCountTable[a[i+3]];
1133 result += popCountTable[a[i]];
// Hamming distance between two n-byte buffers: popcount of a XOR b.
// Structured like the single-buffer normHamming above, with an extra veorq_u8
// (XOR) step in the NEON path and XORed table lookups in the scalar path.
// NOTE(review): feature guards and the final return are elided here.
1137 int normHamming(const uchar* a, const uchar* b, int n)
1139 int i = 0, result = 0;
1141 uint32x4_t bits = vmovq_n_u32(0);
1142 for (; i <= n - 16; i += 16) {
1143 uint8x16_t A_vec = vld1q_u8 (a + i);
1144 uint8x16_t B_vec = vld1q_u8 (b + i);
1145 uint8x16_t AxorB = veorq_u8 (A_vec, B_vec);
1146 uint8x16_t bitsSet = vcntq_u8 (AxorB);
1147 uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
1148 uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
1149 bits = vaddq_u32(bits, bitSet4);
1151 uint64x2_t bitSet2 = vpaddlq_u32 (bits);
1152 result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
1153 result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
1155 for( ; i <= n - 4; i += 4 )
1156 result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] +
1157 popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]];
1160 result += popCountTable[a[i] ^ b[i]];
// Hamming "norm" with a configurable cell size: counts non-zero cells of 1,
// 2 or 4 bits by selecting the matching popcount table; cell size 1 delegates
// to the plain normHamming. Any other cell size raises CV_StsBadSize.
// NOTE(review): the 'cellSize == 1/2' conditionals and 'return result;' are
// elided in this excerpt.
1164 static int normHamming(const uchar* a, int n, int cellSize)
1167 return normHamming(a, n);
1168 const uchar* tab = 0;
1170 tab = popCountTable2;
1171 else if( cellSize == 4 )
1172 tab = popCountTable4;
1174 CV_Error( CV_StsBadSize, "bad cell size (not 1, 2 or 4) in normHamming" );
1175 int i = 0, result = 0;
1176 #if CV_ENABLE_UNROLLED
1177 for( ; i <= n - 4; i += 4 )
1178 result += tab[a[i]] + tab[a[i+1]] + tab[a[i+2]] + tab[a[i+3]];
1181 result += tab[a[i]];
// Hamming distance between two buffers with a configurable cell size: counts
// cells (1, 2 or 4 bits) that differ between a and b, via table lookup on the
// XORed bytes. Cell size 1 delegates to the plain two-buffer normHamming;
// other sizes raise CV_StsBadSize.
// NOTE(review): conditionals and the final return are elided in this excerpt.
1185 int normHamming(const uchar* a, const uchar* b, int n, int cellSize)
1188 return normHamming(a, b, n);
1189 const uchar* tab = 0;
1191 tab = popCountTable2;
1192 else if( cellSize == 4 )
1193 tab = popCountTable4;
1195 CV_Error( CV_StsBadSize, "bad cell size (not 1, 2 or 4) in normHamming" );
1196 int i = 0, result = 0;
1197 #if CV_ENABLE_UNROLLED
1198 for( ; i <= n - 4; i += 4 )
1199 result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] +
1200 tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]];
1203 result += tab[a[i] ^ b[i]];
// Norm kernel templates. Each takes a running accumulator through _result,
// processes len pixels of cn channels (optionally masked), and updates the
// accumulator in place: Inf = running max of |x|, L1 = sum of |x|,
// L2 = sum of x^2; the Diff variants operate on src1[k] - src2[k].
// When no mask is given the work is delegated to the flat normInf/normL1/
// normL2Sqr helpers over len*cn elements.
// NOTE(review): mask tests ('if( mask[i] )'), write-backs to *_result and
// return statements are elided throughout this excerpt.
1208 template<typename T, typename ST> int
1209 normInf_(const T* src, const uchar* mask, ST* _result, int len, int cn)
1211 ST result = *_result;
1214 result = std::max(result, normInf<T, ST>(src, len*cn));
1218 for( int i = 0; i < len; i++, src += cn )
1221 for( int k = 0; k < cn; k++ )
1222 result = std::max(result, ST(fast_abs(src[k])));
// L1 norm: sum of absolute values.
1229 template<typename T, typename ST> int
1230 normL1_(const T* src, const uchar* mask, ST* _result, int len, int cn)
1232 ST result = *_result;
1235 result += normL1<T, ST>(src, len*cn);
1239 for( int i = 0; i < len; i++, src += cn )
1242 for( int k = 0; k < cn; k++ )
1243 result += fast_abs(src[k]);
// L2 norm: sum of squares (square root is applied by the caller).
1250 template<typename T, typename ST> int
1251 normL2_(const T* src, const uchar* mask, ST* _result, int len, int cn)
1253 ST result = *_result;
1256 result += normL2Sqr<T, ST>(src, len*cn);
1260 for( int i = 0; i < len; i++, src += cn )
1263 for( int k = 0; k < cn; k++ )
// Difference-norm variants over two sources.
1274 template<typename T, typename ST> int
1275 normDiffInf_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
1277 ST result = *_result;
1280 result = std::max(result, normInf<T, ST>(src1, src2, len*cn));
1284 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
1287 for( int k = 0; k < cn; k++ )
1288 result = std::max(result, (ST)std::abs(src1[k] - src2[k]));
1295 template<typename T, typename ST> int
1296 normDiffL1_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
1298 ST result = *_result;
1301 result += normL1<T, ST>(src1, src2, len*cn);
1305 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
1308 for( int k = 0; k < cn; k++ )
1309 result += std::abs(src1[k] - src2[k]);
1316 template<typename T, typename ST> int
1317 normDiffL2_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
1319 ST result = *_result;
1322 result += normL2Sqr<T, ST>(src1, src2, len*cn);
1326 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
1329 for( int k = 0; k < cn; k++ )
1331 ST v = src1[k] - src2[k];
// Expands into a pair of non-template dispatch stubs for element type `type`
// and accumulator type `ntype`: norm<L>_<suffix>() forwards to the templated
// norm<L>_() kernel, and normDiff<L>_<suffix>() forwards to normDiff<L>_().
// These stubs are what the normTab/normDiffTab function tables point at.
// Fix: `len` is already declared `int`, so the stray `(int)len` cast in the
// normDiff stub was redundant; dropped for consistency with the norm stub.
1341 #define CV_DEF_NORM_FUNC(L, suffix, type, ntype) \
1342 static int norm##L##_##suffix(const type* src, const uchar* mask, ntype* r, int len, int cn) \
1343 { return norm##L##_(src, mask, r, len, cn); } \
1344 static int normDiff##L##_##suffix(const type* src1, const type* src2, \
1345 const uchar* mask, ntype* r, int len, int cn) \
1346 { return normDiff##L##_(src1, src2, mask, r, len, cn); }
// Convenience macro: stamps out the Inf, L1 and L2 stub pairs for one
// element type, each with its own accumulator type.
1348 #define CV_DEF_NORM_ALL(suffix, type, inftype, l1type, l2type) \
1349 CV_DEF_NORM_FUNC(Inf, suffix, type, inftype) \
1350 CV_DEF_NORM_FUNC(L1, suffix, type, l1type) \
1351 CV_DEF_NORM_FUNC(L2, suffix, type, l2type)
// Instantiate the Inf/L1/L2 dispatch stubs for every primitive depth.
// The three trailing arguments pick the accumulator type per norm kind:
// small integer depths accumulate in int where it cannot overflow a block,
// wider depths fall back to double.
1353 CV_DEF_NORM_ALL(8u, uchar, int, int, int)
1354 CV_DEF_NORM_ALL(8s, schar, int, int, int)
1355 CV_DEF_NORM_ALL(16u, ushort, int, int, double)
1356 CV_DEF_NORM_ALL(16s, short, int, int, double)
1357 CV_DEF_NORM_ALL(32s, int, int, double, double)
1358 CV_DEF_NORM_ALL(32f, float, float, double, double)
1359 CV_DEF_NORM_ALL(64f, double, double, double, double)
// Type-erased kernel signatures used by the dispatch tables below:
// (src, mask, result, len, cn) and (src1, src2, mask, result, len, cn).
// Pointers are erased to uchar*; the tables cast back per depth.
1362 typedef int (*NormFunc)(const uchar*, const uchar*, uchar*, int, int);
1363 typedef int (*NormDiffFunc)(const uchar*, const uchar*, const uchar*, uchar*, int, int);
// Single-source norm dispatch table, indexed as normTab[normType >> 1][depth]
// (rows: Inf, L1, L2; columns: CV_8U..CV_64F, with 0 for the unused slot).
// Initializer braces between rows are elided in this excerpt.
1365 static NormFunc normTab[3][8] =
1368     (NormFunc)GET_OPTIMIZED(normInf_8u), (NormFunc)GET_OPTIMIZED(normInf_8s), (NormFunc)GET_OPTIMIZED(normInf_16u), (NormFunc)GET_OPTIMIZED(normInf_16s),
1369     (NormFunc)GET_OPTIMIZED(normInf_32s), (NormFunc)GET_OPTIMIZED(normInf_32f), (NormFunc)normInf_64f, 0
1372     (NormFunc)GET_OPTIMIZED(normL1_8u), (NormFunc)GET_OPTIMIZED(normL1_8s), (NormFunc)GET_OPTIMIZED(normL1_16u), (NormFunc)GET_OPTIMIZED(normL1_16s),
1373     (NormFunc)GET_OPTIMIZED(normL1_32s), (NormFunc)GET_OPTIMIZED(normL1_32f), (NormFunc)normL1_64f, 0
1376     (NormFunc)GET_OPTIMIZED(normL2_8u), (NormFunc)GET_OPTIMIZED(normL2_8s), (NormFunc)GET_OPTIMIZED(normL2_16u), (NormFunc)GET_OPTIMIZED(normL2_16s),
1377     (NormFunc)GET_OPTIMIZED(normL2_32s), (NormFunc)GET_OPTIMIZED(normL2_32f), (NormFunc)normL2_64f, 0
// Two-source (difference) norm dispatch table, same layout as normTab:
// normDiffTab[normType >> 1][depth].  Only some entries go through
// GET_OPTIMIZED (i.e. may be replaced by accelerated implementations).
// Initializer braces between rows are elided in this excerpt.
1381 static NormDiffFunc normDiffTab[3][8] =
1384     (NormDiffFunc)GET_OPTIMIZED(normDiffInf_8u), (NormDiffFunc)normDiffInf_8s,
1385     (NormDiffFunc)normDiffInf_16u, (NormDiffFunc)normDiffInf_16s,
1386     (NormDiffFunc)normDiffInf_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffInf_32f),
1387     (NormDiffFunc)normDiffInf_64f, 0
1390     (NormDiffFunc)GET_OPTIMIZED(normDiffL1_8u), (NormDiffFunc)normDiffL1_8s,
1391     (NormDiffFunc)normDiffL1_16u, (NormDiffFunc)normDiffL1_16s,
1392     (NormDiffFunc)normDiffL1_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL1_32f),
1393     (NormDiffFunc)normDiffL1_64f, 0
1396     (NormDiffFunc)GET_OPTIMIZED(normDiffL2_8u), (NormDiffFunc)normDiffL2_8s,
1397     (NormDiffFunc)normDiffL2_16u, (NormDiffFunc)normDiffL2_16s,
1398     (NormDiffFunc)normDiffL2_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL2_32f),
1399     (NormDiffFunc)normDiffL2_64f, 0
// Absolute norm of a single array.  Supports NORM_INF/L1/L2/L2SQR for all
// depths and NORM_HAMMING/HAMMING2 for CV_8U only (asserted below).
// Many structural lines (braces, declarations of `result`, returns) are
// elided in this excerpt.
1405 double cv::norm( InputArray _src, int normType, InputArray _mask )
1407     Mat src = _src.getMat(), mask = _mask.getMat();
1408     int depth = src.depth(), cn = src.channels();
1411     CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR ||
1412               ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src.type() == CV_8U) );
// Fast path: contiguous, unmasked data that fits in an int-sized length
// can be fed to the low-level kernels in one call.
1414     if( src.isContinuous() && mask.empty() )
1416         size_t len = src.total()*cn;
1417         if( len == (size_t)(int)len )
1419             if( depth == CV_32F )
1421                 const float* data = src.ptr<float>();
1423                 if( normType == NORM_L2 )
1426                     GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
1427                     return std::sqrt(result);
1429                 if( normType == NORM_L2SQR )
1432                     GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
1435                 if( normType == NORM_L1 )
1438                     GET_OPTIMIZED(normL1_32f)(data, 0, &result, (int)len, 1);
1441                 if( normType == NORM_INF )
1444                     GET_OPTIMIZED(normInf_32f)(data, 0, &result, (int)len, 1);
1448             if( depth == CV_8U )
1450                 const uchar* data = src.ptr<uchar>();
1452                 if( normType == NORM_HAMMING )
1453                     return normHamming(data, (int)len);
1455                 if( normType == NORM_HAMMING2 )
1456                     return normHamming(data, (int)len, 2);
1461     CV_Assert( mask.empty() || mask.type() == CV_8U );
// Hamming norms with a mask: zero out masked-off bytes first, then recurse
// on the unmasked path.  (Mask-presence guard line elided here.)
1463     if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
1468             bitwise_and(src, mask, temp);
1469             return norm(temp, normType);
// cellSize 2 = HAMMING2 (bits counted in pairs).
1471         int cellSize = normType == NORM_HAMMING ? 1 : 2;
1473         const Mat* arrays[] = {&src, 0};
1475         NAryMatIterator it(arrays, ptrs);
1476         int total = (int)it.size;
1479         for( size_t i = 0; i < it.nplanes; i++, ++it )
1480             result += normHamming(ptrs[0], total, cellSize);
// General path: look up the depth-specific kernel and iterate plane by plane.
1485     NormFunc func = normTab[normType >> 1][depth];
1486     CV_Assert( func != 0 );
1488     const Mat* arrays[] = {&src, &mask, 0};
1498     NAryMatIterator it(arrays, ptrs);
1499     int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
// blockSum: small depths accumulate in int and must be flushed to double
// every intSumBlockSize elements to avoid overflow.
1500     bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
1501             ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
1503     int *ibuf = &result.i;
1508         intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
1509         blockSize = std::min(blockSize, intSumBlockSize);
1511         esz = src.elemSize();
1514     for( size_t i = 0; i < it.nplanes; i++, ++it )
1516         for( j = 0; j < total; j += blockSize )
1518             int bsz = std::min(total - j, blockSize);
1519             func( ptrs[0], ptrs[1], (uchar*)ibuf, bsz, cn );
// Flush the int accumulator on block boundary or at the very last block.
1521             if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
// Normalize the union'ed accumulator to double for the return value.
1533     if( normType == NORM_INF )
1535         if( depth == CV_64F )
1537         else if( depth == CV_32F )
1538             result.d = result.f;
1540             result.d = result.i;
1542     else if( normType == NORM_L2 )
1543         result.d = std::sqrt(result.d);
// Norm of the difference of two same-typed arrays; with CV_RELATIVE set,
// returns ||src1-src2|| / (||src2|| + DBL_EPSILON).  Structure mirrors the
// single-source cv::norm above; braces and locals are elided in this excerpt.
1549 double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
1551     if( normType & CV_RELATIVE )
1552         return norm(_src1, _src2, normType & ~CV_RELATIVE, _mask)/(norm(_src2, normType, _mask) + DBL_EPSILON);
1554     Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
1555     int depth = src1.depth(), cn = src1.channels();
1557     CV_Assert( src1.size == src2.size && src1.type() == src2.type() );
1560     CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR ||
1561               ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
// Fast path: both inputs contiguous, unmasked, int-sized length.
1563     if( src1.isContinuous() && src2.isContinuous() && mask.empty() )
1565         size_t len = src1.total()*src1.channels();
1566         if( len == (size_t)(int)len )
1568             if( src1.depth() == CV_32F )
1570                 const float* data1 = src1.ptr<float>();
1571                 const float* data2 = src2.ptr<float>();
1573                 if( normType == NORM_L2 )
1576                     GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
1577                     return std::sqrt(result);
1579                 if( normType == NORM_L2SQR )
1582                     GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
1585                 if( normType == NORM_L1 )
1588                     GET_OPTIMIZED(normDiffL1_32f)(data1, data2, 0, &result, (int)len, 1);
1591                 if( normType == NORM_INF )
1594                     GET_OPTIMIZED(normDiffInf_32f)(data1, data2, 0, &result, (int)len, 1);
1601     CV_Assert( mask.empty() || mask.type() == CV_8U );
// Hamming distance: XOR the inputs, apply the mask, then take the
// single-source Hamming norm of the result.
1603     if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
1608             bitwise_xor(src1, src2, temp);
1609             bitwise_and(temp, mask, temp);
1610             return norm(temp, normType);
1612         int cellSize = normType == NORM_HAMMING ? 1 : 2;
1614         const Mat* arrays[] = {&src1, &src2, 0};
1616         NAryMatIterator it(arrays, ptrs);
1617         int total = (int)it.size;
1620         for( size_t i = 0; i < it.nplanes; i++, ++it )
1621             result += normHamming(ptrs[0], ptrs[1], total, cellSize);
// General path via the two-source dispatch table.
1626     NormDiffFunc func = normDiffTab[normType >> 1][depth];
1627     CV_Assert( func != 0 );
1629     const Mat* arrays[] = {&src1, &src2, &mask, 0};
1640     NAryMatIterator it(arrays, ptrs);
1641     int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
1642     bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
1643             ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
1645     unsigned *ibuf = &result.u;
// NOTE(review): unlike the single-source variant (its line 1508), the block
// size here is NOT divided by cn — confirm whether multi-channel input can
// overflow the int accumulator before the flush.
1650         intSumBlockSize = normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15);
1651         blockSize = std::min(blockSize, intSumBlockSize);
1653         esz = src1.elemSize();
1656     for( size_t i = 0; i < it.nplanes; i++, ++it )
1658         for( j = 0; j < total; j += blockSize )
1660             int bsz = std::min(total - j, blockSize);
1661             func( ptrs[0], ptrs[1], ptrs[2], (uchar*)ibuf, bsz, cn );
1663             if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
// Convert the union'ed accumulator to double for the return value.
1676     if( normType == NORM_INF )
1678         if( depth == CV_64F )
1680         else if( depth == CV_32F )
1681             result.d = result.f;
1683             result.d = result.u;
1685     else if( normType == NORM_L2 )
1686         result.d = std::sqrt(result.d);
1692 ///////////////////////////////////// batch distance ///////////////////////////////////////
// L1 distance from one query vector `src1` to each of `nvecs` rows of
// `src2` (row stride `step2` in bytes), written to dist[0..nvecs-1].
// Masked-off rows get the sentinel numeric max.  Braces and the mask test
// are elided in this excerpt.
1697 template<typename _Tp, typename _Rt>
1698 void batchDistL1_(const _Tp* src1, const _Tp* src2, size_t step2,
1699                   int nvecs, int len, _Rt* dist, const uchar* mask)
// Convert the byte stride to an element stride.
1701     step2 /= sizeof(src2[0]);
1704         for( int i = 0; i < nvecs; i++ )
1705             dist[i] = normL1<_Tp, _Rt>(src1, src2 + step2*i, len);
1709         _Rt val0 = std::numeric_limits<_Rt>::max();
1710         for( int i = 0; i < nvecs; i++ )
1711             dist[i] = mask[i] ? normL1<_Tp, _Rt>(src1, src2 + step2*i, len) : val0;
// Squared-L2 distance from `src1` to each of `nvecs` rows of `src2`;
// masked-off rows get the sentinel numeric max.  Same layout conventions
// as batchDistL1_ above; braces/mask test elided in this excerpt.
1715 template<typename _Tp, typename _Rt>
1716 void batchDistL2Sqr_(const _Tp* src1, const _Tp* src2, size_t step2,
1717                      int nvecs, int len, _Rt* dist, const uchar* mask)
1719     step2 /= sizeof(src2[0]);
1722         for( int i = 0; i < nvecs; i++ )
1723             dist[i] = normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len);
1727         _Rt val0 = std::numeric_limits<_Rt>::max();
1728         for( int i = 0; i < nvecs; i++ )
1729             dist[i] = mask[i] ? normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len) : val0;
// Euclidean (sqrt of squared-L2) distance from `src1` to each of `nvecs`
// rows of `src2`; masked-off rows get the sentinel numeric max.
// Braces/mask test elided in this excerpt.
1733 template<typename _Tp, typename _Rt>
1734 void batchDistL2_(const _Tp* src1, const _Tp* src2, size_t step2,
1735                   int nvecs, int len, _Rt* dist, const uchar* mask)
1737     step2 /= sizeof(src2[0]);
1740         for( int i = 0; i < nvecs; i++ )
1741             dist[i] = std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len));
1745         _Rt val0 = std::numeric_limits<_Rt>::max();
1746         for( int i = 0; i < nvecs; i++ )
1747             dist[i] = mask[i] ? std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len)) : val0;
// Hamming distance (1-bit cells) from `src1` to each of `nvecs` rows of
// `src2`.  The declaration of the sentinel `val0` and the mask test are
// elided in this excerpt.
1751 static void batchDistHamming(const uchar* src1, const uchar* src2, size_t step2,
1752                              int nvecs, int len, int* dist, const uchar* mask)
1754     step2 /= sizeof(src2[0]);
1757         for( int i = 0; i < nvecs; i++ )
1758             dist[i] = normHamming(src1, src2 + step2*i, len);
1763         for( int i = 0; i < nvecs; i++ )
1764             dist[i] = mask[i] ? normHamming(src1, src2 + step2*i, len) : val0;
// Hamming distance with 2-bit cells (NORM_HAMMING2) from `src1` to each of
// `nvecs` rows of `src2`.  `val0` declaration and mask test elided here.
1768 static void batchDistHamming2(const uchar* src1, const uchar* src2, size_t step2,
1769                               int nvecs, int len, int* dist, const uchar* mask)
1771     step2 /= sizeof(src2[0]);
1774         for( int i = 0; i < nvecs; i++ )
1775             dist[i] = normHamming(src1, src2 + step2*i, len, 2);
1780         for( int i = 0; i < nvecs; i++ )
1781             dist[i] = mask[i] ? normHamming(src1, src2 + step2*i, len, 2) : val0;
// Non-template stubs binding each (element type, result type, metric)
// combination to the templated batchDist* kernels, so they can be stored
// in the type-erased BatchDistFunc table used by cv::batchDistance.
// The surrounding braces of each one-line body are elided in this excerpt.
1785 static void batchDistL1_8u32s(const uchar* src1, const uchar* src2, size_t step2,
1786                               int nvecs, int len, int* dist, const uchar* mask)
1788     batchDistL1_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask);
1791 static void batchDistL1_8u32f(const uchar* src1, const uchar* src2, size_t step2,
1792                               int nvecs, int len, float* dist, const uchar* mask)
1794     batchDistL1_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
1797 static void batchDistL2Sqr_8u32s(const uchar* src1, const uchar* src2, size_t step2,
1798                                  int nvecs, int len, int* dist, const uchar* mask)
1800     batchDistL2Sqr_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask);
1803 static void batchDistL2Sqr_8u32f(const uchar* src1, const uchar* src2, size_t step2,
1804                                  int nvecs, int len, float* dist, const uchar* mask)
1806     batchDistL2Sqr_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
1809 static void batchDistL2_8u32f(const uchar* src1, const uchar* src2, size_t step2,
1810                               int nvecs, int len, float* dist, const uchar* mask)
1812     batchDistL2_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
1815 static void batchDistL1_32f(const float* src1, const float* src2, size_t step2,
1816                             int nvecs, int len, float* dist, const uchar* mask)
1818     batchDistL1_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
1821 static void batchDistL2Sqr_32f(const float* src1, const float* src2, size_t step2,
1822                                int nvecs, int len, float* dist, const uchar* mask)
1824     batchDistL2Sqr_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
1827 static void batchDistL2_32f(const float* src1, const float* src2, size_t step2,
1828                             int nvecs, int len, float* dist, const uchar* mask)
1830     batchDistL2_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
// Type-erased batch-distance kernel signature; the stubs above are cast to
// this type when selected in cv::batchDistance.
1833 typedef void (*BatchDistFunc)(const uchar* src1, const uchar* src2, size_t step2,
1834                               int nvecs, int len, uchar* dist, const uchar* mask);
// Parallel body for cv::batchDistance: each worker processes a range of
// src1 rows, computing that row's distances to all src2 rows and, when
// K > 0, maintaining the K smallest (dist, index) pairs by insertion into
// the sorted prefix dist/nidx rows.  Member declarations, constructor body
// and several loop lines are elided in this excerpt.
1837 struct BatchDistInvoker : public ParallelLoopBody
1839     BatchDistInvoker( const Mat& _src1, const Mat& _src2,
1840                       Mat& _dist, Mat& _nidx, int _K,
1841                       const Mat& _mask, int _update,
1842                       BatchDistFunc _func)
1854     void operator()(const Range& range) const
// Scratch buffer for raw distances when K-NN selection is requested.
1856         AutoBuffer<int> buf(src2->rows);
1859         for( int i = range.start; i < range.end; i++ )
// Compute all nvecs distances for query row i; with K > 0 they go to the
// scratch buffer, otherwise straight into the output row.
1861             func(src1->ptr(i), src2->ptr(), src2->step, src2->rows, src2->cols,
1862                  K > 0 ? (uchar*)bufptr : dist->ptr(i), mask->data ? mask->ptr(i) : 0);
1866                 int* nidxptr = nidx->ptr<int>(i);
1867                 // since positive float's can be compared just like int's,
1868                 // we handle both CV_32S and CV_32F cases with a single branch
1869                 int* distptr = (int*)dist->ptr(i);
1873                 for( j = 0; j < src2->rows; j++ )
// Insertion step: shift worse entries right to make room for d.
1876                     if( d < distptr[K-1] )
1878                         for( k = K-2; k >= 0 && distptr[k] > d; k-- )
1880                             nidxptr[k+1] = nidxptr[k];
1881                             distptr[k+1] = distptr[k];
1883                         nidxptr[k+1] = j + update;
// Distances from every row of src1 to every row of src2, optionally
// keeping only the K nearest per query (with indices in _nidx) and
// optionally cross-checking matches.  Supports CV_8U/CV_32F inputs with
// the metric/dtype combinations enumerated below.  Braces and some
// locals (tdist/tidx, the crosscheck guard) are elided in this excerpt.
1903 void cv::batchDistance( InputArray _src1, InputArray _src2,
1904                         OutputArray _dist, int dtype, OutputArray _nidx,
1905                         int normType, int K, InputArray _mask,
1906                         int update, bool crosscheck )
1908     Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
1909     int type = src1.type();
1910     CV_Assert( type == src2.type() && src1.cols == src2.cols &&
1911                (type == CV_32F || type == CV_8U));
1912     CV_Assert( _nidx.needed() == (K > 0) );
// Default dtype: integer counts for Hamming metrics, float otherwise.
1916         dtype = normType == NORM_HAMMING || normType == NORM_HAMMING2 ? CV_32S : CV_32F;
1918     CV_Assert( (type == CV_8U && dtype == CV_32S) || dtype == CV_32F);
1920     K = std::min(K, src2.rows);
1922     _dist.create(src1.rows, (K > 0 ? K : src2.rows), dtype);
1923     Mat dist = _dist.getMat(), nidx;
1924     if( _nidx.needed() )
1926         _nidx.create(dist.size(), CV_32S);
1927         nidx = _nidx.getMat();
// Fresh K-NN run: seed distances with +inf sentinels and indices with -1.
1930     if( update == 0 && K > 0 )
1932         dist = Scalar::all(dtype == CV_32S ? (double)INT_MAX : (double)FLT_MAX);
1933         nidx = Scalar::all(-1);
// Cross-check branch (guard line elided): run the reverse 1-NN query and
// keep only mutually-nearest pairs.
1938         CV_Assert( K == 1 && update == 0 && mask.empty() );
1940         batchDistance(src2, src1, tdist, dtype, tidx, normType, K, mask, 0, false);
1942         // if an idx-th element from src1 appeared to be the nearest to i-th element of src2,
1943         // we update the minimum mutual distance between idx-th element of src1 and the whole src2 set.
1944         // As a result, if nidx[idx] = i*, it means that idx-th element of src1 is the nearest
1945         // to i*-th element of src2 and i*-th element of src2 is the closest to idx-th element of src1.
1946         // If nidx[idx] = -1, it means that there is no such ideal couple for it in src2.
1947         // This O(N) procedure is called cross-check and it helps to eliminate some false matches.
1948         if( dtype == CV_32S )
1950             for( int i = 0; i < tdist.rows; i++ )
1952                 int idx = tidx.at<int>(i);
1953                 int d = tdist.at<int>(i), d0 = dist.at<int>(idx);
1956                     dist.at<int>(idx) = d;
1957                     nidx.at<int>(idx) = i + update;
1963             for( int i = 0; i < tdist.rows; i++ )
1965                 int idx = tidx.at<int>(i);
1966                 float d = tdist.at<float>(i), d0 = dist.at<float>(idx);
1969                     dist.at<float>(idx) = d;
1970                     nidx.at<int>(idx) = i + update;
// Select the concrete kernel for (type, dtype, normType); unsupported
// combinations fall through to CV_Error_ below.
1977     BatchDistFunc func = 0;
1980         if( normType == NORM_L1 && dtype == CV_32S )
1981             func = (BatchDistFunc)batchDistL1_8u32s;
1982         else if( normType == NORM_L1 && dtype == CV_32F )
1983             func = (BatchDistFunc)batchDistL1_8u32f;
1984         else if( normType == NORM_L2SQR && dtype == CV_32S )
1985             func = (BatchDistFunc)batchDistL2Sqr_8u32s;
1986         else if( normType == NORM_L2SQR && dtype == CV_32F )
1987             func = (BatchDistFunc)batchDistL2Sqr_8u32f;
1988         else if( normType == NORM_L2 && dtype == CV_32F )
1989             func = (BatchDistFunc)batchDistL2_8u32f;
1990         else if( normType == NORM_HAMMING && dtype == CV_32S )
1991             func = (BatchDistFunc)batchDistHamming;
1992         else if( normType == NORM_HAMMING2 && dtype == CV_32S )
1993             func = (BatchDistFunc)batchDistHamming2;
1995     else if( type == CV_32F && dtype == CV_32F )
1997         if( normType == NORM_L1 )
1998             func = (BatchDistFunc)batchDistL1_32f;
1999         else if( normType == NORM_L2SQR )
2000             func = (BatchDistFunc)batchDistL2Sqr_32f;
2001         else if( normType == NORM_L2 )
2002             func = (BatchDistFunc)batchDistL2_32f;
2006         CV_Error_(CV_StsUnsupportedFormat,
2007                   ("The combination of type=%d, dtype=%d and normType=%d is not supported",
2008                   type, dtype, normType));
// One parallel task per chunk of src1 rows.
2010     parallel_for_(Range(0, src1.rows),
2011                   BatchDistInvoker(src1, src2, dist, nidx, K, mask, update, func));
// Collect the (x, y) coordinates of all non-zero pixels of a CV_8UC1 image
// into an n x 1 CV_32SC2 output.  The per-pixel non-zero test inside the
// inner loop is elided in this excerpt, as is the guard around the
// non-continuous-output check.
2015 void cv::findNonZero( InputArray _src, OutputArray _idx )
2017     Mat src = _src.getMat();
2018     CV_Assert( src.type() == CV_8UC1 );
// Count first so the output can be allocated exactly once.
2019     int n = countNonZero(src);
2020     if( _idx.kind() == _InputArray::MAT && !_idx.getMatRef().isContinuous() )
2022     _idx.create(n, 1, CV_32SC2);
2023     Mat idx = _idx.getMat();
2024     CV_Assert(idx.isContinuous());
2025     Point* idx_ptr = (Point*)idx.data;
2027     for( int i = 0; i < src.rows; i++ )
2029         const uchar* bin_ptr = src.ptr(i);
2030         for( int j = 0; j < src.cols; j++ )
// Emit Point(x=j, y=i); presumably guarded by bin_ptr[j] != 0 on the
// elided line — confirm against the full source.
2032                 *idx_ptr++ = Point(j, i);
// Legacy C-API wrapper around cv::sum.  If the input is an IplImage with a
// channel-of-interest (COI) selected, only that channel's sum is returned
// in the first scalar slot.  The COI guard and final return are elided.
2037 CV_IMPL CvScalar cvSum( const CvArr* srcarr )
2039     cv::Scalar sum = cv::sum(cv::cvarrToMat(srcarr, false, true, 1));
2040     if( CV_IS_IMAGE(srcarr) )
2042         int coi = cvGetImageCOI((IplImage*)srcarr);
2045             CV_Assert( 0 < coi && coi <= 4 );
2046             sum = cv::Scalar(sum[coi-1]);
// Legacy C-API wrapper around cv::countNonZero; multi-channel inputs are
// reduced to their channel of interest first.
2052 CV_IMPL int cvCountNonZero( const CvArr* imgarr )
2054     cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
2055     if( img.channels() > 1 )
2056         cv::extractImageCOI(imgarr, img);
2057     return countNonZero(img);
// Legacy C-API wrapper around cv::mean (optionally masked); an IplImage
// COI selects a single channel of the result.  Return line elided.
2062 cvAvg( const void* imgarr, const void* maskarr )
2064     cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
2065     cv::Scalar mean = !maskarr ? cv::mean(img) : cv::mean(img, cv::cvarrToMat(maskarr));
2066     if( CV_IS_IMAGE(imgarr) )
2068         int coi = cvGetImageCOI((IplImage*)imgarr);
2071             CV_Assert( 0 < coi && coi <= 4 );
2072             mean = cv::Scalar(mean[coi-1]);
// Legacy C-API wrapper around cv::meanStdDev: writes mean and standard
// deviation through the optional output pointers; IplImage COI selects a
// single channel.  Null-pointer guards around the stores are elided.
2080 cvAvgSdv( const CvArr* imgarr, CvScalar* _mean, CvScalar* _sdv, const void* maskarr )
2082     cv::Scalar mean, sdv;
2086         mask = cv::cvarrToMat(maskarr);
2088     cv::meanStdDev(cv::cvarrToMat(imgarr, false, true, 1), mean, sdv, mask );
2090     if( CV_IS_IMAGE(imgarr) )
2092         int coi = cvGetImageCOI((IplImage*)imgarr);
2095             CV_Assert( 0 < coi && coi <= 4 );
2096             mean = cv::Scalar(mean[coi-1]);
2097             sdv = cv::Scalar(sdv[coi-1]);
2102         *(cv::Scalar*)_mean = mean;
2104         *(cv::Scalar*)_sdv = sdv;
// Legacy C-API wrapper around cv::minMaxLoc; multi-channel inputs are
// reduced to their channel of interest first, and results are written
// through the optional output pointers.
2109 cvMinMaxLoc( const void* imgarr, double* _minVal, double* _maxVal,
2110              CvPoint* _minLoc, CvPoint* _maxLoc, const void* maskarr )
2112     cv::Mat mask, img = cv::cvarrToMat(imgarr, false, true, 1);
2114         mask = cv::cvarrToMat(maskarr);
2115     if( img.channels() > 1 )
2116         cv::extractImageCOI(imgarr, img);
2118     cv::minMaxLoc( img, _minVal, _maxVal,
2119                    (cv::Point*)_minLoc, (cv::Point*)_maxLoc, mask );
// Legacy C-API wrapper around the two cv::norm overloads: single-array
// norm when imgB is null, difference norm otherwise; IplImage COIs are
// honored via extractImageCOI.  The imgB==0 guard line is elided.
2124 cvNorm( const void* imgA, const void* imgB, int normType, const void* maskarr )
2133     a = cv::cvarrToMat(imgA, false, true, 1);
2135         mask = cv::cvarrToMat(maskarr);
2137     if( a.channels() > 1 && CV_IS_IMAGE(imgA) && cvGetImageCOI((const IplImage*)imgA) > 0 )
2138         cv::extractImageCOI(imgA, a);
2141         return !maskarr ? cv::norm(a, normType) : cv::norm(a, normType, mask);
2143     cv::Mat b = cv::cvarrToMat(imgB, false, true, 1);
2144     if( b.channels() > 1 && CV_IS_IMAGE(imgB) && cvGetImageCOI((const IplImage*)imgB) > 0 )
2145         cv::extractImageCOI(imgB, b);
2147     return !maskarr ? cv::norm(a, b, normType) : cv::norm(a, b, normType, mask);