1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "precomp.hpp"
47 #include "opencl_kernels_core.hpp"
// rawToScalar: expand a packed multi-channel value 'v' of type T into a
// cv::Scalar, copying one channel per Scalar slot.
// NOTE(review): this excerpt elides lines of the original file — the function
// braces, the 'Scalar s;' local and the 'return s;' are not visible here.
52 template<typename T> static inline Scalar rawToScalar(const T& v)
55 typedef typename DataType<T>::channel_type T1;  // per-channel element type of T
56 int i, n = DataType<T>::channels;               // channel count of T
57 for( i = 0; i < n; i++ )
58 s.val[i] = ((T1*)&v)[i];                        // reinterpret v as an array of T1
62 /****************************************************************************************\
64 \****************************************************************************************/
// Sum_SIMD<T, ST>: generic (unspecialized) fallback functor used when no
// SIMD specialization exists for this (source, accumulator) type pair.
// NOTE(review): the 'struct Sum_SIMD' line and the body are elided from this
// excerpt; presumably it returns 0 so sum_() processes every element in its
// scalar loops — confirm against the full file.
66 template <typename T, typename ST>
69 int operator () (const T *, const uchar *, ST *, int, int) const
// NEON-accelerated sum of 8-bit unsigned data into int accumulators.
// Returns the number of elements consumed by the vector code; sum_()
// finishes the remainder with scalar loops. The SIMD path is only taken for
// unmasked data with 1, 2 or 4 channels, so the four 32-bit lanes fold
// evenly onto the channels in the final loop below.
// NOTE(review): the surrounding #if CV_NEON guard, 'int x = 0;', the
// per-lane store 'dst[j] += ar[j + i];' and 'return x;' are elided here.
78 struct Sum_SIMD<uchar, int>
80 int operator () (const uchar * src0, const uchar * mask, int * dst, int len, int cn) const
82 if (mask || (cn != 1 && cn != 2 && cn != 4))
86 uint32x4_t v_sum = vdupq_n_u32(0u);  // 4 x u32 partial sums
88 for ( ; x <= len - 16; x += 16)
90 uint8x16_t v_src = vld1q_u8(src0 + x);
91 uint16x8_t v_half = vmovl_u8(vget_low_u8(v_src));  // widen low 8 bytes to u16
93 v_sum = vaddq_u32(v_sum, vmovl_u16(vget_low_u16(v_half)));
94 v_sum = vaddq_u32(v_sum, vmovl_u16(vget_high_u16(v_half)));
96 v_half = vmovl_u8(vget_high_u8(v_src));            // widen high 8 bytes
97 v_sum = vaddq_u32(v_sum, vmovl_u16(vget_low_u16(v_half)));
98 v_sum = vaddq_u32(v_sum, vmovl_u16(vget_high_u16(v_half)));
101 for ( ; x <= len - 8; x += 8)                      // 8-element tail
103 uint16x8_t v_src = vmovl_u8(vld1_u8(src0 + x));
105 v_sum = vaddq_u32(v_sum, vmovl_u16(vget_low_u16(v_src)));
106 v_sum = vaddq_u32(v_sum, vmovl_u16(vget_high_u16(v_src)));
109 unsigned int CV_DECL_ALIGNED(16) ar[4];            // spill lanes to memory
110 vst1q_u32(ar, v_sum);
112 for (int i = 0; i < 4; i += cn)                    // fold lanes onto channels
113 for (int j = 0; j < cn; ++j)
// NEON-accelerated sum of 8-bit signed data into int accumulators — signed
// twin of the uchar specialization above (widen s8 -> s16 -> s32, then fold
// the four lanes onto the 1/2/4 channels).
// NOTE(review): #if CV_NEON guard, 'int x = 0;', lane fold-in and the return
// are elided from this excerpt.
121 struct Sum_SIMD<schar, int>
123 int operator () (const schar * src0, const uchar * mask, int * dst, int len, int cn) const
125 if (mask || (cn != 1 && cn != 2 && cn != 4))
129 int32x4_t v_sum = vdupq_n_s32(0);
131 for ( ; x <= len - 16; x += 16)
133 int8x16_t v_src = vld1q_s8(src0 + x);
134 int16x8_t v_half = vmovl_s8(vget_low_s8(v_src));   // widen low 8 bytes to s16
136 v_sum = vaddq_s32(v_sum, vmovl_s16(vget_low_s16(v_half)));
137 v_sum = vaddq_s32(v_sum, vmovl_s16(vget_high_s16(v_half)));
139 v_half = vmovl_s8(vget_high_s8(v_src));            // widen high 8 bytes
140 v_sum = vaddq_s32(v_sum, vmovl_s16(vget_low_s16(v_half)));
141 v_sum = vaddq_s32(v_sum, vmovl_s16(vget_high_s16(v_half)));
144 for ( ; x <= len - 8; x += 8)                      // 8-element tail
146 int16x8_t v_src = vmovl_s8(vld1_s8(src0 + x));
148 v_sum = vaddq_s32(v_sum, vmovl_s16(vget_low_s16(v_src)));
149 v_sum = vaddq_s32(v_sum, vmovl_s16(vget_high_s16(v_src)));
152 int CV_DECL_ALIGNED(16) ar[4];
153 vst1q_s32(ar, v_sum);
155 for (int i = 0; i < 4; i += cn)
156 for (int j = 0; j < cn; ++j)
// NEON-accelerated sum of 16-bit unsigned data into int accumulators:
// widen u16 -> u32 and accumulate 8 (then 4) elements per iteration.
// NOTE(review): #if CV_NEON guard, 'int x = 0;', lane fold-in and the return
// are elided from this excerpt.
164 struct Sum_SIMD<ushort, int>
166 int operator () (const ushort * src0, const uchar * mask, int * dst, int len, int cn) const
168 if (mask || (cn != 1 && cn != 2 && cn != 4))
172 uint32x4_t v_sum = vdupq_n_u32(0u);
174 for ( ; x <= len - 8; x += 8)
176 uint16x8_t v_src = vld1q_u16(src0 + x);
178 v_sum = vaddq_u32(v_sum, vmovl_u16(vget_low_u16(v_src)));
179 v_sum = vaddq_u32(v_sum, vmovl_u16(vget_high_u16(v_src)));
182 for ( ; x <= len - 4; x += 4)                      // 4-element tail
183 v_sum = vaddq_u32(v_sum, vmovl_u16(vld1_u16(src0 + x)));
185 unsigned int CV_DECL_ALIGNED(16) ar[4];
186 vst1q_u32(ar, v_sum);
188 for (int i = 0; i < 4; i += cn)                    // fold lanes onto channels
189 for (int j = 0; j < cn; ++j)
// NEON-accelerated sum of 16-bit signed data into int accumulators — signed
// twin of the ushort specialization above.
// NOTE(review): 'vdupq_n_s32(0u)' passes an unsigned literal to the signed
// dup intrinsic; harmless (implicit conversion of 0) but inconsistent with
// the schar specialization's 'vdupq_n_s32(0)'.
// NOTE(review): #if CV_NEON guard, 'int x = 0;', lane fold-in and the return
// are elided from this excerpt.
197 struct Sum_SIMD<short, int>
199 int operator () (const short * src0, const uchar * mask, int * dst, int len, int cn) const
201 if (mask || (cn != 1 && cn != 2 && cn != 4))
205 int32x4_t v_sum = vdupq_n_s32(0u);
207 for ( ; x <= len - 8; x += 8)
209 int16x8_t v_src = vld1q_s16(src0 + x);
211 v_sum = vaddq_s32(v_sum, vmovl_s16(vget_low_s16(v_src)));
212 v_sum = vaddq_s32(v_sum, vmovl_s16(vget_high_s16(v_src)));
215 for ( ; x <= len - 4; x += 4)                      // 4-element tail
216 v_sum = vaddq_s32(v_sum, vmovl_s16(vld1_s16(src0 + x)));
218 int CV_DECL_ALIGNED(16) ar[4];
219 vst1q_s32(ar, v_sum);
221 for (int i = 0; i < 4; i += cn)
222 for (int j = 0; j < cn; ++j)
// sum_: generic channel-wise accumulation kernel. Adds 'len' pixels with
// 'cn' interleaved channels from src0 into dst (one accumulator per channel).
// A Sum_SIMD specialization is given first crack at the data; the scalar
// loops below finish from index i. Specialized fast paths exist for
// cn == 1, 2, 3; other channel counts are handled four channels at a time.
// NOTE(review): many control-flow lines (the mask==0 branch head, switch
// between channel cases, the masked-path head, and the returns) are elided
// from this excerpt; callers (cv::mean) appear to use the return value as
// the count of contributing pixels — confirm against the full file.
231 template<typename T, typename ST>
232 static int sum_(const T* src0, const uchar* mask, ST* dst, int len, int cn )
238 int i = vop(src0, mask, dst, len, cn), k = cn % 4;  // SIMD prefix + channel remainder
245 #if CV_ENABLE_UNROLLED
246 for(; i <= len - 4; i += 4, src += cn*4 )           // cn==1: 4x unrolled
247 s0 += src[0] + src[cn] + src[cn*2] + src[cn*3];
249 for( ; i < len; i++, src += cn )
255 ST s0 = dst[0], s1 = dst[1];                        // cn==2: two accumulators
256 for( ; i < len; i++, src += cn )
266 ST s0 = dst[0], s1 = dst[1], s2 = dst[2];           // cn==3: three accumulators
267 for( ; i < len; i++, src += cn )
278 for( ; k < cn; k += 4 )                             // remaining channels, 4 at a time
280 src = src0 + i*cn + k;
281 ST s0 = dst[k], s1 = dst[k+1], s2 = dst[k+2], s3 = dst[k+3];
282 for( ; i < len; i++, src += cn )
284 s0 += src[0]; s1 += src[1];
285 s2 += src[2]; s3 += src[3];
299 for( i = 0; i < len; i++ )                          // masked path begins here
309 ST s0 = dst[0], s1 = dst[1], s2 = dst[2];
310 for( i = 0; i < len; i++, src += 3 )
324 for( i = 0; i < len; i++, src += cn )               // masked, generic cn
328 #if CV_ENABLE_UNROLLED
329 for( ; k <= cn - 4; k += 4 )
332 s0 = dst[k] + src[k];
333 s1 = dst[k+1] + src[k+1];
334 dst[k] = s0; dst[k+1] = s1;
335 s0 = dst[k+2] + src[k+2];
336 s1 = dst[k+3] + src[k+3];
337 dst[k+2] = s0; dst[k+3] = s1;
349 static int sum8u( const uchar* src, const uchar* mask, int* dst, int len, int cn )
350 { return sum_(src, mask, dst, len, cn); }
352 static int sum8s( const schar* src, const uchar* mask, int* dst, int len, int cn )
353 { return sum_(src, mask, dst, len, cn); }
355 static int sum16u( const ushort* src, const uchar* mask, int* dst, int len, int cn )
356 { return sum_(src, mask, dst, len, cn); }
358 static int sum16s( const short* src, const uchar* mask, int* dst, int len, int cn )
359 { return sum_(src, mask, dst, len, cn); }
361 static int sum32s( const int* src, const uchar* mask, double* dst, int len, int cn )
362 { return sum_(src, mask, dst, len, cn); }
364 static int sum32f( const float* src, const uchar* mask, double* dst, int len, int cn )
365 { return sum_(src, mask, dst, len, cn); }
367 static int sum64f( const double* src, const uchar* mask, double* dst, int len, int cn )
368 { return sum_(src, mask, dst, len, cn); }
// SumFunc: type-erased signature shared by all depth-specific sum kernels;
// pointers are passed as uchar* and cast back by the selected wrapper.
370 typedef int (*SumFunc)(const uchar*, const uchar* mask, uchar*, int, int);
// getSumFunc: select the sum kernel for a CV_8U..CV_64F depth code.
// NOTE(review): the sum32s table entry and the braces / trailing 0 sentinel
// are elided from this excerpt.
372 static SumFunc getSumFunc(int depth)
374 static SumFunc sumTab[] =
376 (SumFunc)GET_OPTIMIZED(sum8u), (SumFunc)sum8s,
377 (SumFunc)sum16u, (SumFunc)sum16s,
379 (SumFunc)GET_OPTIMIZED(sum32f), (SumFunc)sum64f,
383 return sumTab[depth];
// countNonZero_: scalar reference implementation — counts elements of src
// that compare unequal to 0, with an optional 4x unrolled main loop.
// NOTE(review): the 'template<typename T>' line, the 'i'/'nz' declarations
// and the return are elided from this excerpt.
387 static int countNonZero_(const T* src, int len )
390 #if CV_ENABLE_UNROLLED
391 for(; i <= len - 4; i += 4 )
392 nz += (src[i] != 0) + (src[i+1] != 0) + (src[i+2] != 0) + (src[i+3] != 0);
394 for( ; i < len; i++ )
// countNonZero8u: 8-bit specialization with SSE2 and NEON fast paths.
// Both vector paths count ZERO elements and subtract from the processed
// length — hence the inverse popcount table for SSE2 and the 'i - sum'
// correction for NEON. The scalar loop finishes the tail.
// NOTE(review): the #if CV_SSE2 / #elif CV_NEON guards, 'int i = 0, nz = 0;',
// loop headers advancing src0/i/j, and the return are elided here.
399 static int countNonZero8u( const uchar* src, int len )
405 __m128i pattern = _mm_setzero_si128 ();
406 static uchar tab[256];
407 static volatile bool initialized = false;
410 // we compute inverse popcount table,
411 // since we pass (img[x] == 0) mask as index in the table.
412 for( int j = 0; j < 256; j++ )
415 for( int mask = 1; mask < 256; mask += mask )
416 val += (j & mask) == 0;
422 for (; i<=len-16; i+=16)
424 __m128i r0 = _mm_loadu_si128((const __m128i*)(src+i));
425 int val = _mm_movemask_epi8(_mm_cmpeq_epi8(r0, pattern));  // 16-bit "is zero" mask
426 nz += tab[val & 255] + tab[val >> 8];
// NEON path: data is blocked so the 8-bit per-lane zero counters cannot
// overflow (inner blocks < 256 bytes, then widened into 32-bit lanes).
430 int len0 = len & -16, blockSize1 = (1 << 8) - 16, blockSize0 = blockSize1 << 6;
431 uint32x4_t v_nz = vdupq_n_u32(0u);
432 uint8x16_t v_zero = vdupq_n_u8(0), v_1 = vdupq_n_u8(1);
433 const uchar * src0 = src;
437 int blockSizei = std::min(len0 - i, blockSize0), j = 0;
439 while (j < blockSizei)
441 int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
442 uint8x16_t v_pz = v_zero;
444 for( ; k <= blockSizej - 16; k += 16 )
445 v_pz = vaddq_u8(v_pz, vandq_u8(vceqq_u8(vld1q_u8(src0 + k), v_zero), v_1));
447 uint16x8_t v_p1 = vmovl_u8(vget_low_u8(v_pz)), v_p2 = vmovl_u8(vget_high_u8(v_pz));
448 v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_p1), vget_high_u16(v_p1)), v_nz);
449 v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_p2), vget_high_u16(v_p2)), v_nz);
458 CV_DECL_ALIGNED(16) unsigned int buf[4];
459 vst1q_u32(buf, v_nz);
460 nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);  // zeros -> non-zeros
462 for( ; i < len; i++ )
// countNonZero16u: 16-bit specialization. NEON path counts ZERO elements in
// overflow-safe blocks (16-bit lane counters, blocks of at most 2^15
// elements) and subtracts from the processed length; countNonZero_ finishes
// the tail.
// NOTE(review): the #if CV_NEON guard, 'int i = 0, nz = 0;' and the loop
// headers advancing src/i/j are elided from this excerpt.
467 static int countNonZero16u( const ushort* src, int len )
471 int len0 = len & -8, blockSize1 = (1 << 15), blockSize0 = blockSize1 << 6;
472 uint32x4_t v_nz = vdupq_n_u32(0u);
473 uint16x8_t v_zero = vdupq_n_u16(0), v_1 = vdupq_n_u16(1);
477 int blockSizei = std::min(len0 - i, blockSize0), j = 0;
479 while (j < blockSizei)
481 int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
482 uint16x8_t v_pz = v_zero;
484 for( ; k <= blockSizej - 8; k += 8 )
485 v_pz = vaddq_u16(v_pz, vandq_u16(vceqq_u16(vld1q_u16(src + k), v_zero), v_1));
487 v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_pz), vget_high_u16(v_pz)), v_nz);
496 CV_DECL_ALIGNED(16) unsigned int buf[4];
497 vst1q_u32(buf, v_nz);
498 nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);  // zeros -> non-zeros
500 return nz + countNonZero_(src, len - i);
// countNonZero32s: 32-bit int specialization, same blocked zero-counting
// scheme as countNonZero16u (compare 2x4 ints to zero, narrow to u16 lane
// counters, widen into 32-bit totals).
// NOTE(review): 'vdupq_n_s32(0.0f)' passes a FLOAT literal to the signed-int
// dup intrinsic — it converts to 0 so behavior is correct, but it should be
// 'vdupq_n_s32(0)'; looks like a copy-paste from the 32f variant below.
// NOTE(review): the #if CV_NEON guard, 'int i = 0, nz = 0;' and the loop
// headers advancing src/i/j are elided from this excerpt.
503 static int countNonZero32s( const int* src, int len )
507 int len0 = len & -8, blockSize1 = (1 << 15), blockSize0 = blockSize1 << 6;
508 uint32x4_t v_nz = vdupq_n_u32(0u);
509 int32x4_t v_zero = vdupq_n_s32(0.0f);
510 uint16x8_t v_1 = vdupq_n_u16(1u), v_zerou = vdupq_n_u16(0u);
514 int blockSizei = std::min(len0 - i, blockSize0), j = 0;
516 while (j < blockSizei)
518 int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
519 uint16x8_t v_pz = v_zerou;
521 for( ; k <= blockSizej - 8; k += 8 )
522 v_pz = vaddq_u16(v_pz, vandq_u16(vcombine_u16(vmovn_u32(vceqq_s32(vld1q_s32(src + k), v_zero)),
523 vmovn_u32(vceqq_s32(vld1q_s32(src + k + 4), v_zero))), v_1));
525 v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_pz), vget_high_u16(v_pz)), v_nz);
534 CV_DECL_ALIGNED(16) unsigned int buf[4];
535 vst1q_u32(buf, v_nz);
536 nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);  // zeros -> non-zeros
538 return nz + countNonZero_(src, len - i);
// countNonZero32f: 32-bit float specialization, same blocked zero-counting
// scheme as countNonZero32s but with float compares (exact 0.0f test;
// negative zero compares equal to zero, consistent with the scalar path's
// 'src[i] != 0').
// NOTE(review): the #if CV_NEON guard, 'int i = 0, nz = 0;' and the loop
// headers advancing src/i/j are elided from this excerpt.
541 static int countNonZero32f( const float* src, int len )
545 int len0 = len & -8, blockSize1 = (1 << 15), blockSize0 = blockSize1 << 6;
546 uint32x4_t v_nz = vdupq_n_u32(0u);
547 float32x4_t v_zero = vdupq_n_f32(0.0f);
548 uint16x8_t v_1 = vdupq_n_u16(1u), v_zerou = vdupq_n_u16(0u);
552 int blockSizei = std::min(len0 - i, blockSize0), j = 0;
554 while (j < blockSizei)
556 int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
557 uint16x8_t v_pz = v_zerou;
559 for( ; k <= blockSizej - 8; k += 8 )
560 v_pz = vaddq_u16(v_pz, vandq_u16(vcombine_u16(vmovn_u32(vceqq_f32(vld1q_f32(src + k), v_zero)),
561 vmovn_u32(vceqq_f32(vld1q_f32(src + k + 4), v_zero))), v_1));
563 v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_pz), vget_high_u16(v_pz)), v_nz);
572 CV_DECL_ALIGNED(16) unsigned int buf[4];
573 vst1q_u32(buf, v_nz);
574 nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);  // zeros -> non-zeros
576 return nz + countNonZero_(src, len - i);
579 static int countNonZero64f( const double* src, int len )
580 { return countNonZero_(src, len); }
// CountNonZeroFunc: type-erased signature for the depth-specific kernels.
582 typedef int (*CountNonZeroFunc)(const uchar*, int);
// getCountNonZeroTab: select the kernel for a CV_8U..CV_64F depth code.
// Signed depths reuse the unsigned kernel of the same element size —
// presumably because 'x != 0' is representation-independent for integers.
// NOTE(review): braces around the table initializer are elided here.
584 static CountNonZeroFunc getCountNonZeroTab(int depth)
586 static CountNonZeroFunc countNonZeroTab[] =
588 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u),
589 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u),
590 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32s), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32f),
591 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero64f), 0
594 return countNonZeroTab[depth];
// sumsqr_: accumulate per-channel sum (ST) and sum-of-squares (SQT) over
// 'len' pixels of 'cn' interleaved channels, used by meanStdDev. Structured
// like sum_: specialized unmasked loops for cn == 1, 2, 3, a 4-at-a-time
// loop for the remaining channels, then a masked path.
// NOTE(review): the mask==0 branch head, case selection, masked-path head,
// several declarations ('src', 'i', 'k', 's0'/'sq0', 'v') and the returns
// are elided from this excerpt.
597 template<typename T, typename ST, typename SQT>
598 static int sumsqr_(const T* src0, const uchar* mask, ST* sum, SQT* sqsum, int len, int cn )
611 for( i = 0; i < len; i++, src += cn )               // cn==1
614 s0 += v; sq0 += (SQT)v*v;
621 ST s0 = sum[0], s1 = sum[1];                        // cn==2
622 SQT sq0 = sqsum[0], sq1 = sqsum[1];
623 for( i = 0; i < len; i++, src += cn )
625 T v0 = src[0], v1 = src[1];
626 s0 += v0; sq0 += (SQT)v0*v0;
627 s1 += v1; sq1 += (SQT)v1*v1;
629 sum[0] = s0; sum[1] = s1;
630 sqsum[0] = sq0; sqsum[1] = sq1;
634 ST s0 = sum[0], s1 = sum[1], s2 = sum[2];           // cn==3
635 SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
636 for( i = 0; i < len; i++, src += cn )
638 T v0 = src[0], v1 = src[1], v2 = src[2];
639 s0 += v0; sq0 += (SQT)v0*v0;
640 s1 += v1; sq1 += (SQT)v1*v1;
641 s2 += v2; sq2 += (SQT)v2*v2;
643 sum[0] = s0; sum[1] = s1; sum[2] = s2;
644 sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
647 for( ; k < cn; k += 4 )                             // remaining channels, 4 at a time
650 ST s0 = sum[k], s1 = sum[k+1], s2 = sum[k+2], s3 = sum[k+3];
651 SQT sq0 = sqsum[k], sq1 = sqsum[k+1], sq2 = sqsum[k+2], sq3 = sqsum[k+3];
652 for( i = 0; i < len; i++, src += cn )
655 v0 = src[0], v1 = src[1];
656 s0 += v0; sq0 += (SQT)v0*v0;
657 s1 += v1; sq1 += (SQT)v1*v1;
658 v0 = src[2], v1 = src[3];
659 s2 += v0; sq2 += (SQT)v0*v0;
660 s3 += v1; sq3 += (SQT)v1*v1;
662 sum[k] = s0; sum[k+1] = s1;
663 sum[k+2] = s2; sum[k+3] = s3;
664 sqsum[k] = sq0; sqsum[k+1] = sq1;
665 sqsum[k+2] = sq2; sqsum[k+3] = sq3;
676 for( i = 0; i < len; i++ )                          // masked path begins here
680 s0 += v; sq0 += (SQT)v*v;
688 ST s0 = sum[0], s1 = sum[1], s2 = sum[2];           // masked, cn==3
689 SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
690 for( i = 0; i < len; i++, src += 3 )
693 T v0 = src[0], v1 = src[1], v2 = src[2];
694 s0 += v0; sq0 += (SQT)v0*v0;
695 s1 += v1; sq1 += (SQT)v1*v1;
696 s2 += v2; sq2 += (SQT)v2*v2;
699 sum[0] = s0; sum[1] = s1; sum[2] = s2;
700 sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
704 for( i = 0; i < len; i++, src += cn )               // masked, generic cn
707 for( int k = 0; k < cn; k++ )
711 SQT sq = sqsum[k] + (SQT)v*v;
712 sum[k] = s; sqsum[k] = sq;
721 static int sqsum8u( const uchar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
722 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
724 static int sqsum8s( const schar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
725 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
727 static int sqsum16u( const ushort* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
728 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
730 static int sqsum16s( const short* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
731 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
733 static int sqsum32s( const int* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
734 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
736 static int sqsum32f( const float* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
737 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
739 static int sqsum64f( const double* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
740 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
// SumSqrFunc: type-erased signature for the depth-specific sum+sqsum kernels.
742 typedef int (*SumSqrFunc)(const uchar*, const uchar* mask, uchar*, uchar*, int, int);
// getSumSqrTab: select the sum/sum-of-squares kernel for a depth code.
// NOTE(review): braces around the table initializer are elided here.
744 static SumSqrFunc getSumSqrTab(int depth)
746 static SumSqrFunc sumSqrTab[] =
748 (SumSqrFunc)GET_OPTIMIZED(sqsum8u), (SumSqrFunc)sqsum8s, (SumSqrFunc)sqsum16u, (SumSqrFunc)sqsum16s,
749 (SumSqrFunc)sqsum32s, (SumSqrFunc)GET_OPTIMIZED(sqsum32f), (SumSqrFunc)sqsum64f, 0
752 return sumSqrTab[depth];
// ocl_part_sum<T>: fold a 1 x N row of per-workgroup partial results (as
// produced by the OpenCL reduce kernel) into a Scalar, accumulating every
// cn-th element into the matching channel slot.
// NOTE(review): the accumulation statement and 'return s;' are elided here.
757 template <typename T> Scalar ocl_part_sum(Mat m)
759 CV_Assert(m.rows == 1);
761 Scalar s = Scalar::all(0);
762 int cn = m.channels();
763 const T * const ptr = m.ptr<T>(0);
765 for (int x = 0, w = m.cols * cn; x < w; )
766 for (int c = 0; c < cn; ++c, ++x)
772 enum { OCL_OP_SUM = 0, OCL_OP_SUM_ABS = 1, OCL_OP_SUM_SQR = 2 };
// ocl_sum: OpenCL implementation of sum / sum-of-abs / sum-of-squares.
// Builds and runs the "reduce" kernel (one partial result per compute unit),
// then folds the per-group partials on the host with ocl_part_sum. Returns
// false when OpenCL cannot be used (no double support for CV_64F, cn > 4,
// or kernel build/run failure) so the caller falls back to the CPU path.
// 'calc2' computes a second reduction over _src2 into res2 in the same pass.
// NOTE(review): several lines (early 'return false;'s, 'char cvt[2][50];',
// kernel-empty check, the haveMask/haveSrc2 arg-dispatch conditionals and
// the final returns) are elided from this excerpt.
774 static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask = noArray(),
775 InputArray _src2 = noArray(), bool calc2 = false, const Scalar & res2 = Scalar() )
777 CV_Assert(sum_op == OCL_OP_SUM || sum_op == OCL_OP_SUM_ABS || sum_op == OCL_OP_SUM_SQR);
779 const ocl::Device & dev = ocl::Device::getDefault();
780 bool doubleSupport = dev.doubleFPConfig() > 0,
781 haveMask = _mask.kind() != _InputArray::NONE,
782 haveSrc2 = _src2.kind() != _InputArray::NONE;
783 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
784 kercn = cn == 1 && !haveMask ? ocl::predictOptimalVectorWidth(_src, _src2) : 1,
785 mcn = std::max(cn, kercn);
786 CV_Assert(!haveSrc2 || _src2.type() == type);
787 int convert_cn = haveSrc2 ? mcn : cn;
789 if ( (!doubleSupport && depth == CV_64F) || cn > 4 )
// One partial accumulator per compute unit (doubled when calc2 is set).
792 int ngroups = dev.maxComputeUnits(), dbsize = ngroups * (calc2 ? 2 : 1);
793 size_t wgs = dev.maxWorkGroupSize();
// Accumulator depth: at least 32-bit (float for SUM_SQR, int otherwise).
795 int ddepth = std::max(sum_op == OCL_OP_SUM_SQR ? CV_32F : CV_32S, depth),
796 dtype = CV_MAKE_TYPE(ddepth, cn);
797 CV_Assert(!haveMask || _mask.type() == CV_8UC1);
// Round the work-group size up to a power of two for the kernel's reduction.
799 int wgs2_aligned = 1;
800 while (wgs2_aligned < (int)wgs)
804 static const char * const opMap[3] = { "OP_SUM", "OP_SUM_ABS", "OP_SUM_SQR" };
// Compile-time configuration of the generic reduce kernel.
806 String opts = format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D dstT1=%s -D ddepth=%d -D cn=%d"
807 " -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s%s%s -D kercn=%d%s%s%s -D convertFromU=%s",
808 ocl::typeToStr(CV_MAKE_TYPE(depth, mcn)), ocl::typeToStr(depth),
809 ocl::typeToStr(dtype), ocl::typeToStr(CV_MAKE_TYPE(ddepth, mcn)),
810 ocl::typeToStr(ddepth), ddepth, cn,
811 ocl::convertTypeStr(depth, ddepth, mcn, cvt[0]),
812 opMap[sum_op], (int)wgs, wgs2_aligned,
813 doubleSupport ? " -D DOUBLE_SUPPORT" : "",
814 haveMask ? " -D HAVE_MASK" : "",
815 _src.isContinuous() ? " -D HAVE_SRC_CONT" : "",
816 haveMask && _mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn,
817 haveSrc2 ? " -D HAVE_SRC2" : "", calc2 ? " -D OP_CALC2" : "",
818 haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "",
819 depth <= CV_32S && ddepth == CV_32S ? ocl::convertTypeStr(CV_8U, ddepth, convert_cn, cvt[1]) : "noconvert");
821 ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, opts);
825 UMat src = _src.getUMat(), src2 = _src2.getUMat(),
826 db(1, dbsize, dtype), mask = _mask.getUMat();
828 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
829 dbarg = ocl::KernelArg::PtrWriteOnly(db),
830 maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
831 src2arg = ocl::KernelArg::ReadOnlyNoSize(src2);
// Argument lists differ by the presence of the mask and second source.
836 k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg, maskarg, src2arg);
838 k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg, maskarg);
843 k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg, src2arg);
845 k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg);
848 size_t globalsize = ngroups * wgs;
849 if (k.run(1, &globalsize, &wgs, false))
// Host-side fold of per-group partial sums, typed by accumulator depth.
851 typedef Scalar (*part_sum)(Mat m);
852 part_sum funcs[3] = { ocl_part_sum<int>, ocl_part_sum<float>, ocl_part_sum<double> },
853 func = funcs[ddepth - CV_32S];
855 Mat mres = db.getMat(ACCESS_READ);
857 const_cast<Scalar &>(res2) = func(mres.colRange(ngroups, dbsize));  // second half holds the calc2 result
859 res = func(mres.colRange(0, ngroups));
// cv::sum: per-channel sum of all array elements. Tries OpenCL, then IPP
// (2D / continuous arrays only), then the generic CPU path. The CPU path
// accumulates small integer depths in int blocks (flushed into the Scalar
// before the int accumulators could overflow) and wider depths directly.
// NOTE(review): many lines (the 'Scalar _res'/'Scalar s' declarations, IPP
// status checks, the blockSum buffer setup, accumulator flush into 's', and
// the final 'return s;') are elided from this excerpt.
869 cv::Scalar cv::sum( InputArray _src )
873 CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
874 ocl_sum(_src, _res, OCL_OP_SUM),
878 Mat src = _src.getMat();
879 int k, cn = src.channels(), depth = src.depth();
881 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
882 size_t total_size = src.total();
883 int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
884 if( src.dims == 2 || (src.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
886 IppiSize sz = { cols, rows };
887 int type = src.type();
888 typedef IppStatus (CV_STDCALL* ippiSumFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
889 typedef IppStatus (CV_STDCALL* ippiSumFuncNoHint)(const void*, int, IppiSize, double *);
// Map the Mat type to the matching IPP primitive (hinted for float).
890 ippiSumFuncHint ippFuncHint =
891 type == CV_32FC1 ? (ippiSumFuncHint)ippiSum_32f_C1R :
892 type == CV_32FC3 ? (ippiSumFuncHint)ippiSum_32f_C3R :
893 type == CV_32FC4 ? (ippiSumFuncHint)ippiSum_32f_C4R :
895 ippiSumFuncNoHint ippFuncNoHint =
896 type == CV_8UC1 ? (ippiSumFuncNoHint)ippiSum_8u_C1R :
897 type == CV_8UC3 ? (ippiSumFuncNoHint)ippiSum_8u_C3R :
898 type == CV_8UC4 ? (ippiSumFuncNoHint)ippiSum_8u_C4R :
899 type == CV_16UC1 ? (ippiSumFuncNoHint)ippiSum_16u_C1R :
900 type == CV_16UC3 ? (ippiSumFuncNoHint)ippiSum_16u_C3R :
901 type == CV_16UC4 ? (ippiSumFuncNoHint)ippiSum_16u_C4R :
902 type == CV_16SC1 ? (ippiSumFuncNoHint)ippiSum_16s_C1R :
903 type == CV_16SC3 ? (ippiSumFuncNoHint)ippiSum_16s_C3R :
904 type == CV_16SC4 ? (ippiSumFuncNoHint)ippiSum_16s_C4R :
906 CV_Assert(!ippFuncHint || !ippFuncNoHint)
907 if( ippFuncHint || ippFuncNoHint )
910 IppStatus ret = ippFuncHint ? ippFuncHint(src.ptr(), (int)src.step[0], sz, res, ippAlgHintAccurate) :
911 ippFuncNoHint(src.ptr(), (int)src.step[0], sz, res);
915 for( int i = 0; i < cn; i++ )
// ---- generic CPU fallback ----
924 SumFunc func = getSumFunc(depth);
926 CV_Assert( cn <= 4 && func != 0 );
928 const Mat* arrays[] = {&src, 0};
930 NAryMatIterator it(arrays, ptrs);
932 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
934 AutoBuffer<int> _buf;
935 int* buf = (int*)&s[0];
// Small integer depths accumulate into int and are flushed periodically.
937 bool blockSum = depth < CV_32S;
941 intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
942 blockSize = std::min(blockSize, intSumBlockSize);
946 for( k = 0; k < cn; k++ )
948 esz = src.elemSize();
951 for( size_t i = 0; i < it.nplanes; i++, ++it )
953 for( j = 0; j < total; j += blockSize )
955 int bsz = std::min(total - j, blockSize);
956 func( ptrs[0], 0, (uchar*)buf, bsz, cn );
// Flush int partials into the double Scalar before they can overflow.
958 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
960 for( k = 0; k < cn; k++ )
// ocl_countNonZero: OpenCL path for countNonZero. Runs the same generic
// "reduce" kernel in OP_COUNT_NON_ZERO mode (one partial count per compute
// unit, stored in 'db'), then sums the partials on the host via cv::sum.
// Returns false when OpenCL cannot be used so the caller falls back to CPU.
// NOTE(review): the early 'return false;'s and kernel-empty check are elided
// from this excerpt.
977 static bool ocl_countNonZero( InputArray _src, int & res )
979 int type = _src.type(), depth = CV_MAT_DEPTH(type), kercn = ocl::predictOptimalVectorWidth(_src);
980 bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
982 if (depth == CV_64F && !doubleSupport)
985 int dbsize = ocl::Device::getDefault().maxComputeUnits();
986 size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
// Round the work-group size up to a power of two for the reduction.
988 int wgs2_aligned = 1;
989 while (wgs2_aligned < (int)wgs)
993 ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
994 format("-D srcT=%s -D srcT1=%s -D cn=1 -D OP_COUNT_NON_ZERO"
995 " -D WGS=%d -D kercn=%d -D WGS2_ALIGNED=%d%s%s",
996 ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
997 ocl::typeToStr(depth), (int)wgs, kercn,
998 wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
999 _src.isContinuous() ? " -D HAVE_SRC_CONT" : ""));
1003 UMat src = _src.getUMat(), db(1, dbsize, CV_32SC1);
1004 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
1005 dbsize, ocl::KernelArg::PtrWriteOnly(db));
1007 size_t globalsize = dbsize * wgs;
1008 if (k.run(1, &globalsize, &wgs, true))
1009 return res = saturate_cast<int>(cv::sum(db.getMat(ACCESS_READ))[0]), true;
// cv::countNonZero: count non-zero elements of a single-channel array.
// Tries OpenCL, then an IPP path (disabled here by the '&& 0' in the guard),
// then the generic CPU kernels selected by depth. The IPP primitive counts
// elements IN the range [0, 0] (i.e. zeros), hence the 'total - count'.
// NOTE(review): some lines ('int res = -1;', IPP status check, the final
// 'return nz;') are elided from this excerpt.
1017 int cv::countNonZero( InputArray _src )
1019 int type = _src.type(), cn = CV_MAT_CN(type);
1020 CV_Assert( cn == 1 );
1024 CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
1025 ocl_countNonZero(_src, res),
1029 Mat src = _src.getMat();
1031 #if defined HAVE_IPP && !defined HAVE_IPP_ICV_ONLY && 0
1032 if (src.dims <= 2 || src.isContinuous())
1034 IppiSize roiSize = { src.cols, src.rows };
1035 Ipp32s count = 0, srcstep = (Ipp32s)src.step;
1036 IppStatus status = (IppStatus)-1;
1038 if (src.isContinuous())
1040 roiSize.width = (Ipp32s)src.total();
1042 srcstep = (Ipp32s)src.total() * CV_ELEM_SIZE(type);
1045 int depth = CV_MAT_DEPTH(type);
1047 status = ippiCountInRange_8u_C1R((const Ipp8u *)src.data, srcstep, roiSize, &count, 0, 0);
1048 else if (depth == CV_32F)
1049 status = ippiCountInRange_32f_C1R((const Ipp32f *)src.data, srcstep, roiSize, &count, 0, 0);
1052 return (Ipp32s)src.total() - count;
1053 setIppErrorStatus();
// ---- generic CPU fallback: iterate planes, sum per-plane counts ----
1057 CountNonZeroFunc func = getCountNonZeroTab(src.depth());
1058 CV_Assert( func != 0 );
1060 const Mat* arrays[] = {&src, 0};
1062 NAryMatIterator it(arrays, ptrs);
1063 int total = (int)it.size, nz = 0;
1065 for( size_t i = 0; i < it.nplanes; i++, ++it )
1066 nz += func( ptrs[0], total );
// cv::mean: per-channel mean, optionally restricted to mask != 0 pixels.
// Tries IPP (masked C1/C3 variants first, then unmasked), then the generic
// CPU path: sum via the depth kernel with block-wise int accumulation, then
// divide by the number of contributing pixels (nz0).
// NOTE(review): many lines (the 'Scalar s' declaration, IPP unmasked-branch
// head, blockSum buffer setup, flush of the int partials into 's' and the
// nz0 bookkeeping) are elided from this excerpt.
1071 cv::Scalar cv::mean( InputArray _src, InputArray _mask )
1073 Mat src = _src.getMat(), mask = _mask.getMat();
1074 CV_Assert( mask.empty() || mask.type() == CV_8U );
1076 int k, cn = src.channels(), depth = src.depth();
1078 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
1079 size_t total_size = src.total();
1080 int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
1081 if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
1083 IppiSize sz = { cols, rows };
1084 int type = src.type();
// Masked single-channel IPP variants.
1087 typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
1088 ippiMaskMeanFuncC1 ippFuncC1 =
1089 type == CV_8UC1 ? (ippiMaskMeanFuncC1)ippiMean_8u_C1MR :
1090 type == CV_16UC1 ? (ippiMaskMeanFuncC1)ippiMean_16u_C1MR :
1091 type == CV_32FC1 ? (ippiMaskMeanFuncC1)ippiMean_32f_C1MR :
1096 if( ippFuncC1(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &res) >= 0 )
1098 setIppErrorStatus();
// Masked three-channel IPP variants: one call per channel (1-based index).
1100 typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
1101 ippiMaskMeanFuncC3 ippFuncC3 =
1102 type == CV_8UC3 ? (ippiMaskMeanFuncC3)ippiMean_8u_C3CMR :
1103 type == CV_16UC3 ? (ippiMaskMeanFuncC3)ippiMean_16u_C3CMR :
1104 type == CV_32FC3 ? (ippiMaskMeanFuncC3)ippiMean_32f_C3CMR :
1108 Ipp64f res1, res2, res3;
1109 if( ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &res1) >= 0 &&
1110 ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &res2) >= 0 &&
1111 ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &res3) >= 0 )
1113 return Scalar(res1, res2, res3);
1115 setIppErrorStatus();
// Unmasked IPP variants (hinted for float, plain otherwise).
1120 typedef IppStatus (CV_STDCALL* ippiMeanFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
1121 typedef IppStatus (CV_STDCALL* ippiMeanFuncNoHint)(const void*, int, IppiSize, double *);
1122 ippiMeanFuncHint ippFuncHint =
1123 type == CV_32FC1 ? (ippiMeanFuncHint)ippiMean_32f_C1R :
1124 type == CV_32FC3 ? (ippiMeanFuncHint)ippiMean_32f_C3R :
1125 type == CV_32FC4 ? (ippiMeanFuncHint)ippiMean_32f_C4R :
1127 ippiMeanFuncNoHint ippFuncNoHint =
1128 type == CV_8UC1 ? (ippiMeanFuncNoHint)ippiMean_8u_C1R :
1129 type == CV_8UC3 ? (ippiMeanFuncNoHint)ippiMean_8u_C3R :
1130 type == CV_8UC4 ? (ippiMeanFuncNoHint)ippiMean_8u_C4R :
1131 type == CV_16UC1 ? (ippiMeanFuncNoHint)ippiMean_16u_C1R :
1132 type == CV_16UC3 ? (ippiMeanFuncNoHint)ippiMean_16u_C3R :
1133 type == CV_16UC4 ? (ippiMeanFuncNoHint)ippiMean_16u_C4R :
1134 type == CV_16SC1 ? (ippiMeanFuncNoHint)ippiMean_16s_C1R :
1135 type == CV_16SC3 ? (ippiMeanFuncNoHint)ippiMean_16s_C3R :
1136 type == CV_16SC4 ? (ippiMeanFuncNoHint)ippiMean_16s_C4R :
1138 // Make sure only zero or one version of the function pointer is valid
1139 CV_Assert(!ippFuncHint || !ippFuncNoHint);
1140 if( ippFuncHint || ippFuncNoHint )
1143 IppStatus ret = ippFuncHint ? ippFuncHint(src.ptr(), (int)src.step[0], sz, res, ippAlgHintAccurate) :
1144 ippFuncNoHint(src.ptr(), (int)src.step[0], sz, res);
1148 for( int i = 0; i < cn; i++ )
1152 setIppErrorStatus();
// ---- generic CPU fallback ----
1158 SumFunc func = getSumFunc(depth);
1160 CV_Assert( cn <= 4 && func != 0 );
1162 const Mat* arrays[] = {&src, &mask, 0};
1164 NAryMatIterator it(arrays, ptrs);
1166 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
1168 AutoBuffer<int> _buf;
1169 int* buf = (int*)&s[0];
// Small integer depths accumulate into int and are flushed periodically.
1170 bool blockSum = depth <= CV_16S;
1171 size_t esz = 0, nz0 = 0;
1175 intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
1176 blockSize = std::min(blockSize, intSumBlockSize);
1180 for( k = 0; k < cn; k++ )
1182 esz = src.elemSize();
1185 for( size_t i = 0; i < it.nplanes; i++, ++it )
1187 for( j = 0; j < total; j += blockSize )
1189 int bsz = std::min(total - j, blockSize);
1190 int nz = func( ptrs[0], ptrs[1], (uchar*)buf, bsz, cn );  // nz: pixels that contributed
1193 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
1195 for( k = 0; k < cn; k++ )
// Divide by the number of contributing pixels (0 -> all-zero Scalar).
1207 return s*(nz0 ? 1./nz0 : 0);
// ocl_meanStdDev: OpenCL implementation of meanStdDev. Runs the meanStdDev
// kernel to produce per-workgroup partial sums, squared sums and (when
// masked) pixel counts packed into one byte buffer 'db', folds them on the
// host, then derives mean and stddev (sqrt(E[x^2] - E[x]^2), clamped at 0)
// and writes both into the caller's output arrays.
// NOTE(review): several lines ('char cvt[2][40];', early 'return false;'s,
// kernel-empty check, 'mean[i] *= total;'-style scaling, the continue on
// !needed, the dcn-tail zeroing and the final return) are elided here.
1214 static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
1216 bool haveMask = _mask.kind() != _InputArray::NONE;
1217 int nz = haveMask ? -1 : (int)_src.total();  // unmasked: every pixel contributes
1218 Scalar mean, stddev;
1221 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
1222 bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0,
1223 isContinuous = _src.isContinuous(),
1224 isMaskContinuous = _mask.isContinuous();
1225 const ocl::Device &defDev = ocl::Device::getDefault();
1226 int groups = defDev.maxComputeUnits();
// Intel-specific tuning: scale group count by EUs per subslice.
1227 if (defDev.isIntel())
1229 static const int subSliceEUCount = 10;
1230 groups = (groups / subSliceEUCount) * 2;
1232 size_t wgs = defDev.maxWorkGroupSize();
// Accumulator depths: >= int for sums, >= float for squared sums.
1234 int ddepth = std::max(CV_32S, depth), sqddepth = std::max(CV_32F, depth),
1235 dtype = CV_MAKE_TYPE(ddepth, cn),
1236 sqdtype = CV_MAKETYPE(sqddepth, cn);
1237 CV_Assert(!haveMask || _mask.type() == CV_8UC1);
// Round the work-group size up to a power of two for the reduction.
1239 int wgs2_aligned = 1;
1240 while (wgs2_aligned < (int)wgs)
1244 if ( (!doubleSupport && depth == CV_64F) || cn > 4 )
1248 String opts = format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D sqddepth=%d"
1249 " -D sqdstT=%s -D sqdstT1=%s -D convertToSDT=%s -D cn=%d%s%s"
1250 " -D convertToDT=%s -D WGS=%d -D WGS2_ALIGNED=%d%s%s",
1251 ocl::typeToStr(type), ocl::typeToStr(depth),
1252 ocl::typeToStr(dtype), ocl::typeToStr(ddepth), sqddepth,
1253 ocl::typeToStr(sqdtype), ocl::typeToStr(sqddepth),
1254 ocl::convertTypeStr(depth, sqddepth, cn, cvt[0]),
1255 cn, isContinuous ? " -D HAVE_SRC_CONT" : "",
1256 isMaskContinuous ? " -D HAVE_MASK_CONT" : "",
1257 ocl::convertTypeStr(depth, ddepth, cn, cvt[1]),
1258 (int)wgs, wgs2_aligned, haveMask ? " -D HAVE_MASK" : "",
1259 doubleSupport ? " -D DOUBLE_SUPPORT" : "");
1261 ocl::Kernel k("meanStdDev", ocl::core::meanstddev_oclsrc, opts);
// Packed per-group buffer: [sums | squared sums | (mask) counts].
1265 int dbsize = groups * ((haveMask ? CV_ELEM_SIZE1(CV_32S) : 0) +
1266 CV_ELEM_SIZE(sqdtype) + CV_ELEM_SIZE(dtype));
1267 UMat src = _src.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat();
1269 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
1270 dbarg = ocl::KernelArg::PtrWriteOnly(db),
1271 maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);
1274 k.args(srcarg, src.cols, (int)src.total(), groups, dbarg, maskarg);
1276 k.args(srcarg, src.cols, (int)src.total(), groups, dbarg);
1278 size_t globalsize = groups * wgs;
1279 if (!k.run(1, &globalsize, &wgs, false))
// Host-side fold of the partials, typed by accumulator depth.
1282 typedef Scalar (* part_sum)(Mat m);
1283 part_sum funcs[3] = { ocl_part_sum<int>, ocl_part_sum<float>, ocl_part_sum<double> };
1284 Mat dbm = db.getMat(ACCESS_READ);
1286 mean = funcs[ddepth - CV_32S](Mat(1, groups, dtype, dbm.ptr()));
1287 stddev = funcs[sqddepth - CV_32S](Mat(1, groups, sqdtype, dbm.ptr() + groups * CV_ELEM_SIZE(dtype)));
1290 nz = saturate_cast<int>(funcs[0](Mat(1, groups, CV_32SC1, dbm.ptr() +
1291 groups * (CV_ELEM_SIZE(dtype) +
1292 CV_ELEM_SIZE(sqdtype))))[0]);
1295 double total = nz != 0 ? 1.0 / nz : 0;  // reciprocal count (0 if nothing selected)
1296 int k, j, cn = _src.channels();
1297 for (int i = 0; i < cn; ++i)
1300 stddev[i] = std::sqrt(std::max(stddev[i] * total - mean[i] * mean[i] , 0.));
// Write mean (j==0) then stddev (j==1) into the caller's arrays.
1303 for( j = 0; j < 2; j++ )
1305 const double * const sptr = j == 0 ? &mean[0] : &stddev[0];
1306 _OutputArray _dst = j == 0 ? _mean : _sdv;
1307 if( !_dst.needed() )
1310 if( !_dst.fixedSize() )
1311 _dst.create(cn, 1, CV_64F, -1, true);
1312 Mat dst = _dst.getMat();
1313 int dcn = (int)dst.total();
1314 CV_Assert( dst.type() == CV_64F && dst.isContinuous() &&
1315 (dst.cols == 1 || dst.rows == 1) && dcn >= cn );
1316 double* dptr = dst.ptr<double>();
1317 for( k = 0; k < cn; k++ )
1319 for( ; k < dcn; k++ )  // zero any extra destination channels
// cv::meanStdDev: per-channel mean and standard deviation of _src, optionally
// restricted to the non-zero elements of an 8-bit single-channel _mask.
// Dispatch order visible below: OpenCL kernel -> IPP primitives -> generic
// per-depth sum / sum-of-squares accumulation via getSumSqrTab.
// NOTE(review): this extract omits some original lines (braces, a few
// declarations); comments describe only the code that is visible here.
1330 void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
// OpenCL fast path for <= 2-D UMat sources.
1332 CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
1333 ocl_meanStdDev(_src, _mean, _sdv, _mask))
1335 Mat src = _src.getMat(), mask = _mask.getMat();
// Mask, when present, must be 8-bit single-channel.
1336 CV_Assert( mask.empty() || mask.type() == CV_8UC1 );
1338 int k, cn = src.channels(), depth = src.depth();
1340 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
1341 size_t total_size = src.total();
// Treat a continuous n-D array as a single rows x cols 2-D plane for IPP.
1342 int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
1343 if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
// Scratch outputs used when the caller did not request mean and/or stddev.
1345 Ipp64f mean_temp[3];
1346 Ipp64f stddev_temp[3];
1347 Ipp64f *pmean = &mean_temp[0];
1348 Ipp64f *pstddev = &stddev_temp[0];
// When the caller does want an output, create it as a cn x 1 CV_64F array
// (unless fixed-size) and write straight into it.
1351 if( _mean.needed() )
1353 if( !_mean.fixedSize() )
1354 _mean.create(cn, 1, CV_64F, -1, true);
1355 mean = _mean.getMat();
1356 dcn_mean = (int)mean.total();
1357 pmean = mean.ptr<Ipp64f>();
1359 int dcn_stddev = -1;
1362 if( !_sdv.fixedSize() )
1363 _sdv.create(cn, 1, CV_64F, -1, true);
1364 stddev = _sdv.getMat();
1365 dcn_stddev = (int)stddev.total();
1366 pstddev = stddev.ptr<Ipp64f>();
// Zero out any output elements beyond the cn computed channels.
1368 for( int c = cn; c < dcn_mean; c++ )
1370 for( int c = cn; c < dcn_stddev; c++ )
1372 IppiSize sz = { cols, rows };
1373 int type = src.type();
// --- Masked IPP path: single-channel variant... ---
1376 typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *, Ipp64f *);
1377 ippiMaskMeanStdDevFuncC1 ippFuncC1 =
1378 type == CV_8UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_8u_C1MR :
1379 type == CV_16UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_16u_C1MR :
1380 type == CV_32FC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_32f_C1MR :
1384 if( ippFuncC1(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, pmean, pstddev) >= 0 )
// IPP failure is non-fatal: record it and fall through to the generic path.
1386 setIppErrorStatus();
// --- ...and 3-channel variant, invoked once per channel (C3CMR takes a
// 1-based channel-of-interest argument). ---
1388 typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
1389 ippiMaskMeanStdDevFuncC3 ippFuncC3 =
1390 type == CV_8UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CMR :
1391 type == CV_16UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CMR :
1392 type == CV_32FC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CMR :
1396 if( ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
1397 ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
1398 ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
1400 setIppErrorStatus();
// --- Unmasked IPP path, same single- / 3-channel split. ---
1405 typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC1)(const void *, int, IppiSize, Ipp64f *, Ipp64f *);
1406 ippiMeanStdDevFuncC1 ippFuncC1 =
1407 type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R :
1408 type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R :
// 32f variant only trusted from IPP 8.0.1 on (known bug in IPP 7.1 / 8.0).
1409 #if (IPP_VERSION_X100 >= 801)
1410 type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0
1415 if( ippFuncC1(src.ptr(), (int)src.step[0], sz, pmean, pstddev) >= 0 )
1417 setIppErrorStatus();
1419 typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC3)(const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
1420 ippiMeanStdDevFuncC3 ippFuncC3 =
1421 type == CV_8UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CR :
1422 type == CV_16UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CR :
1423 type == CV_32FC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CR :
1427 if( ippFuncC3(src.ptr(), (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
1428 ippFuncC3(src.ptr(), (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
1429 ippFuncC3(src.ptr(), (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
1431 setIppErrorStatus();
// --- Generic fallback: per-depth sum + sum-of-squares kernel. ---
1438 SumSqrFunc func = getSumSqrTab(depth);
1440 CV_Assert( func != 0 );
// Iterate over the (possibly non-continuous / n-D) array plane by plane.
1442 const Mat* arrays[] = {&src, &mask, 0};
1444 NAryMatIterator it(arrays, ptrs);
1445 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
1446 int j, count = 0, nz0 = 0;
// Buffer layout: s = per-channel sums, sq = per-channel sums of squares;
// for small depths the same memory doubles as int accumulators (sbuf/sqbuf)
// that get flushed into the doubles block by block to avoid overflow.
1447 AutoBuffer<double> _buf(cn*4);
1448 double *s = (double*)_buf, *sq = s + cn;
1449 int *sbuf = (int*)s, *sqbuf = (int*)sq;
// Depths narrow enough to accumulate (and square-accumulate) in int.
1450 bool blockSum = depth <= CV_16S, blockSqSum = depth <= CV_8S;
1453 for( k = 0; k < cn; k++ )
// 2^15 elements per block keeps int accumulators from overflowing.
1458 intSumBlockSize = 1 << 15;
1459 blockSize = std::min(blockSize, intSumBlockSize);
1460 sbuf = (int*)(sq + cn);
1463 for( k = 0; k < cn; k++ )
1464 sbuf[k] = sqbuf[k] = 0;
1465 esz = src.elemSize();
1468 for( size_t i = 0; i < it.nplanes; i++, ++it )
1470 for( j = 0; j < total; j += blockSize )
1472 int bsz = std::min(total - j, blockSize);
// func returns the number of non-zero (unmasked) pixels processed.
1473 int nz = func( ptrs[0], ptrs[1], (uchar*)sbuf, (uchar*)sqbuf, bsz, cn );
// Flush the int block accumulators into the double totals when the block
// limit is reached or this is the very last block.
1476 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
1478 for( k = 0; k < cn; k++ )
1485 for( k = 0; k < cn; k++ )
// Finalize: mean = sum/nz, stddev = sqrt(E[x^2] - E[x]^2), clamped at 0
// to guard against negative values from floating-point round-off.
1499 double scale = nz0 ? 1./nz0 : 0.;
1500 for( k = 0; k < cn; k++ )
1503 sq[k] = std::sqrt(std::max(sq[k]*scale - s[k]*s[k], 0.));
// Write mean (j == 0) and stddev (j == 1) into their output arrays.
1506 for( j = 0; j < 2; j++ )
1508 const double* sptr = j == 0 ? s : sq;
1509 _OutputArray _dst = j == 0 ? _mean : _sdv;
1510 if( !_dst.needed() )
1513 if( !_dst.fixedSize() )
1514 _dst.create(cn, 1, CV_64F, -1, true);
1515 Mat dst = _dst.getMat();
1516 int dcn = (int)dst.total();
// Output must be a continuous CV_64F vector with at least cn elements.
1517 CV_Assert( dst.type() == CV_64F && dst.isContinuous() &&
1518 (dst.cols == 1 || dst.rows == 1) && dcn >= cn );
1519 double* dptr = dst.ptr<double>();
1520 for( k = 0; k < cn; k++ )
// Zero-pad any extra elements of a fixed-size output.
1522 for( ; k < dcn; k++ )
1527 /****************************************************************************************\
*                                         minMaxLoc                                          *
1529 \****************************************************************************************/
// minMaxIdx_: scans len elements of src, updating the running min/max values
// and their flat indices (startIdx + i). State is carried in and out through
// the pointer arguments so successive planes/blocks can be chained by the
// caller. Two loops handle the unmasked and masked cases; with a mask, only
// elements whose mask byte is non-zero are considered.
// NOTE(review): this extract omits several lines (braces, the `val` loads,
// the mask==NULL branch selection, and the final write-back of the results).
1534 template<typename T, typename WT> static void
1535 minMaxIdx_( const T* src, const uchar* mask, WT* _minVal, WT* _maxVal,
1536 size_t* _minIdx, size_t* _maxIdx, int len, size_t startIdx )
1538 WT minVal = *_minVal, maxVal = *_maxVal;
1539 size_t minIdx = *_minIdx, maxIdx = *_maxIdx;
// Unmasked scan.
1543 for( int i = 0; i < len; i++ )
1549 minIdx = startIdx + i;
1554 maxIdx = startIdx + i;
// Masked scan: mask[i] must be non-zero for the element to participate.
1560 for( int i = 0; i < len; i++ )
1563 if( mask[i] && val < minVal )
1566 minIdx = startIdx + i;
1568 if( mask[i] && val > maxVal )
1571 maxIdx = startIdx + i;
// Depth-specific instantiations of minMaxIdx_. Note the working value type:
// the integer depths (8u..32s) track min/max in int, while 32f/64f keep their
// native float/double extrema. getMinmaxTab below erases all of these to a
// single MinMaxIdxFunc signature (int* value pointers), so cv::minMaxIdx must
// pass a value buffer of the depth-appropriate type through that int* slot.
1582 static void minMaxIdx_8u(const uchar* src, const uchar* mask, int* minval, int* maxval,
1583 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1584 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1586 static void minMaxIdx_8s(const schar* src, const uchar* mask, int* minval, int* maxval,
1587 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1588 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1590 static void minMaxIdx_16u(const ushort* src, const uchar* mask, int* minval, int* maxval,
1591 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1592 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1594 static void minMaxIdx_16s(const short* src, const uchar* mask, int* minval, int* maxval,
1595 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1596 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1598 static void minMaxIdx_32s(const int* src, const uchar* mask, int* minval, int* maxval,
1599 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1600 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1602 static void minMaxIdx_32f(const float* src, const uchar* mask, float* minval, float* maxval,
1603 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1604 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1606 static void minMaxIdx_64f(const double* src, const uchar* mask, double* minval, double* maxval,
1607 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1608 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
// Common erased signature for the minMaxIdx_* wrappers above; the two int*
// slots actually point at int, float, or double extrema depending on depth.
1610 typedef void (*MinMaxIdxFunc)(const uchar*, const uchar*, int*, int*, size_t*, size_t*, int, size_t);
// getMinmaxTab: per-depth dispatch table, indexed by CV_8U..CV_64F.
1612 static MinMaxIdxFunc getMinmaxTab(int depth)
1614 static MinMaxIdxFunc minmaxTab[] =
1616 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8s),
1617 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16s),
1618 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32s),
1619 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32f), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_64f),
1623 return minmaxTab[depth];
// ofs2idx: converts a 1-based flat element offset into per-dimension indices
// of matrix `a`, working from the last dimension up (idx[i] = ofs % size_i).
// Callers pass offset 0 to mean "no element found" — presumably the second
// loop (not fully visible here) fills idx with -1 in that case; TODO confirm
// against the full source.
1626 static void ofs2idx(const Mat& a, size_t ofs, int* idx)
1632 for( i = d-1; i >= 0; i-- )
1635 idx[i] = (int)(ofs % sz);
1641 for( i = d-1; i >= 0; i-- )
// getMinMaxRes: final CPU-side reduction of the per-workgroup partial results
// produced by the "minmaxloc" OpenCL kernel. The db buffer holds consecutive
// groupnum-sized sections, present only when requested, in this order:
//   [min values][max values][min locations][max locations][second max values]
// Ties between workgroups are resolved toward the smallest flat location.
// If a location was requested but remained UINT_MAX, the mask was all zeros:
// values are then reported as 0 and 2-D locations as (-1, -1).
1648 template <typename T>
1649 void getMinMaxRes(const Mat & db, double * minVal, double * maxVal,
1650 int* minLoc, int* maxLoc,
1651 int groupnum, int cols, double * maxVal2)
// UINT_MAX sentinel: "no location recorded".
1653 uint index_max = std::numeric_limits<uint>::max();
1654 T minval = std::numeric_limits<T>::max();
// Lowest representable value: -max for floating T (min() is the smallest
// positive normal there), min() for integer T.
1655 T maxval = std::numeric_limits<T>::min() > 0 ? -std::numeric_limits<T>::max() : std::numeric_limits<T>::min(), maxval2 = maxval;
1656 uint minloc = index_max, maxloc = index_max;
// Walk the buffer, binding a pointer to each section that was requested.
1659 const T * minptr = NULL, * maxptr = NULL, * maxptr2 = NULL;
1660 const uint * minlocptr = NULL, * maxlocptr = NULL;
1661 if (minVal || minLoc)
1663 minptr = db.ptr<T>();
1664 index += sizeof(T) * groupnum;
1666 if (maxVal || maxLoc)
1668 maxptr = (const T *)(db.ptr() + index);
1669 index += sizeof(T) * groupnum;
1673 minlocptr = (const uint *)(db.ptr() + index);
1674 index += sizeof(uint) * groupnum;
1678 maxlocptr = (const uint *)(db.ptr() + index);
1679 index += sizeof(uint) * groupnum;
1682 maxptr2 = (const T *)(db.ptr() + index);
// Reduce across workgroups; on equal values keep the smaller location.
1684 for (int i = 0; i < groupnum; i++)
1686 if (minptr && minptr[i] <= minval)
1688 if (minptr[i] == minval)
1691 minloc = std::min(minlocptr[i], minloc);
1696 minloc = minlocptr[i];
1700 if (maxptr && maxptr[i] >= maxval)
1702 if (maxptr[i] == maxval)
1705 maxloc = std::min(maxlocptr[i], maxloc);
1710 maxloc = maxlocptr[i];
1714 if (maxptr2 && maxptr2[i] > maxval2)
1715 maxval2 = maxptr2[i];
// A still-unset location means no pixel passed the mask.
1717 bool zero_mask = (minLoc && minloc == index_max) ||
1718 (maxLoc && maxloc == index_max);
1721 *minVal = zero_mask ? 0 : (double)minval;
1723 *maxVal = zero_mask ? 0 : (double)maxval;
1725 *maxVal2 = zero_mask ? 0 : (double)maxval2;
// Convert flat offsets to (row, col) pairs.
1729 minLoc[0] = zero_mask ? -1 : minloc / cols;
1730 minLoc[1] = zero_mask ? -1 : minloc % cols;
1734 maxLoc[0] = zero_mask ? -1 : maxloc / cols;
1735 maxLoc[1] = zero_mask ? -1 : maxloc % cols;
// Erased signature used to dispatch getMinMaxRes<T> by accumulation depth.
1739 typedef void (*getMinMaxResFunc)(const Mat & db, double * minVal, double * maxVal,
1740 int * minLoc, int *maxLoc, int gropunum, int cols, double * maxVal2);
// ocl_minMaxIdx: OpenCL implementation behind cv::minMaxIdx and related
// reductions. ddepth selects the accumulation depth, absValues switches to
// extrema of |x|, and _src2/maxVal2 support the secondary-source / second-
// maximum modes used by other callers. Returns false to fall back to CPU.
1742 static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc, int* maxLoc, InputArray _mask,
1743 int ddepth = -1, bool absValues = false, InputArray _src2 = noArray(), double * maxVal2 = NULL)
1745 const ocl::Device & dev = ocl::Device::getDefault();
1746 bool doubleSupport = dev.doubleFPConfig() > 0, haveMask = !_mask.empty(),
1747 haveSrc2 = _src2.kind() != _InputArray::NONE;
// kercn: vector width per work item; with a mask it must equal cn.
1748 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
1749 kercn = haveMask ? cn : std::min(4, ocl::predictOptimalVectorWidth(_src, _src2));
1751 // disabled following modes since it occasionally fails on AMD devices (e.g. A10-6800K, sep. 2014)
1752 if ((haveMask || type == CV_32FC1) && dev.isAMD())
// Locations are only supported for single-channel inputs.
1755 CV_Assert( (cn == 1 && (!haveMask || _mask.type() == CV_8U)) ||
1756 (cn >= 1 && !minLoc && !maxLoc) );
1761 CV_Assert(!haveSrc2 || _src2.type() == type);
1763 if (depth == CV_32S)
// Bail out if doubles are needed but the device lacks FP64.
1766 if ((depth == CV_64F || ddepth == CV_64F) && !doubleSupport)
1769 int groupnum = dev.maxComputeUnits();
1770 size_t wgs = dev.maxWorkGroupSize();
// Round the work-group size up to the next power of two for the kernel.
1772 int wgs2_aligned = 1;
1773 while (wgs2_aligned < (int)wgs)
1777 bool needMinVal = minVal || minLoc, needMinLoc = minLoc != NULL,
1778 needMaxVal = maxVal || maxLoc, needMaxLoc = maxLoc != NULL;
1780 // in case of mask we must know whether mask is filled with zeros or not
1781 // so let's calculate min or max location, if it's undefined, so mask is zeros
1782 if (!(needMaxLoc || needMinLoc) && haveMask)
// Build the kernel option string: types, work-group geometry, and feature
// flags matching the needMin*/needMax*/mask/src2 configuration above.
1791 String opts = format("-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s"
1792 " -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s"
1793 " -D dstT1=%s -D dstT=%s -D convertToDT=%s%s%s%s%s -D wdepth=%d -D convertFromU=%s",
1794 depth, ocl::typeToStr(depth), haveMask ? " -D HAVE_MASK" : "", (int)wgs,
1795 ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), wgs2_aligned,
1796 doubleSupport ? " -D DOUBLE_SUPPORT" : "",
1797 _src.isContinuous() ? " -D HAVE_SRC_CONT" : "",
1798 _mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn,
1799 needMinVal ? " -D NEED_MINVAL" : "", needMaxVal ? " -D NEED_MAXVAL" : "",
1800 needMinLoc ? " -D NEED_MINLOC" : "", needMaxLoc ? " -D NEED_MAXLOC" : "",
1801 ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
1802 ocl::convertTypeStr(depth, ddepth, kercn, cvt[0]),
1803 absValues ? " -D OP_ABS" : "",
1804 haveSrc2 ? " -D HAVE_SRC2" : "", maxVal2 ? " -D OP_CALC2" : "",
1805 haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", ddepth,
1806 depth <= CV_32S && ddepth == CV_32S ? ocl::convertTypeStr(CV_8U, ddepth, kercn, cvt[1]) : "noconvert");
1808 ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts);
// Per-workgroup output buffer; layout must match getMinMaxRes (values,
// then locations, then the optional second-maximum section).
1812 int esz = CV_ELEM_SIZE(ddepth), esz32s = CV_ELEM_SIZE1(CV_32S),
1813 dbsize = groupnum * ((needMinVal ? esz : 0) + (needMaxVal ? esz : 0) +
1814 (needMinLoc ? esz32s : 0) + (needMaxLoc ? esz32s : 0) +
1815 (maxVal2 ? esz : 0));
1816 UMat src = _src.getUMat(), src2 = _src2.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat();
// Multi-channel without mask: process as a single-channel plane.
1818 if (cn > 1 && !haveMask)
1820 src = src.reshape(1);
1821 src2 = src2.reshape(1);
// Bind arguments in the combination matching the kernel's compile flags.
1827 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
1828 groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(src2));
1830 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
1831 groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(mask),
1832 ocl::KernelArg::ReadOnlyNoSize(src2));
1837 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
1838 groupnum, ocl::KernelArg::PtrWriteOnly(db));
1840 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
1841 groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(mask));
// One work-group per compute unit; blocking run.
1844 size_t globalsize = groupnum * wgs;
1845 if (!k.run(1, &globalsize, &wgs, true))
// CPU-side reduction dispatched on accumulation depth; the CV_16F/32S gaps
// in the table are filled by the surrounding (not visible) entries.
1848 static const getMinMaxResFunc functab[7] =
1850 getMinMaxRes<uchar>,
1852 getMinMaxRes<ushort>,
1853 getMinMaxRes<short>,
1855 getMinMaxRes<float>,
1856 getMinMaxRes<double>
1859 getMinMaxResFunc func = functab[ddepth];
// locTemp stands in when a location is needed only to detect an all-zero
// mask but the caller did not ask for it.
1862 func(db.getMat(ACCESS_READ), minVal, maxVal,
1863 needMinLoc ? minLoc ? minLoc : locTemp : minLoc,
1864 needMaxLoc ? maxLoc ? maxLoc : locTemp : maxLoc,
1865 groupnum, src.cols, maxVal2);
// cv::minMaxIdx: finds the global minimum and maximum of _src and (optionally)
// their multi-dimensional indices, honoring an optional 8-bit mask.
// Dispatch: OpenCL (2-D UMat) -> IPP (2-D / continuous) -> generic per-depth
// scan via getMinmaxTab. Any output pointer may be NULL.
1874 void cv::minMaxIdx(InputArray _src, double* minVal,
1875 double* maxVal, int* minIdx, int* maxIdx,
// Indices/mask only make sense for single-channel input.
1878 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
1879 CV_Assert( (cn == 1 && (_mask.empty() || _mask.type() == CV_8U)) ||
1880 (cn > 1 && _mask.empty() && !minIdx && !maxIdx) );
1882 CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2 && (_mask.empty() || _src.size() == _mask.size()),
1883 ocl_minMaxIdx(_src, minVal, maxVal, minIdx, maxIdx, _mask))
1885 Mat src = _src.getMat(), mask = _mask.getMat();
1887 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
1888 size_t total_size = src.total();
// Flatten a continuous n-D array into one rows x cols plane for IPP.
1889 int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
1890 if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
// Channels are interleaved into the width for the IPP call.
1892 IppiSize sz = { cols * cn, rows };
// --- Masked IPP path (deprecated IPP API, warnings suppressed). ---
1896 typedef IppStatus (CV_STDCALL* ippiMaskMinMaxIndxFuncC1)(const void *, int, const void *, int,
1897 IppiSize, Ipp32f *, Ipp32f *, IppiPoint *, IppiPoint *);
1899 CV_SUPPRESS_DEPRECATED_START
1900 ippiMaskMinMaxIndxFuncC1 ippFuncC1 =
1901 type == CV_8UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1MR :
1902 type == CV_8SC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_8s_C1MR :
1903 type == CV_16UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1MR :
1904 type == CV_32FC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1MR : 0;
1905 CV_SUPPRESS_DEPRECATED_END
1910 IppiPoint minp, maxp;
1911 if( ippFuncC1(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &min, &max, &minp, &maxp) >= 0 )
1914 *minVal = (double)min;
1916 *maxVal = (double)max;
// IPP reports (0,0) both for "first pixel" and "all masked out"; a zero
// mask byte at the origin disambiguates the latter -> indices become -1.
1917 if( !minp.x && !minp.y && !maxp.x && !maxp.y && !mask.ptr()[0] )
1918 minp.x = maxp.x = -1;
// ofs2idx expects a 1-based flat offset (0 = not found).
1921 size_t minidx = minp.y * cols + minp.x + 1;
1922 ofs2idx(src, minidx, minIdx);
1926 size_t maxidx = maxp.y * cols + maxp.x + 1;
1927 ofs2idx(src, maxidx, maxIdx);
// IPP failure is non-fatal; fall through to the generic path.
1931 setIppErrorStatus();
// --- Unmasked IPP path (also deprecated API). ---
1936 typedef IppStatus (CV_STDCALL* ippiMinMaxIndxFuncC1)(const void *, int, IppiSize, Ipp32f *, Ipp32f *, IppiPoint *, IppiPoint *);
1938 CV_SUPPRESS_DEPRECATED_START
1939 ippiMinMaxIndxFuncC1 ippFuncC1 =
1940 depth == CV_8U ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1R :
1941 depth == CV_8S ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_8s_C1R :
1942 depth == CV_16U ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1R :
1943 depth == CV_32F ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1R : 0;
1944 CV_SUPPRESS_DEPRECATED_END
1949 IppiPoint minp, maxp;
1950 if( ippFuncC1(src.ptr(), (int)src.step[0], sz, &min, &max, &minp, &maxp) >= 0 )
1953 *minVal = (double)min;
1955 *maxVal = (double)max;
1958 size_t minidx = minp.y * cols + minp.x + 1;
1959 ofs2idx(src, minidx, minIdx);
1963 size_t maxidx = maxp.y * cols + maxp.x + 1;
1964 ofs2idx(src, maxidx, maxIdx);
1968 setIppErrorStatus();
// --- Generic fallback: plane-by-plane scan with the per-depth kernel. ---
1974 MinMaxIdxFunc func = getMinmaxTab(depth);
1975 CV_Assert( func != 0 );
1977 const Mat* arrays[] = {&src, &mask, 0};
1979 NAryMatIterator it(arrays, ptrs);
// Flat indices are 1-based here too; 0 means "nothing found" (empty mask).
1981 size_t minidx = 0, maxidx = 0;
// One accumulator pair per depth family; the int* slots of MinMaxIdxFunc
// are re-pointed at the float/double pair for those depths (see the table).
1982 int iminval = INT_MAX, imaxval = INT_MIN;
1983 float fminval = FLT_MAX, fmaxval = -FLT_MAX;
1984 double dminval = DBL_MAX, dmaxval = -DBL_MAX;
1985 size_t startidx = 1;
1986 int *minval = &iminval, *maxval = &imaxval;
1987 int planeSize = (int)it.size*cn;
1989 if( depth == CV_32F )
1990 minval = (int*)&fminval, maxval = (int*)&fmaxval;
1991 else if( depth == CV_64F )
1992 minval = (int*)&dminval, maxval = (int*)&dmaxval;
1994 for( size_t i = 0; i < it.nplanes; i++, ++it, startidx += planeSize )
1995 func( ptrs[0], ptrs[1], minval, maxval, &minidx, &maxidx, planeSize, startidx );
// Normalize the result to double regardless of source depth.
1998 dminval = dmaxval = 0;
1999 else if( depth == CV_32F )
2000 dminval = fminval, dmaxval = fmaxval;
2001 else if( depth <= CV_32S )
2002 dminval = iminval, dmaxval = imaxval;
2010 ofs2idx(src, minidx, minIdx);
2012 ofs2idx(src, maxidx, maxIdx);
// cv::minMaxLoc: 2-D convenience wrapper over minMaxIdx. minMaxIdx fills the
// optional index pairs in (row, col) order, so the x/y fields of the output
// Points are swapped afterwards to yield Point(x = col, y = row).
// NOTE(review): the NULL-pointer guards around the two swaps are not visible
// in this extract — presumably present in the full source; confirm before
// relying on NULL minLoc/maxLoc here.
2015 void cv::minMaxLoc( InputArray _img, double* minVal, double* maxVal,
2016 Point* minLoc, Point* maxLoc, InputArray mask )
2018 CV_Assert(_img.dims() <= 2);
2020 minMaxIdx(_img, minVal, maxVal, (int*)minLoc, (int*)maxLoc, mask);
2022 std::swap(minLoc->x, minLoc->y);
2024 std::swap(maxLoc->x, maxLoc->y);
2027 /****************************************************************************************\
*                                            norm                                            *
2029 \****************************************************************************************/
// Squared L2 (Euclidean) distance between two float vectors of length n:
//   sum_j (a[j] - b[j])^2, accumulated in float.
// Vectorized with SSE (8 floats per iteration) or NEON (4 per iteration) when
// available; the remainder goes through the unrolled scalar tail.
//
// Fix: the NEON path previously computed v_diff = vmulq_f32(a, b) — i.e. it
// squared the elementwise *product* instead of the *difference*, producing
// sum((a*b)^2) rather than sum((a-b)^2). It now uses vsubq_f32, matching the
// SSE (_mm_sub_ps) and scalar paths.
float normL2Sqr_(const float* a, const float* b, int n)
{
    int j = 0; float d = 0.f;
#if CV_SSE
    if( USE_SSE2 )
    {
        float CV_DECL_ALIGNED(16) buf[4];
        __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();

        for( ; j <= n - 8; j += 8 )
        {
            __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
            __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
            d0 = _mm_add_ps(d0, _mm_mul_ps(t0, t0));
            d1 = _mm_add_ps(d1, _mm_mul_ps(t1, t1));
        }
        _mm_store_ps(buf, _mm_add_ps(d0, d1));
        d = buf[0] + buf[1] + buf[2] + buf[3];
    }
    else
#elif CV_NEON
    {
        float32x4_t v_sum = vdupq_n_f32(0.0f);
        for ( ; j <= n - 4; j += 4)
        {
            // subtract first, then square-accumulate
            float32x4_t v_diff = vsubq_f32(vld1q_f32(a + j), vld1q_f32(b + j));
            v_sum = vaddq_f32(v_sum, vmulq_f32(v_diff, v_diff));
        }

        float CV_DECL_ALIGNED(16) buf[4];
        vst1q_f32(buf, v_sum);
        d = buf[0] + buf[1] + buf[2] + buf[3];
    }
#endif
    {
        // scalar path, unrolled by 4 (also finishes what SSE/NEON left over)
        for( ; j <= n - 4; j += 4 )
        {
            float t0 = a[j] - b[j], t1 = a[j+1] - b[j+1], t2 = a[j+2] - b[j+2], t3 = a[j+3] - b[j+3];
            d += t0*t0 + t1*t1 + t2*t2 + t3*t3;
        }
    }

    for( ; j < n; j++ )
    {
        float t = a[j] - b[j];
        d += t*t;
    }
    return d;
}
// L1 (sum of absolute differences) distance between two float vectors:
//   sum_j |a[j] - b[j]|, accumulated in float.
// SSE path clears the sign bit with an AND mask (absbuf = 0x7fffffff lanes);
// NEON uses the native absolute-difference instruction vabdq_f32.
// NOTE(review): the #if CV_SSE / #elif CV_NEON guards and braces around the
// vector paths are not visible in this extract.
2083 float normL1_(const float* a, const float* b, int n)
2085 int j = 0; float d = 0.f;
// SSE: process 8 floats per iteration in two accumulators.
2089 float CV_DECL_ALIGNED(16) buf[4];
2090 static const int CV_DECL_ALIGNED(16) absbuf[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
2091 __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
2092 __m128 absmask = _mm_load_ps((const float*)absbuf);
2094 for( ; j <= n - 8; j += 8 )
2096 __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
2097 __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
// |x| via masking off the IEEE-754 sign bit.
2098 d0 = _mm_add_ps(d0, _mm_and_ps(t0, absmask));
2099 d1 = _mm_add_ps(d1, _mm_and_ps(t1, absmask));
2101 _mm_store_ps(buf, _mm_add_ps(d0, d1));
2102 d = buf[0] + buf[1] + buf[2] + buf[3];
// NEON: 4 floats per iteration, vabdq = |a - b| in one instruction.
2106 float32x4_t v_sum = vdupq_n_f32(0.0f);
2107 for ( ; j <= n - 4; j += 4)
2108 v_sum = vaddq_f32(v_sum, vabdq_f32(vld1q_f32(a + j), vld1q_f32(b + j)));
2110 float CV_DECL_ALIGNED(16) buf[4];
2111 vst1q_f32(buf, v_sum);
2112 d = buf[0] + buf[1] + buf[2] + buf[3];
// Scalar path, unrolled by 4, then the final remainder loop.
2115 for( ; j <= n - 4; j += 4 )
2117 d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
2118 std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
2123 d += std::abs(a[j] - b[j]);
// L1 distance between two uchar vectors: sum_j |a[j] - b[j]|, as int.
// SSE path uses the PSADBW sum-of-absolute-differences instruction on 16
// (then 4) bytes at a time; NEON widens vabdq_u8 results into a u32 sum.
// NOTE(review): the SIMD-guard #if directives and braces are not visible in
// this extract.
2127 int normL1_(const uchar* a, const uchar* b, int n)
2133 __m128i d0 = _mm_setzero_si128();
// 16 bytes per iteration via _mm_sad_epu8 (PSADBW).
2135 for( ; j <= n - 16; j += 16 )
2137 __m128i t0 = _mm_loadu_si128((const __m128i*)(a + j));
2138 __m128i t1 = _mm_loadu_si128((const __m128i*)(b + j));
2140 d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
// 4-byte tail handled by loading into the low 32 bits of a register.
2143 for( ; j <= n - 4; j += 4 )
2145 __m128i t0 = _mm_cvtsi32_si128(*(const int*)(a + j));
2146 __m128i t1 = _mm_cvtsi32_si128(*(const int*)(b + j));
2148 d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
// PSADBW leaves two 64-bit partial sums; fold high half into low.
2150 d = _mm_cvtsi128_si32(_mm_add_epi32(d0, _mm_unpackhi_epi64(d0, d0)));
// NEON: absolute byte differences widened u8 -> u16 -> u32 and summed.
2154 uint32x4_t v_sum = vdupq_n_u32(0.0f);
2155 for ( ; j <= n - 16; j += 16)
2157 uint8x16_t v_dst = vabdq_u8(vld1q_u8(a + j), vld1q_u8(b + j));
2158 uint16x8_t v_low = vmovl_u8(vget_low_u8(v_dst)), v_high = vmovl_u8(vget_high_u8(v_dst));
2159 v_sum = vaddq_u32(v_sum, vaddl_u16(vget_low_u16(v_low), vget_low_u16(v_high)));
2160 v_sum = vaddq_u32(v_sum, vaddl_u16(vget_high_u16(v_low), vget_high_u16(v_high)));
2163 uint CV_DECL_ALIGNED(16) buf[4];
2164 vst1q_u32(buf, v_sum);
2165 d = buf[0] + buf[1] + buf[2] + buf[3];
// Scalar path, unrolled by 4, then the remainder loop.
2168 for( ; j <= n - 4; j += 4 )
2170 d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
2171 std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
2175 d += std::abs(a[j] - b[j]);
// popCountTable[x]: number of set bits in byte x (Hamming weight),
// used by normHamming with cellSize == 1.
2179 static const uchar popCountTable[] =
2181 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2182 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2183 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2184 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2185 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2186 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2187 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2188 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
// popCountTable2[x]: number of non-zero 2-bit cells in byte x,
// used by normHamming with cellSize == 2.
2191 static const uchar popCountTable2[] =
2193 0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
2194 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
2195 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
2196 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
2197 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
2198 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
2199 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
2200 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
// popCountTable4[x]: number of non-zero 4-bit cells (nibbles) in byte x,
// used by normHamming with cellSize == 4.
2203 static const uchar popCountTable4[] =
2205 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2206 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2207 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2208 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2209 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2210 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2211 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2212 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
// normHamming: Hamming weight (number of set bits) of an n-byte vector.
// NEON path counts bits per byte with vcntq_u8, then pairwise-widens the
// partial sums u8 -> u16 -> u32 -> u64; the scalar path uses popCountTable.
// NOTE(review): the #if guards around the NEON section are not visible here.
2215 static int normHamming(const uchar* a, int n)
2217 int i = 0, result = 0;
2220 uint32x4_t bits = vmovq_n_u32(0);
2221 for (; i <= n - 16; i += 16) {
2222 uint8x16_t A_vec = vld1q_u8 (a + i);
2223 uint8x16_t bitsSet = vcntq_u8 (A_vec);
2224 uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
2225 uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
2226 bits = vaddq_u32(bits, bitSet4);
// Fold the two 64-bit lanes: lane 0 plus s32 lane 2 (= low half of lane 1).
2228 uint64x2_t bitSet2 = vpaddlq_u32 (bits);
2229 result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
2230 result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
// Scalar table lookup, unrolled by 4, then the remainder loop.
2233 for( ; i <= n - 4; i += 4 )
2234 result += popCountTable[a[i]] + popCountTable[a[i+1]] +
2235 popCountTable[a[i+2]] + popCountTable[a[i+3]];
2237 result += popCountTable[a[i]];
// normHamming: Hamming distance between two n-byte vectors — the number of
// bit positions where a and b differ, i.e. popcount(a XOR b).
// NEON path XORs 16 bytes at a time, counts bits with vcntq_u8, and widens
// the partial sums; the scalar path uses popCountTable on a[i] ^ b[i].
// NOTE(review): the #if guards around the NEON section are not visible here.
2241 int normHamming(const uchar* a, const uchar* b, int n)
2243 int i = 0, result = 0;
2246 uint32x4_t bits = vmovq_n_u32(0);
2247 for (; i <= n - 16; i += 16) {
2248 uint8x16_t A_vec = vld1q_u8 (a + i);
2249 uint8x16_t B_vec = vld1q_u8 (b + i);
2250 uint8x16_t AxorB = veorq_u8 (A_vec, B_vec);
2251 uint8x16_t bitsSet = vcntq_u8 (AxorB);
2252 uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
2253 uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
2254 bits = vaddq_u32(bits, bitSet4);
// Fold the two 64-bit lanes of the accumulator.
2256 uint64x2_t bitSet2 = vpaddlq_u32 (bits);
2257 result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
2258 result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
// Scalar table lookup, unrolled by 4, then the remainder loop.
2261 for( ; i <= n - 4; i += 4 )
2262 result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] +
2263 popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]];
2265 result += popCountTable[a[i] ^ b[i]];
2269 static int normHamming(const uchar* a, int n, int cellSize)
2272 return normHamming(a, n);
2273 const uchar* tab = 0;
2275 tab = popCountTable2;
2276 else if( cellSize == 4 )
2277 tab = popCountTable4;
2279 CV_Error( CV_StsBadSize, "bad cell size (not 1, 2 or 4) in normHamming" );
2280 int i = 0, result = 0;
2281 #if CV_ENABLE_UNROLLED
2282 for( ; i <= n - 4; i += 4 )
2283 result += tab[a[i]] + tab[a[i+1]] + tab[a[i+2]] + tab[a[i+3]];
2286 result += tab[a[i]];
2290 int normHamming(const uchar* a, const uchar* b, int n, int cellSize)
2293 return normHamming(a, b, n);
2294 const uchar* tab = 0;
2296 tab = popCountTable2;
2297 else if( cellSize == 4 )
2298 tab = popCountTable4;
2300 CV_Error( CV_StsBadSize, "bad cell size (not 1, 2 or 4) in normHamming" );
2301 int i = 0, result = 0;
2302 #if CV_ENABLE_UNROLLED
2303 for( ; i <= n - 4; i += 4 )
2304 result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] +
2305 tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]];
2308 result += tab[a[i] ^ b[i]];
// normInf_: running maximum of |src[i]| over len pixels of cn channels.
// The accumulator is read from and (presumably, in lines not visible here)
// written back through _result, so blocks can be chained by the caller.
// With a mask, a non-zero mask byte admits all cn channels of that pixel.
2313 template<typename T, typename ST> int
2314 normInf_(const T* src, const uchar* mask, ST* _result, int len, int cn)
2316 ST result = *_result;
// Unmasked fast path: delegate to the flat helper over len*cn elements.
2319 result = std::max(result, normInf<T, ST>(src, len*cn));
2323 for( int i = 0; i < len; i++, src += cn )
2326 for( int k = 0; k < cn; k++ )
2327 result = std::max(result, ST(std::abs(src[k])));
// normL1_: accumulates sum of |src[i]| over len pixels of cn channels into
// *_result (write-back lines not visible in this extract). With a mask, a
// non-zero mask byte admits all cn channels of that pixel.
2334 template<typename T, typename ST> int
2335 normL1_(const T* src, const uchar* mask, ST* _result, int len, int cn)
2337 ST result = *_result;
// Unmasked fast path: flat helper over len*cn elements.
2340 result += normL1<T, ST>(src, len*cn);
2344 for( int i = 0; i < len; i++, src += cn )
2347 for( int k = 0; k < cn; k++ )
2348 result += std::abs(src[k]);
// normL2_: accumulates the *squared* L2 norm (sum of src[i]^2) over len
// pixels of cn channels into *_result; the caller takes the square root at
// the end. Masked per-element accumulation lines are not fully visible here.
2355 template<typename T, typename ST> int
2356 normL2_(const T* src, const uchar* mask, ST* _result, int len, int cn)
2358 ST result = *_result;
// Unmasked fast path: flat squared-sum helper over len*cn elements.
2361 result += normL2Sqr<T, ST>(src, len*cn);
2365 for( int i = 0; i < len; i++, src += cn )
2368 for( int k = 0; k < cn; k++ )
// normDiffInf_: running maximum of |src1[i] - src2[i]| over len pixels of cn
// channels, accumulated through *_result like the single-source templates.
// With a mask, a non-zero mask byte admits all cn channels of that pixel.
2379 template<typename T, typename ST> int
2380 normDiffInf_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
2382 ST result = *_result;
// Unmasked fast path: flat helper over len*cn elements.
2385 result = std::max(result, normInf<T, ST>(src1, src2, len*cn));
2389 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
2392 for( int k = 0; k < cn; k++ )
2393 result = std::max(result, (ST)std::abs(src1[k] - src2[k]));
// normDiffL1_: accumulates sum of |src1[i] - src2[i]| over len pixels of cn
// channels into *_result. With a mask, a non-zero mask byte admits all cn
// channels of that pixel.
2400 template<typename T, typename ST> int
2401 normDiffL1_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
2403 ST result = *_result;
// Unmasked fast path: flat helper over len*cn elements.
2406 result += normL1<T, ST>(src1, src2, len*cn);
2410 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
2413 for( int k = 0; k < cn; k++ )
2414 result += std::abs(src1[k] - src2[k]);
// normDiffL2_: accumulates the *squared* L2 distance (sum of (src1-src2)^2)
// over len pixels of cn channels into *_result; the caller takes the square
// root at the end. With a mask, a non-zero mask byte admits all cn channels.
2421 template<typename T, typename ST> int
2422 normDiffL2_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
2424 ST result = *_result;
// Unmasked fast path: flat squared-sum helper over len*cn elements.
2427 result += normL2Sqr<T, ST>(src1, src2, len*cn);
2431 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
2434 for( int k = 0; k < cn; k++ )
2436 ST v = src1[k] - src2[k];
2446 #define CV_DEF_NORM_FUNC(L, suffix, type, ntype) \
2447 static int norm##L##_##suffix(const type* src, const uchar* mask, ntype* r, int len, int cn) \
2448 { return norm##L##_(src, mask, r, len, cn); } \
2449 static int normDiff##L##_##suffix(const type* src1, const type* src2, \
2450 const uchar* mask, ntype* r, int len, int cn) \
2451 { return normDiff##L##_(src1, src2, mask, r, (int)len, cn); }
2453 #define CV_DEF_NORM_ALL(suffix, type, inftype, l1type, l2type) \
2454 CV_DEF_NORM_FUNC(Inf, suffix, type, inftype) \
2455 CV_DEF_NORM_FUNC(L1, suffix, type, l1type) \
2456 CV_DEF_NORM_FUNC(L2, suffix, type, l2type)
// One instantiation per source depth. The accumulator widens as overflow risk
// grows: int for small types, double where an int sum could overflow
// (e.g. L2 of 16-bit data, L1/L2 of 32-bit data), float/double for float data.
2458 CV_DEF_NORM_ALL(8u, uchar, int, int, int)
2459 CV_DEF_NORM_ALL(8s, schar, int, int, int)
2460 CV_DEF_NORM_ALL(16u, ushort, int, int, double)
2461 CV_DEF_NORM_ALL(16s, short, int, int, double)
2462 CV_DEF_NORM_ALL(32s, int, int, double, double)
2463 CV_DEF_NORM_ALL(32f, float, float, double, double)
2464 CV_DEF_NORM_ALL(64f, double, double, double, double)
// Type-erased signatures for the dispatch tables: element and result pointers
// are passed as uchar* and cast back inside the per-type stubs above.
2467 typedef int (*NormFunc)(const uchar*, const uchar*, uchar*, int, int);
2468 typedef int (*NormDiffFunc)(const uchar*, const uchar*, const uchar*, uchar*, int, int);
// Table lookup for the single-array norm kernel.
// Row index: 0 = Inf, 1 = L1, 2 = L2 (cv::norm passes normType >> 1).
// Column index: CV_MAT_DEPTH; the trailing 0 marks an unsupported depth slot.
2470 static NormFunc getNormFunc(int normType, int depth)
2472 static NormFunc normTab[3][8] =
2475 (NormFunc)GET_OPTIMIZED(normInf_8u), (NormFunc)GET_OPTIMIZED(normInf_8s), (NormFunc)GET_OPTIMIZED(normInf_16u), (NormFunc)GET_OPTIMIZED(normInf_16s),
2476 (NormFunc)GET_OPTIMIZED(normInf_32s), (NormFunc)GET_OPTIMIZED(normInf_32f), (NormFunc)normInf_64f, 0
2479 (NormFunc)GET_OPTIMIZED(normL1_8u), (NormFunc)GET_OPTIMIZED(normL1_8s), (NormFunc)GET_OPTIMIZED(normL1_16u), (NormFunc)GET_OPTIMIZED(normL1_16s),
2480 (NormFunc)GET_OPTIMIZED(normL1_32s), (NormFunc)GET_OPTIMIZED(normL1_32f), (NormFunc)normL1_64f, 0
2483 (NormFunc)GET_OPTIMIZED(normL2_8u), (NormFunc)GET_OPTIMIZED(normL2_8s), (NormFunc)GET_OPTIMIZED(normL2_16u), (NormFunc)GET_OPTIMIZED(normL2_16s),
2484 (NormFunc)GET_OPTIMIZED(normL2_32s), (NormFunc)GET_OPTIMIZED(normL2_32f), (NormFunc)normL2_64f, 0
// May return 0; callers assert the result is non-null.
2488 return normTab[normType][depth];
// Table lookup for the two-array (difference) norm kernel; same indexing scheme
// as getNormFunc: row 0/1/2 = Inf/L1/L2, column = depth, 0 = unsupported.
2491 static NormDiffFunc getNormDiffFunc(int normType, int depth)
2493 static NormDiffFunc normDiffTab[3][8] =
2496 (NormDiffFunc)GET_OPTIMIZED(normDiffInf_8u), (NormDiffFunc)normDiffInf_8s,
2497 (NormDiffFunc)normDiffInf_16u, (NormDiffFunc)normDiffInf_16s,
2498 (NormDiffFunc)normDiffInf_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffInf_32f),
2499 (NormDiffFunc)normDiffInf_64f, 0
2502 (NormDiffFunc)GET_OPTIMIZED(normDiffL1_8u), (NormDiffFunc)normDiffL1_8s,
2503 (NormDiffFunc)normDiffL1_16u, (NormDiffFunc)normDiffL1_16s,
2504 (NormDiffFunc)normDiffL1_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL1_32f),
2505 (NormDiffFunc)normDiffL1_64f, 0
2508 (NormDiffFunc)GET_OPTIMIZED(normDiffL2_8u), (NormDiffFunc)normDiffL2_8s,
2509 (NormDiffFunc)normDiffL2_16u, (NormDiffFunc)normDiffL2_16s,
2510 (NormDiffFunc)normDiffL2_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL2_32f),
2511 (NormDiffFunc)normDiffL2_64f, 0
// May return 0; callers assert the result is non-null.
2515 return normDiffTab[normType][depth];
// OpenCL path for the single-array cv::norm. Returns false (fall back to CPU)
// when the norm type is unsupported or the device lacks double support for
// CV_64F input. On success writes the norm into `result`.
2520 static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & result )
2522 const ocl::Device & d = ocl::Device::getDefault();
2523 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
2524 bool doubleSupport = d.doubleFPConfig() > 0,
2525 haveMask = _mask.kind() != _InputArray::NONE;
// Reject unsupported norm types and 64F data on devices without FP64.
2527 if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) ||
2528 (!doubleSupport && depth == CV_64F))
2531 UMat src = _src.getUMat();
2533 if (normType == NORM_INF)
// NORM_INF is computed as a masked max-reduction via ocl_minMaxIdx.
2535 if (!ocl_minMaxIdx(_src, NULL, &result, NULL, NULL, _mask,
2536 std::max(depth, CV_32S), depth != CV_8U && depth != CV_16U))
2539 else if (normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR)
2542 bool unstype = depth == CV_8U || depth == CV_16U;
// Sum reduction: squares for L2/L2SQR, plain sum for unsigned types (no abs
// needed), absolute sum otherwise. Reshape to 1 channel when unmasked.
2544 if ( !ocl_sum(haveMask ? src : src.reshape(1), sc, normType == NORM_L2 || normType == NORM_L2SQR ?
2545 OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS), _mask) )
// Fold the per-channel partial sums; sqrt only for NORM_L2.
2552 for (int i = 0; i < cn; ++i)
2555 result = normType == NORM_L1 || normType == NORM_L2SQR ? s : std::sqrt(s);
// Single-array norm. Dispatch order: OpenCL -> IPP -> continuous-data fast
// paths -> generic NAryMatIterator loop with block-wise integer summation.
2565 double cv::norm( InputArray _src, int normType, InputArray _mask )
// Strip modifier bits (e.g. NORM_RELATIVE) and validate the base norm type;
// Hamming norms are only defined for CV_8U data.
2567 normType &= NORM_TYPE_MASK;
2568 CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
2569 normType == NORM_L2 || normType == NORM_L2SQR ||
2570 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && _src.type() == CV_8U) );
// Try the OpenCL implementation first for UMat inputs with <= 2 dims.
2574 CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
2575 ocl_norm(_src, normType, _mask, _result),
2579 Mat src = _src.getMat(), mask = _mask.getMat();
2580 int depth = src.depth(), cn = src.channels();
// --- IPP acceleration (IPP >= 7). Requires a 2D view or continuous data. ---
2582 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
2583 size_t total_size = src.total();
// Flatten >2D continuous data into a rows x cols 2D view for IPP.
2584 int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
2586 if( (src.dims == 2 || (src.isContinuous() && mask.isContinuous()))
2587 && cols > 0 && (size_t)rows*cols == total_size
2588 && (normType == NORM_INF || normType == NORM_L1 ||
2589 normType == NORM_L2 || normType == NORM_L2SQR) )
2591 IppiSize sz = { cols, rows };
2592 int type = src.type();
// Masked single-channel IPP kernels; the type/norm ladder resolves to a
// function pointer or 0 when no kernel matches.
2595 typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
2596 ippiMaskNormFuncC1 ippFuncC1 =
2597 normType == NORM_INF ?
2598 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8u_C1MR :
2599 type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8s_C1MR :
2600 // type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_16u_C1MR :
2601 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_32f_C1MR :
2603 normType == NORM_L1 ?
2604 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8u_C1MR :
2605 type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8s_C1MR :
2606 type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_16u_C1MR :
2607 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_32f_C1MR :
2609 normType == NORM_L2 || normType == NORM_L2SQR ?
2610 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8u_C1MR :
2611 type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8s_C1MR :
2612 type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_16u_C1MR :
2613 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_32f_C1MR :
// IPP computes L2, so NORM_L2SQR squares the returned value.
2618 if( ippFuncC1(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 )
2619 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
2621 setIppErrorStatus();
// The masked 3-channel IPP variant below is disabled (commented out) in this build.
2623 /*typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
2624 ippiMaskNormFuncC3 ippFuncC3 =
2625 normType == NORM_INF ?
2626 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8u_C3CMR :
2627 type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8s_C3CMR :
2628 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_16u_C3CMR :
2629 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_32f_C3CMR :
2631 normType == NORM_L1 ?
2632 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8u_C3CMR :
2633 type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8s_C3CMR :
2634 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_16u_C3CMR :
2635 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_32f_C3CMR :
2637 normType == NORM_L2 || normType == NORM_L2SQR ?
2638 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8u_C3CMR :
2639 type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8s_C3CMR :
2640 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_16u_C3CMR :
2641 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_32f_C3CMR :
2645 Ipp64f norm1, norm2, norm3;
2646 if( ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 &&
2647 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 &&
2648 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0)
2651 normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) :
2652 normType == NORM_L1 ? norm1 + norm2 + norm3 :
2653 normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) :
2655 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
2657 setIppErrorStatus();
// Unmasked IPP kernels: the 32f L1/L2 variants take an accuracy hint, the
// integer/Inf variants do not; exactly one of the two pointers may be set.
2662 typedef IppStatus (CV_STDCALL* ippiNormFuncHint)(const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
2663 typedef IppStatus (CV_STDCALL* ippiNormFuncNoHint)(const void *, int, IppiSize, Ipp64f *);
2664 ippiNormFuncHint ippFuncHint =
2665 normType == NORM_L1 ?
2666 (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L1_32f_C1R :
2667 type == CV_32FC3 ? (ippiNormFuncHint)ippiNorm_L1_32f_C3R :
2668 type == CV_32FC4 ? (ippiNormFuncHint)ippiNorm_L1_32f_C4R :
2670 normType == NORM_L2 || normType == NORM_L2SQR ?
2671 (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L2_32f_C1R :
2672 type == CV_32FC3 ? (ippiNormFuncHint)ippiNorm_L2_32f_C3R :
2673 type == CV_32FC4 ? (ippiNormFuncHint)ippiNorm_L2_32f_C4R :
2675 ippiNormFuncNoHint ippFuncNoHint =
2676 normType == NORM_INF ?
2677 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C1R :
2678 type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C3R :
2679 type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C4R :
2680 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C1R :
2681 type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C3R :
2682 type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C4R :
2683 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C1R :
2684 #if (IPP_VERSION_X100 >= 801)
2685 type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
2686 type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
2688 type == CV_32FC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C1R :
2689 type == CV_32FC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C3R :
2690 type == CV_32FC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C4R :
2692 normType == NORM_L1 ?
2693 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C1R :
2694 type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C3R :
2695 type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C4R :
2696 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C1R :
2697 type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C3R :
2698 type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C4R :
2699 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C1R :
2700 type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C3R :
2701 type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C4R :
2703 normType == NORM_L2 || normType == NORM_L2SQR ?
2704 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C1R :
2705 type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C3R :
2706 type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C4R :
2707 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C1R :
2708 type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C3R :
2709 type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C4R :
2710 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C1R :
2711 type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C3R :
2712 type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C4R :
2714 // Make sure only zero or one version of the function pointer is valid
2715 CV_Assert(!ippFuncHint || !ippFuncNoHint)
2716 if( ippFuncHint || ippFuncNoHint )
2718 Ipp64f norm_array[4];
2719 IppStatus ret = ippFuncHint ? ippFuncHint(src.ptr(), (int)src.step[0], sz, norm_array, ippAlgHintAccurate) :
2720 ippFuncNoHint(src.ptr(), (int)src.step[0], sz, norm_array);
// Fold the per-channel norms; for L2/L2SQR combine squares, sqrt only at the end.
2723 Ipp64f norm = (normType == NORM_L2 || normType == NORM_L2SQR) ? norm_array[0] * norm_array[0] : norm_array[0];
2724 for( int i = 1; i < cn; i++ )
2727 normType == NORM_INF ? std::max(norm, norm_array[i]) :
2728 normType == NORM_L1 ? norm + norm_array[i] :
2729 normType == NORM_L2 || normType == NORM_L2SQR ? norm + norm_array[i] * norm_array[i] :
2732 return normType == NORM_L2 ? (double)std::sqrt(norm) : (double)norm;
2734 setIppErrorStatus();
// --- Fast paths for continuous, unmasked data that fits in an int length. ---
2740 if( src.isContinuous() && mask.empty() )
2742 size_t len = src.total()*cn;
2743 if( len == (size_t)(int)len )
2745 if( depth == CV_32F )
2747 const float* data = src.ptr<float>();
2749 if( normType == NORM_L2 )
2752 GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
2753 return std::sqrt(result);
2755 if( normType == NORM_L2SQR )
2758 GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
2761 if( normType == NORM_L1 )
2764 GET_OPTIMIZED(normL1_32f)(data, 0, &result, (int)len, 1);
2767 if( normType == NORM_INF )
2770 GET_OPTIMIZED(normInf_32f)(data, 0, &result, (int)len, 1);
// Hamming norms on continuous CV_8U data: direct popcount.
2774 if( depth == CV_8U )
2776 const uchar* data = src.ptr<uchar>();
2778 if( normType == NORM_HAMMING )
2779 return normHamming(data, (int)len);
2781 if( normType == NORM_HAMMING2 )
2782 return normHamming(data, (int)len, 2);
2787 CV_Assert( mask.empty() || mask.type() == CV_8U );
// Masked/non-continuous Hamming: zero out the unmasked bytes, then recurse.
2789 if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
2794 bitwise_and(src, mask, temp);
2795 return norm(temp, normType);
// NORM_HAMMING counts single bits, NORM_HAMMING2 counts 2-bit cells.
2797 int cellSize = normType == NORM_HAMMING ? 1 : 2;
2799 const Mat* arrays[] = {&src, 0};
2801 NAryMatIterator it(arrays, ptrs);
2802 int total = (int)it.size;
2805 for( size_t i = 0; i < it.nplanes; i++, ++it )
2806 result += normHamming(ptrs[0], total, cellSize);
// --- Generic path: per-plane kernel dispatch with block-wise int summation. ---
// normType >> 1 maps NORM_INF/L1/L2(SQR) (1/2/4/5) onto table rows 0/1/2.
2811 NormFunc func = getNormFunc(normType >> 1, depth);
2812 CV_Assert( func != 0 );
2814 const Mat* arrays[] = {&src, &mask, 0};
2824 NAryMatIterator it(arrays, ptrs);
2825 int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
// Small depths accumulate in int for speed; the block size caps how many
// elements can be summed before the int partial sum must be flushed to double.
2826 bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
2827 ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
2829 int *ibuf = &result.i;
// Divide by cn so that the per-call element count (bsz*cn) stays within bound.
2834 intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
2835 blockSize = std::min(blockSize, intSumBlockSize);
2837 esz = src.elemSize();
2840 for( size_t i = 0; i < it.nplanes; i++, ++it )
2842 for( j = 0; j < total; j += blockSize )
2844 int bsz = std::min(total - j, blockSize);
2845 func( ptrs[0], ptrs[1], (uchar*)ibuf, bsz, cn );
// Flush the int partial sum into the double total at the block boundary or
// at the very last block.
2847 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
// Convert the accumulator union to double according to how it was filled.
2859 if( normType == NORM_INF )
2861 if( depth == CV_64F )
2863 else if( depth == CV_32F )
2864 result.d = result.f;
2866 result.d = result.i;
2868 else if( normType == NORM_L2 )
2869 result.d = std::sqrt(result.d);
// OpenCL path for the two-array cv::norm (absolute or relative difference
// norm). Returns false to fall back to the CPU implementation.
2878 static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask, double & result )
2881 int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
// Split off the NORM_RELATIVE flag; the base norm is computed first and the
// denominator norm (of _src2) is obtained in the same pass when relative.
2882 bool relative = (normType & NORM_RELATIVE) != 0;
2883 normType &= ~NORM_RELATIVE;
2884 bool normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR;
// Sum-based norms: reduce |diff| (or diff^2 for L2/L2SQR) over the mask.
2888 if (!ocl_sum(_src1, sc1, normType == NORM_L2 || normType == NORM_L2SQR ?
2889 OCL_OP_SUM_SQR : OCL_OP_SUM, _mask, _src2, relative, sc2))
// NORM_INF: max-reduction of the difference via ocl_minMaxIdx.
2894 if (!ocl_minMaxIdx(_src1, NULL, &sc1[0], NULL, NULL, _mask, std::max(CV_32S, depth),
2895 false, _src2, relative ? &sc2[0] : NULL))
// Fold per-channel partials into scalar numerator/denominator.
2901 for (int i = 0; i < cn; ++i)
2908 if (normType == NORM_L2)
2910 result = std::sqrt(result);
// Relative norm: divide by ||src2||, guarded against a zero denominator.
2916 result /= (s2 + DBL_EPSILON);
// Two-array (difference) norm, optionally relative. Dispatch order mirrors the
// single-array version: OpenCL -> IPP -> continuous fast paths -> generic loop.
2925 double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
2927 CV_Assert( _src1.sameSize(_src2) && _src1.type() == _src2.type() );
2931 CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src1.isUMat()),
2932 ocl_norm(_src1, _src2, normType, _mask, _result),
// Relative norm: try IPP's NormRel kernels first, otherwise fall back to the
// ratio of two plain norms at line 3032 below.
2936 if( normType & CV_RELATIVE )
2938 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
2939 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
2941 normType &= NORM_TYPE_MASK;
2942 CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR ||
2943 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
2944 size_t total_size = src1.total();
// Flatten >2D continuous data into a rows x cols 2D view for IPP.
2945 int rows = src1.size[0], cols = rows ? (int)(total_size/rows) : 0;
2946 if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
2947 && cols > 0 && (size_t)rows*cols == total_size
2948 && (normType == NORM_INF || normType == NORM_L1 ||
2949 normType == NORM_L2 || normType == NORM_L2SQR) )
2951 IppiSize sz = { cols, rows };
2952 int type = src1.type();
// Masked single-channel relative-norm kernels.
2955 typedef IppStatus (CV_STDCALL* ippiMaskNormRelFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
2956 ippiMaskNormRelFuncC1 ippFuncC1 =
2957 normType == NORM_INF ?
2958 (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8u_C1MR :
2960 type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8s_C1MR :
2962 type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_16u_C1MR :
2963 type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_32f_C1MR :
2965 normType == NORM_L1 ?
2966 (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8u_C1MR :
2968 type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8s_C1MR :
2970 type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_16u_C1MR :
2971 type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_32f_C1MR :
2973 normType == NORM_L2 || normType == NORM_L2SQR ?
2974 (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8u_C1MR :
2975 type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8s_C1MR :
2976 type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_16u_C1MR :
2977 type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_32f_C1MR :
2982 if( ippFuncC1(src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 )
2983 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
2984 setIppErrorStatus();
// Unmasked relative-norm kernels: 32f L1/L2 take an accuracy hint.
2989 typedef IppStatus (CV_STDCALL* ippiNormRelFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *);
2990 typedef IppStatus (CV_STDCALL* ippiNormRelFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
2991 ippiNormRelFuncNoHint ippFuncNoHint =
2992 normType == NORM_INF ?
2993 (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_8u_C1R :
2994 type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16u_C1R :
2995 type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16s_C1R :
2996 type == CV_32FC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_32f_C1R :
2998 normType == NORM_L1 ?
2999 (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_8u_C1R :
3000 type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16u_C1R :
3001 type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16s_C1R :
3003 normType == NORM_L2 || normType == NORM_L2SQR ?
3004 (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_8u_C1R :
3005 type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16u_C1R :
3006 type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16s_C1R :
3008 ippiNormRelFuncHint ippFuncHint =
3009 normType == NORM_L1 ?
3010 (type == CV_32FC1 ? (ippiNormRelFuncHint)ippiNormRel_L1_32f_C1R :
3012 normType == NORM_L2 || normType == NORM_L2SQR ?
3013 (type == CV_32FC1 ? (ippiNormRelFuncHint)ippiNormRel_L2_32f_C1R :
3018 if( ippFuncNoHint(src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm) >= 0 )
3019 return (double)norm;
3020 setIppErrorStatus();
3025 if( ippFuncHint(src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm, ippAlgHintAccurate) >= 0 )
3026 return (double)norm;
3027 setIppErrorStatus();
// Generic relative norm: ||src1 - src2|| / ||src2||, epsilon-guarded.
3032 return norm(_src1, _src2, normType & ~CV_RELATIVE, _mask)/(norm(_src2, normType, _mask) + DBL_EPSILON);
3035 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
3036 int depth = src1.depth(), cn = src1.channels();
3039 CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
3040 normType == NORM_L2 || normType == NORM_L2SQR ||
3041 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
// --- IPP acceleration for the absolute difference norm. ---
3043 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
3044 size_t total_size = src1.total();
3045 int rows = src1.size[0], cols = rows ? (int)(total_size/rows) : 0;
3046 if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
3047 && cols > 0 && (size_t)rows*cols == total_size
3048 && (normType == NORM_INF || normType == NORM_L1 ||
3049 normType == NORM_L2 || normType == NORM_L2SQR) )
3051 IppiSize sz = { cols, rows };
3052 int type = src1.type();
// Masked single-channel difference-norm kernels.
3055 typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
3056 ippiMaskNormDiffFuncC1 ippFuncC1 =
3057 normType == NORM_INF ?
3058 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8u_C1MR :
3059 type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8s_C1MR :
3060 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_16u_C1MR :
3061 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_32f_C1MR :
3063 normType == NORM_L1 ?
3064 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8u_C1MR :
3066 type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8s_C1MR :
3068 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_16u_C1MR :
3069 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_32f_C1MR :
3071 normType == NORM_L2 || normType == NORM_L2SQR ?
3072 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8u_C1MR :
3073 type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8s_C1MR :
3074 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_16u_C1MR :
3075 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_32f_C1MR :
3080 if( ippFuncC1(src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 )
3081 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
3082 setIppErrorStatus();
// Masked 3-channel kernels: one call per channel, combined afterwards.
3085 typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC3)(const void *, int, const void *, int, const void *, int, IppiSize, int, Ipp64f *);
3086 ippiMaskNormDiffFuncC3 ippFuncC3 =
3087 normType == NORM_INF ?
3088 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8u_C3CMR :
3089 type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8s_C3CMR :
3090 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_16u_C3CMR :
3091 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_32f_C3CMR :
3093 normType == NORM_L1 ?
3094 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8u_C3CMR :
3095 type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8s_C3CMR :
3096 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_16u_C3CMR :
3097 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_32f_C3CMR :
3099 normType == NORM_L2 || normType == NORM_L2SQR ?
3100 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8u_C3CMR :
3101 type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8s_C3CMR :
3102 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_16u_C3CMR :
3103 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_32f_C3CMR :
3107 Ipp64f norm1, norm2, norm3;
3108 if( ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 &&
3109 ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 &&
3110 ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0)
3113 normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) :
3114 normType == NORM_L1 ? norm1 + norm2 + norm3 :
3115 normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) :
3117 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
3119 setIppErrorStatus();
// Unmasked kernels: 32f L1/L2 take an accuracy hint, others do not.
3125 typedef IppStatus (CV_STDCALL* ippiNormDiffFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
3126 typedef IppStatus (CV_STDCALL* ippiNormDiffFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *);
3127 ippiNormDiffFuncHint ippFuncHint =
3128 normType == NORM_L1 ?
3129 (type == CV_32FC1 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C1R :
3130 type == CV_32FC3 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C3R :
3131 type == CV_32FC4 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C4R :
3133 normType == NORM_L2 || normType == NORM_L2SQR ?
3134 (type == CV_32FC1 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C1R :
3135 type == CV_32FC3 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C3R :
3136 type == CV_32FC4 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C4R :
3138 ippiNormDiffFuncNoHint ippFuncNoHint =
3139 normType == NORM_INF ?
3140 (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C1R :
3141 type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C3R :
3142 type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C4R :
3143 type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C1R :
3144 type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C3R :
3145 type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C4R :
3146 type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C1R :
3147 #if (IPP_VERSION_X100 >= 801)
3148 type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
3149 type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
3151 type == CV_32FC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C1R :
3152 type == CV_32FC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C3R :
3153 type == CV_32FC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C4R :
3155 normType == NORM_L1 ?
3156 (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C1R :
3157 type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C3R :
3158 type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C4R :
3159 type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C1R :
3160 type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C3R :
3161 type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C4R :
3162 type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C1R :
3163 type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C3R :
3164 type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C4R :
3166 normType == NORM_L2 || normType == NORM_L2SQR ?
3167 (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C1R :
3168 type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C3R :
3169 type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C4R :
3170 type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C1R :
3171 type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C3R :
3172 type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C4R :
3173 type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C1R :
3174 type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C3R :
3175 type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C4R :
3177 // Make sure only zero or one version of the function pointer is valid
3178 CV_Assert(!ippFuncHint || !ippFuncNoHint);
3179 if( ippFuncHint || ippFuncNoHint )
3181 Ipp64f norm_array[4];
3182 IppStatus ret = ippFuncHint ? ippFuncHint(src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, norm_array, ippAlgHintAccurate) :
3183 ippFuncNoHint(src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, norm_array);
// Fold per-channel norms; L2/L2SQR combine squares, sqrt only at the end.
3186 Ipp64f norm = (normType == NORM_L2 || normType == NORM_L2SQR) ? norm_array[0] * norm_array[0] : norm_array[0];
3187 for( int i = 1; i < src1.channels(); i++ )
3190 normType == NORM_INF ? std::max(norm, norm_array[i]) :
3191 normType == NORM_L1 ? norm + norm_array[i] :
3192 normType == NORM_L2 || normType == NORM_L2SQR ? norm + norm_array[i] * norm_array[i] :
3195 return normType == NORM_L2 ? (double)std::sqrt(norm) : (double)norm;
3197 setIppErrorStatus();
// --- Fast paths for continuous, unmasked 32F data with an int-sized length. ---
3203 if( src1.isContinuous() && src2.isContinuous() && mask.empty() )
3205 size_t len = src1.total()*src1.channels();
3206 if( len == (size_t)(int)len )
3208 if( src1.depth() == CV_32F )
3210 const float* data1 = src1.ptr<float>();
3211 const float* data2 = src2.ptr<float>();
3213 if( normType == NORM_L2 )
3216 GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
3217 return std::sqrt(result);
3219 if( normType == NORM_L2SQR )
3222 GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
3225 if( normType == NORM_L1 )
3228 GET_OPTIMIZED(normDiffL1_32f)(data1, data2, 0, &result, (int)len, 1);
3231 if( normType == NORM_INF )
3234 GET_OPTIMIZED(normDiffInf_32f)(data1, data2, 0, &result, (int)len, 1);
3241 CV_Assert( mask.empty() || mask.type() == CV_8U );
// Hamming distance: XOR the inputs, mask the result, count set bits/cells.
3243 if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
3248 bitwise_xor(src1, src2, temp);
3249 bitwise_and(temp, mask, temp);
3250 return norm(temp, normType);
3252 int cellSize = normType == NORM_HAMMING ? 1 : 2;
3254 const Mat* arrays[] = {&src1, &src2, 0};
3256 NAryMatIterator it(arrays, ptrs);
3257 int total = (int)it.size;
3260 for( size_t i = 0; i < it.nplanes; i++, ++it )
3261 result += normHamming(ptrs[0], ptrs[1], total, cellSize);
// --- Generic path: per-plane kernel dispatch with block-wise int summation. ---
3266 NormDiffFunc func = getNormDiffFunc(normType >> 1, depth);
3267 CV_Assert( func != 0 );
3269 const Mat* arrays[] = {&src1, &src2, &mask, 0};
3280 NAryMatIterator it(arrays, ptrs);
3281 int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
3282 bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
3283 ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
// Difference norms accumulate in an unsigned int (abs differences are >= 0).
3285 unsigned *ibuf = &result.u;
// NOTE(review): unlike the single-array version (line 2834), this block size
// is NOT divided by cn, yet the kernel sums bsz*cn elements per call — for
// multi-channel input the int partial sum could overflow. Verify upstream.
3290 intSumBlockSize = normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15);
3291 blockSize = std::min(blockSize, intSumBlockSize);
3293 esz = src1.elemSize();
3296 for( size_t i = 0; i < it.nplanes; i++, ++it )
3298 for( j = 0; j < total; j += blockSize )
3300 int bsz = std::min(total - j, blockSize);
3301 func( ptrs[0], ptrs[1], ptrs[2], (uchar*)ibuf, bsz, cn );
// Flush the int partial sum into the double total at block boundaries or at
// the very last block.
3303 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
// Convert the accumulator union to double according to how it was filled.
3316 if( normType == NORM_INF )
3318 if( depth == CV_64F )
3320 else if( depth == CV_32F )
3321 result.d = result.f;
3323 result.d = result.u;
3325 else if( normType == NORM_L2 )
3326 result.d = std::sqrt(result.d);
3332 ///////////////////////////////////// batch distance ///////////////////////////////////////
// L1 distance from one query vector src1 to nvecs database vectors (rows of
// src2, row stride step2 in bytes). Masked-out rows receive the max sentinel.
3337 template<typename _Tp, typename _Rt>
3338 void batchDistL1_(const _Tp* src1, const _Tp* src2, size_t step2,
3339 int nvecs, int len, _Rt* dist, const uchar* mask)
// Convert the byte stride into an element stride.
3341 step2 /= sizeof(src2[0]);
// Unmasked path: one distance per database row.
3344 for( int i = 0; i < nvecs; i++ )
3345 dist[i] = normL1<_Tp, _Rt>(src1, src2 + step2*i, len);
// Masked path: skipped rows get the "infinitely far" sentinel value.
3349 _Rt val0 = std::numeric_limits<_Rt>::max();
3350 for( int i = 0; i < nvecs; i++ )
3351 dist[i] = mask[i] ? normL1<_Tp, _Rt>(src1, src2 + step2*i, len) : val0;
// Squared L2 distance from src1 to each of nvecs rows of src2 (see
// batchDistL1_ for the layout); masked-out rows get the max sentinel.
3355 template<typename _Tp, typename _Rt>
3356 void batchDistL2Sqr_(const _Tp* src1, const _Tp* src2, size_t step2,
3357 int nvecs, int len, _Rt* dist, const uchar* mask)
// Convert the byte stride into an element stride.
3359 step2 /= sizeof(src2[0]);
3362 for( int i = 0; i < nvecs; i++ )
3363 dist[i] = normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len);
// Masked path: skipped rows get the "infinitely far" sentinel value.
3367 _Rt val0 = std::numeric_limits<_Rt>::max();
3368 for( int i = 0; i < nvecs; i++ )
3369 dist[i] = mask[i] ? normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len) : val0;
// Euclidean (L2) distance variant: same as batchDistL2Sqr_ but takes the
// square root of each squared distance.
3373 template<typename _Tp, typename _Rt>
3374 void batchDistL2_(const _Tp* src1, const _Tp* src2, size_t step2,
3375 int nvecs, int len, _Rt* dist, const uchar* mask)
// Convert the byte stride into an element stride.
3377 step2 /= sizeof(src2[0]);
3380 for( int i = 0; i < nvecs; i++ )
3381 dist[i] = std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len));
// Masked path: skipped rows get the "infinitely far" sentinel value.
3385 _Rt val0 = std::numeric_limits<_Rt>::max();
3386 for( int i = 0; i < nvecs; i++ )
3387 dist[i] = mask[i] ? std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len)) : val0;
// Hamming distance (1-bit cells) from src1 to each of nvecs rows of src2.
3391 static void batchDistHamming(const uchar* src1, const uchar* src2, size_t step2,
3392 int nvecs, int len, int* dist, const uchar* mask)
3394 step2 /= sizeof(src2[0]);
3397 for( int i = 0; i < nvecs; i++ )
3398 dist[i] = normHamming(src1, src2 + step2*i, len);
// Masked path: `val0` sentinel (declaration elided in this view — presumably
// INT_MAX, matching the templated variants above; verify against full source).
3403 for( int i = 0; i < nvecs; i++ )
3404 dist[i] = mask[i] ? normHamming(src1, src2 + step2*i, len) : val0;
// Hamming distance over 2-bit cells (NORM_HAMMING2) from src1 to each row of src2.
3408 static void batchDistHamming2(const uchar* src1, const uchar* src2, size_t step2,
3409 int nvecs, int len, int* dist, const uchar* mask)
3411 step2 /= sizeof(src2[0]);
3414 for( int i = 0; i < nvecs; i++ )
3415 dist[i] = normHamming(src1, src2 + step2*i, len, 2);
// Masked path: `val0` sentinel (declaration elided in this view — presumably
// INT_MAX; verify against full source).
3420 for( int i = 0; i < nvecs; i++ )
3421 dist[i] = mask[i] ? normHamming(src1, src2 + step2*i, len, 2) : val0;
// Non-template stub: L1 distance, uchar input / int output, for the
// BatchDistFunc dispatch table.
3425 static void batchDistL1_8u32s(const uchar* src1, const uchar* src2, size_t step2,
3426 int nvecs, int len, int* dist, const uchar* mask)
3428 batchDistL1_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask);
// Non-template stub: L1 distance, uchar input / float output.
3431 static void batchDistL1_8u32f(const uchar* src1, const uchar* src2, size_t step2,
3432 int nvecs, int len, float* dist, const uchar* mask)
3434 batchDistL1_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
3437 static void batchDistL2Sqr_8u32s(const uchar* src1, const uchar* src2, size_t step2,
3438 int nvecs, int len, int* dist, const uchar* mask)
3440 batchDistL2Sqr_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask);
3443 static void batchDistL2Sqr_8u32f(const uchar* src1, const uchar* src2, size_t step2,
3444 int nvecs, int len, float* dist, const uchar* mask)
3446 batchDistL2Sqr_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
3449 static void batchDistL2_8u32f(const uchar* src1, const uchar* src2, size_t step2,
3450 int nvecs, int len, float* dist, const uchar* mask)
3452 batchDistL2_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
3455 static void batchDistL1_32f(const float* src1, const float* src2, size_t step2,
3456 int nvecs, int len, float* dist, const uchar* mask)
3458 batchDistL1_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
3461 static void batchDistL2Sqr_32f(const float* src1, const float* src2, size_t step2,
3462 int nvecs, int len, float* dist, const uchar* mask)
3464 batchDistL2Sqr_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
3467 static void batchDistL2_32f(const float* src1, const float* src2, size_t step2,
3468 int nvecs, int len, float* dist, const uchar* mask)
3470 batchDistL2_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
// Type-erased signature shared by all batchDist* kernels above; the concrete
// typed functions are cast to this form by the dispatcher in cv::batchDistance
// (dist is passed as uchar* and reinterpreted as int*/float* inside).
typedef void (*BatchDistFunc)(const uchar* src1, const uchar* src2, size_t step2,
                              int nvecs, int len, uchar* dist, const uchar* mask);
3477 struct BatchDistInvoker : public ParallelLoopBody
3479 BatchDistInvoker( const Mat& _src1, const Mat& _src2,
3480 Mat& _dist, Mat& _nidx, int _K,
3481 const Mat& _mask, int _update,
3482 BatchDistFunc _func)
3494 void operator()(const Range& range) const
3496 AutoBuffer<int> buf(src2->rows);
3499 for( int i = range.start; i < range.end; i++ )
3501 func(src1->ptr(i), src2->ptr(), src2->step, src2->rows, src2->cols,
3502 K > 0 ? (uchar*)bufptr : dist->ptr(i), mask->data ? mask->ptr(i) : 0);
3506 int* nidxptr = nidx->ptr<int>(i);
3507 // since positive float's can be compared just like int's,
3508 // we handle both CV_32S and CV_32F cases with a single branch
3509 int* distptr = (int*)dist->ptr(i);
3513 for( j = 0; j < src2->rows; j++ )
3516 if( d < distptr[K-1] )
3518 for( k = K-2; k >= 0 && distptr[k] > d; k-- )
3520 nidxptr[k+1] = nidxptr[k];
3521 distptr[k+1] = distptr[k];
3523 nidxptr[k+1] = j + update;
3543 void cv::batchDistance( InputArray _src1, InputArray _src2,
3544 OutputArray _dist, int dtype, OutputArray _nidx,
3545 int normType, int K, InputArray _mask,
3546 int update, bool crosscheck )
3548 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
3549 int type = src1.type();
3550 CV_Assert( type == src2.type() && src1.cols == src2.cols &&
3551 (type == CV_32F || type == CV_8U));
3552 CV_Assert( _nidx.needed() == (K > 0) );
3556 dtype = normType == NORM_HAMMING || normType == NORM_HAMMING2 ? CV_32S : CV_32F;
3558 CV_Assert( (type == CV_8U && dtype == CV_32S) || dtype == CV_32F);
3560 K = std::min(K, src2.rows);
3562 _dist.create(src1.rows, (K > 0 ? K : src2.rows), dtype);
3563 Mat dist = _dist.getMat(), nidx;
3564 if( _nidx.needed() )
3566 _nidx.create(dist.size(), CV_32S);
3567 nidx = _nidx.getMat();
3570 if( update == 0 && K > 0 )
3572 dist = Scalar::all(dtype == CV_32S ? (double)INT_MAX : (double)FLT_MAX);
3573 nidx = Scalar::all(-1);
3578 CV_Assert( K == 1 && update == 0 && mask.empty() );
3580 batchDistance(src2, src1, tdist, dtype, tidx, normType, K, mask, 0, false);
3582 // if an idx-th element from src1 appeared to be the nearest to i-th element of src2,
3583 // we update the minimum mutual distance between idx-th element of src1 and the whole src2 set.
3584 // As a result, if nidx[idx] = i*, it means that idx-th element of src1 is the nearest
3585 // to i*-th element of src2 and i*-th element of src2 is the closest to idx-th element of src1.
3586 // If nidx[idx] = -1, it means that there is no such ideal couple for it in src2.
3587 // This O(N) procedure is called cross-check and it helps to eliminate some false matches.
3588 if( dtype == CV_32S )
3590 for( int i = 0; i < tdist.rows; i++ )
3592 int idx = tidx.at<int>(i);
3593 int d = tdist.at<int>(i), d0 = dist.at<int>(idx);
3596 dist.at<int>(idx) = d;
3597 nidx.at<int>(idx) = i + update;
3603 for( int i = 0; i < tdist.rows; i++ )
3605 int idx = tidx.at<int>(i);
3606 float d = tdist.at<float>(i), d0 = dist.at<float>(idx);
3609 dist.at<float>(idx) = d;
3610 nidx.at<int>(idx) = i + update;
3617 BatchDistFunc func = 0;
3620 if( normType == NORM_L1 && dtype == CV_32S )
3621 func = (BatchDistFunc)batchDistL1_8u32s;
3622 else if( normType == NORM_L1 && dtype == CV_32F )
3623 func = (BatchDistFunc)batchDistL1_8u32f;
3624 else if( normType == NORM_L2SQR && dtype == CV_32S )
3625 func = (BatchDistFunc)batchDistL2Sqr_8u32s;
3626 else if( normType == NORM_L2SQR && dtype == CV_32F )
3627 func = (BatchDistFunc)batchDistL2Sqr_8u32f;
3628 else if( normType == NORM_L2 && dtype == CV_32F )
3629 func = (BatchDistFunc)batchDistL2_8u32f;
3630 else if( normType == NORM_HAMMING && dtype == CV_32S )
3631 func = (BatchDistFunc)batchDistHamming;
3632 else if( normType == NORM_HAMMING2 && dtype == CV_32S )
3633 func = (BatchDistFunc)batchDistHamming2;
3635 else if( type == CV_32F && dtype == CV_32F )
3637 if( normType == NORM_L1 )
3638 func = (BatchDistFunc)batchDistL1_32f;
3639 else if( normType == NORM_L2SQR )
3640 func = (BatchDistFunc)batchDistL2Sqr_32f;
3641 else if( normType == NORM_L2 )
3642 func = (BatchDistFunc)batchDistL2_32f;
3646 CV_Error_(CV_StsUnsupportedFormat,
3647 ("The combination of type=%d, dtype=%d and normType=%d is not supported",
3648 type, dtype, normType));
3650 parallel_for_(Range(0, src1.rows),
3651 BatchDistInvoker(src1, src2, dist, nidx, K, mask, update, func));
3655 void cv::findNonZero( InputArray _src, OutputArray _idx )
3657 Mat src = _src.getMat();
3658 CV_Assert( src.type() == CV_8UC1 );
3659 int n = countNonZero(src);
3660 if( _idx.kind() == _InputArray::MAT && !_idx.getMatRef().isContinuous() )
3662 _idx.create(n, 1, CV_32SC2);
3663 Mat idx = _idx.getMat();
3664 CV_Assert(idx.isContinuous());
3665 Point* idx_ptr = idx.ptr<Point>();
3667 for( int i = 0; i < src.rows; i++ )
3669 const uchar* bin_ptr = src.ptr(i);
3670 for( int j = 0; j < src.cols; j++ )
3672 *idx_ptr++ = Point(j, i);
3676 double cv::PSNR(InputArray _src1, InputArray _src2)
3678 CV_Assert( _src1.depth() == CV_8U );
3679 double diff = std::sqrt(norm(_src1, _src2, NORM_L2SQR)/(_src1.total()*_src1.channels()));
3680 return 20*log10(255./(diff+DBL_EPSILON));
3684 CV_IMPL CvScalar cvSum( const CvArr* srcarr )
3686 cv::Scalar sum = cv::sum(cv::cvarrToMat(srcarr, false, true, 1));
3687 if( CV_IS_IMAGE(srcarr) )
3689 int coi = cvGetImageCOI((IplImage*)srcarr);
3692 CV_Assert( 0 < coi && coi <= 4 );
3693 sum = cv::Scalar(sum[coi-1]);
3699 CV_IMPL int cvCountNonZero( const CvArr* imgarr )
3701 cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
3702 if( img.channels() > 1 )
3703 cv::extractImageCOI(imgarr, img);
3704 return countNonZero(img);
3709 cvAvg( const void* imgarr, const void* maskarr )
3711 cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
3712 cv::Scalar mean = !maskarr ? cv::mean(img) : cv::mean(img, cv::cvarrToMat(maskarr));
3713 if( CV_IS_IMAGE(imgarr) )
3715 int coi = cvGetImageCOI((IplImage*)imgarr);
3718 CV_Assert( 0 < coi && coi <= 4 );
3719 mean = cv::Scalar(mean[coi-1]);
3727 cvAvgSdv( const CvArr* imgarr, CvScalar* _mean, CvScalar* _sdv, const void* maskarr )
3729 cv::Scalar mean, sdv;
3733 mask = cv::cvarrToMat(maskarr);
3735 cv::meanStdDev(cv::cvarrToMat(imgarr, false, true, 1), mean, sdv, mask );
3737 if( CV_IS_IMAGE(imgarr) )
3739 int coi = cvGetImageCOI((IplImage*)imgarr);
3742 CV_Assert( 0 < coi && coi <= 4 );
3743 mean = cv::Scalar(mean[coi-1]);
3744 sdv = cv::Scalar(sdv[coi-1]);
3749 *(cv::Scalar*)_mean = mean;
3751 *(cv::Scalar*)_sdv = sdv;
3756 cvMinMaxLoc( const void* imgarr, double* _minVal, double* _maxVal,
3757 CvPoint* _minLoc, CvPoint* _maxLoc, const void* maskarr )
3759 cv::Mat mask, img = cv::cvarrToMat(imgarr, false, true, 1);
3761 mask = cv::cvarrToMat(maskarr);
3762 if( img.channels() > 1 )
3763 cv::extractImageCOI(imgarr, img);
3765 cv::minMaxLoc( img, _minVal, _maxVal,
3766 (cv::Point*)_minLoc, (cv::Point*)_maxLoc, mask );
3771 cvNorm( const void* imgA, const void* imgB, int normType, const void* maskarr )
3780 a = cv::cvarrToMat(imgA, false, true, 1);
3782 mask = cv::cvarrToMat(maskarr);
3784 if( a.channels() > 1 && CV_IS_IMAGE(imgA) && cvGetImageCOI((const IplImage*)imgA) > 0 )
3785 cv::extractImageCOI(imgA, a);
3788 return !maskarr ? cv::norm(a, normType) : cv::norm(a, normType, mask);
3790 cv::Mat b = cv::cvarrToMat(imgB, false, true, 1);
3791 if( b.channels() > 1 && CV_IS_IMAGE(imgB) && cvGetImageCOI((const IplImage*)imgB) > 0 )
3792 cv::extractImageCOI(imgB, b);
3794 return !maskarr ? cv::norm(a, b, normType) : cv::norm(a, b, normType, mask);