1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "precomp.hpp"
47 #include "opencl_kernels_core.hpp"
/* Converts a raw multi-channel pixel value `v` into a cv::Scalar by copying
   each channel out of the reinterpreted storage of `v`.
   NOTE(review): this view of the file elides some original lines (the opening
   brace, the declaration of the local Scalar `s`, and the return statement). */
template<typename T> static inline Scalar rawToScalar(const T& v)
    typedef typename DataType<T>::channel_type T1;  // per-channel primitive type of T
    int i, n = DataType<T>::channels;               // number of channels in T
    for( i = 0; i < n; i++ )
        s.val[i] = ((T1*)&v)[i];                    // copy channel i into the Scalar
62 /****************************************************************************************\
64 \****************************************************************************************/
// Primary template of the SIMD sum helper: a do-nothing fallback for
// source/accumulator type pairs that have no vectorized specialization below.
// Returning 0 processed elements makes sum_() fall through to its scalar loops.
// NOTE(review): the enclosing `struct Sum_SIMD` header/braces and the `return 0;`
// body are elided in this view of the file.
template <typename T, typename ST>
int operator () (const T *, const uchar *, ST *, int, int) const
// NEON-accelerated partial sum for 8-bit unsigned data (uchar -> int).
// NOTE(review): this view elides the `template<>` header, braces, the early
// return for unsupported configurations, the `int x = 0;` init, and the final
// accumulation of `ar[]` into `dst[]`.
struct Sum_SIMD<uchar, int>
    int operator () (const uchar * src0, const uchar * mask, int * dst, int len, int cn) const
        // Vector path handles only the unmasked case with 1, 2 or 4 channels.
        if (mask || (cn != 1 && cn != 2 && cn != 4))

        uint32x4_t v_sum = vdupq_n_u32(0u);  // 4-lane running sum

        // Main loop: 16 uchars per iteration, widened u8 -> u16 -> u32
        // before accumulating (no overflow for any realistic row length).
        for ( ; x <= len - 16; x += 16)
            uint8x16_t v_src = vld1q_u8(src0 + x);
            uint16x8_t v_half = vmovl_u8(vget_low_u8(v_src));

            v_sum = vaddq_u32(v_sum, vmovl_u16(vget_low_u16(v_half)));
            v_sum = vaddq_u32(v_sum, vmovl_u16(vget_high_u16(v_half)));

            v_half = vmovl_u8(vget_high_u8(v_src));
            v_sum = vaddq_u32(v_sum, vmovl_u16(vget_low_u16(v_half)));
            v_sum = vaddq_u32(v_sum, vmovl_u16(vget_high_u16(v_half)));

        // Tail loop: 8 elements at a time.
        for ( ; x <= len - 8; x += 8)
            uint16x8_t v_src = vmovl_u8(vld1_u8(src0 + x));

            v_sum = vaddq_u32(v_sum, vmovl_u16(vget_low_u16(v_src)));
            v_sum = vaddq_u32(v_sum, vmovl_u16(vget_high_u16(v_src)));

        // Spill the 4 vector lanes; lane i holds the sum of elements whose
        // index is congruent to i mod 4, so striding by cn folds them into
        // per-channel totals.
        unsigned int CV_DECL_ALIGNED(16) ar[4];
        vst1q_u32(ar, v_sum);

        for (int i = 0; i < 4; i += cn)
            for (int j = 0; j < cn; ++j)
// NEON-accelerated partial sum for 8-bit signed data (schar -> int).
// Mirrors the uchar specialization above, using the signed widening intrinsics.
// NOTE(review): the `template<>` header, braces, early return, `int x = 0;`
// and the final fold of `ar[]` into `dst[]` are elided in this view.
struct Sum_SIMD<schar, int>
    int operator () (const schar * src0, const uchar * mask, int * dst, int len, int cn) const
        // Vector path handles only the unmasked case with 1, 2 or 4 channels.
        if (mask || (cn != 1 && cn != 2 && cn != 4))

        int32x4_t v_sum = vdupq_n_s32(0);  // 4-lane signed running sum

        // Main loop: 16 schars per iteration, sign-extended s8 -> s16 -> s32.
        for ( ; x <= len - 16; x += 16)
            int8x16_t v_src = vld1q_s8(src0 + x);
            int16x8_t v_half = vmovl_s8(vget_low_s8(v_src));

            v_sum = vaddq_s32(v_sum, vmovl_s16(vget_low_s16(v_half)));
            v_sum = vaddq_s32(v_sum, vmovl_s16(vget_high_s16(v_half)));

            v_half = vmovl_s8(vget_high_s8(v_src));
            v_sum = vaddq_s32(v_sum, vmovl_s16(vget_low_s16(v_half)));
            v_sum = vaddq_s32(v_sum, vmovl_s16(vget_high_s16(v_half)));

        // Tail loop: 8 elements at a time.
        for ( ; x <= len - 8; x += 8)
            int16x8_t v_src = vmovl_s8(vld1_s8(src0 + x));

            v_sum = vaddq_s32(v_sum, vmovl_s16(vget_low_s16(v_src)));
            v_sum = vaddq_s32(v_sum, vmovl_s16(vget_high_s16(v_src)));

        // Spill vector lanes and fold them into per-channel totals (cn | 4).
        int CV_DECL_ALIGNED(16) ar[4];
        vst1q_s32(ar, v_sum);

        for (int i = 0; i < 4; i += cn)
            for (int j = 0; j < cn; ++j)
// NEON-accelerated partial sum for 16-bit unsigned data (ushort -> int).
// NOTE(review): the `template<>` header, braces, early return, `int x = 0;`
// and the final fold of `ar[]` into `dst[]` are elided in this view.
struct Sum_SIMD<ushort, int>
    int operator () (const ushort * src0, const uchar * mask, int * dst, int len, int cn) const
        // Vector path handles only the unmasked case with 1, 2 or 4 channels.
        if (mask || (cn != 1 && cn != 2 && cn != 4))

        uint32x4_t v_sum = vdupq_n_u32(0u);  // 4-lane running sum

        // Main loop: 8 ushorts per iteration, widened u16 -> u32.
        for ( ; x <= len - 8; x += 8)
            uint16x8_t v_src = vld1q_u16(src0 + x);

            v_sum = vaddq_u32(v_sum, vmovl_u16(vget_low_u16(v_src)));
            v_sum = vaddq_u32(v_sum, vmovl_u16(vget_high_u16(v_src)));

        // Tail loop: 4 elements at a time.
        for ( ; x <= len - 4; x += 4)
            v_sum = vaddq_u32(v_sum, vmovl_u16(vld1_u16(src0 + x)));

        // Spill vector lanes and fold them into per-channel totals (cn | 4).
        unsigned int CV_DECL_ALIGNED(16) ar[4];
        vst1q_u32(ar, v_sum);

        for (int i = 0; i < 4; i += cn)
            for (int j = 0; j < cn; ++j)
// NEON-accelerated partial sum for 16-bit signed data (short -> int).
// NOTE(review): the `template<>` header, braces, early return, `int x = 0;`
// and the final fold of `ar[]` into `dst[]` are elided in this view.
struct Sum_SIMD<short, int>
    int operator () (const short * src0, const uchar * mask, int * dst, int len, int cn) const
        // Vector path handles only the unmasked case with 1, 2 or 4 channels.
        if (mask || (cn != 1 && cn != 2 && cn != 4))

        // NOTE(review): unsigned literal 0u fed to a signed intrinsic; it
        // converts to 0 so behavior is fine, but vdupq_n_s32(0) would be cleaner.
        int32x4_t v_sum = vdupq_n_s32(0u);

        // Main loop: 8 shorts per iteration, sign-extended s16 -> s32.
        for ( ; x <= len - 8; x += 8)
            int16x8_t v_src = vld1q_s16(src0 + x);

            v_sum = vaddq_s32(v_sum, vmovl_s16(vget_low_s16(v_src)));
            v_sum = vaddq_s32(v_sum, vmovl_s16(vget_high_s16(v_src)));

        // Tail loop: 4 elements at a time.
        for ( ; x <= len - 4; x += 4)
            v_sum = vaddq_s32(v_sum, vmovl_s16(vld1_s16(src0 + x)));

        // Spill vector lanes and fold them into per-channel totals (cn | 4).
        int CV_DECL_ALIGNED(16) ar[4];
        vst1q_s32(ar, v_sum);

        for (int i = 0; i < 4; i += cn)
            for (int j = 0; j < cn; ++j)
// Generic per-plane accumulation kernel: adds `len` pixels of `cn` channels
// each from src0 into dst (dst[c] accumulates channel c). A SIMD helper
// (Sum_SIMD, `vop` below) first processes a prefix of the row; the scalar
// code finishes the remainder with per-channel-count specializations.
// NOTE(review): many original lines (braces, the `if(!mask)` / masked-path
// split, several loop bodies and the return of the processed count) are
// elided in this view; comments below annotate only the visible code.
template<typename T, typename ST>
static int sum_(const T* src0, const uchar* mask, ST* dst, int len, int cn )
    // `i` = number of elements already summed by the SIMD helper;
    // `k` = leftover channels when cn is not a multiple of 4.
    int i = vop(src0, mask, dst, len, cn), k = cn % 4;
#if CV_ENABLE_UNROLLED
    // Single-channel path, unrolled 4x.
    for(; i <= len - 4; i += 4, src += cn*4 )
        s0 += src[0] + src[cn] + src[cn*2] + src[cn*3];
    for( ; i < len; i++, src += cn )
    // Two-channel path.
    ST s0 = dst[0], s1 = dst[1];
    for( ; i < len; i++, src += cn )
    // Three-channel path.
    ST s0 = dst[0], s1 = dst[1], s2 = dst[2];
    for( ; i < len; i++, src += cn )
    // Remaining channels handled four at a time (cn >= 4).
    for( ; k < cn; k += 4 )
        src = src0 + i*cn + k;
        ST s0 = dst[k], s1 = dst[k+1], s2 = dst[k+2], s3 = dst[k+3];
        for( ; i < len; i++, src += cn )
            s0 += src[0]; s1 += src[1];
            s2 += src[2]; s3 += src[3];
    // Masked paths: only pixels with a non-zero mask byte are accumulated
    // (mask tests are among the elided lines).
    for( i = 0; i < len; i++ )
    ST s0 = dst[0], s1 = dst[1], s2 = dst[2];
    for( i = 0; i < len; i++, src += 3 )
    // Masked path for arbitrary channel counts.
    for( i = 0; i < len; i++, src += cn )
#if CV_ENABLE_UNROLLED
        for( ; k <= cn - 4; k += 4 )
            s0 = dst[k] + src[k];
            s1 = dst[k+1] + src[k+1];
            dst[k] = s0; dst[k+1] = s1;
            s0 = dst[k+2] + src[k+2];
            s1 = dst[k+3] + src[k+3];
            dst[k+2] = s0; dst[k+3] = s1;
349 static int sum8u( const uchar* src, const uchar* mask, int* dst, int len, int cn )
350 { return sum_(src, mask, dst, len, cn); }
352 static int sum8s( const schar* src, const uchar* mask, int* dst, int len, int cn )
353 { return sum_(src, mask, dst, len, cn); }
355 static int sum16u( const ushort* src, const uchar* mask, int* dst, int len, int cn )
356 { return sum_(src, mask, dst, len, cn); }
358 static int sum16s( const short* src, const uchar* mask, int* dst, int len, int cn )
359 { return sum_(src, mask, dst, len, cn); }
361 static int sum32s( const int* src, const uchar* mask, double* dst, int len, int cn )
362 { return sum_(src, mask, dst, len, cn); }
364 static int sum32f( const float* src, const uchar* mask, double* dst, int len, int cn )
365 { return sum_(src, mask, dst, len, cn); }
367 static int sum64f( const double* src, const uchar* mask, double* dst, int len, int cn )
368 { return sum_(src, mask, dst, len, cn); }
// Type-erased signature shared by all depth-specific sum kernels; the caller
// casts the src/dst pointers back to the proper element types.
typedef int (*SumFunc)(const uchar*, const uchar* mask, uchar*, int, int);

// Returns the sum kernel for a given matrix depth (CV_8U..CV_64F), indexing
// a static table ordered by depth.
// NOTE(review): some table rows (e.g. the sum16s/sum32s line between the
// visible entries) and the surrounding braces are elided in this view.
static SumFunc getSumFunc(int depth)
    static SumFunc sumTab[] =
        (SumFunc)GET_OPTIMIZED(sum8u), (SumFunc)sum8s,
        (SumFunc)sum16u, (SumFunc)sum16s,
        (SumFunc)GET_OPTIMIZED(sum32f), (SumFunc)sum64f,

    return sumTab[depth];
// Scalar reference implementation of countNonZero: counts the elements of
// src[0..len) that compare unequal to zero.
// NOTE(review): the `template<typename T>` header, `int i = 0, nz = 0;`
// initialization, #endif, loop body and `return nz;` are elided in this view.
static int countNonZero_(const T* src, int len )
#if CV_ENABLE_UNROLLED
    // 4x unrolled main loop; each (x != 0) comparison contributes 0 or 1.
    for(; i <= len - 4; i += 4 )
        nz += (src[i] != 0) + (src[i+1] != 0) + (src[i+2] != 0) + (src[i+3] != 0);
    // Tail loop for the remaining 0-3 elements.
    for( ; i < len; i++ )
// Vectorized non-zero counter for 8-bit data, with an SSE2 path (popcount
// lookup table over a compare-to-zero mask) and a NEON path (block-wise
// accumulation of zero counts, subtracted from the processed length).
// NOTE(review): the #if CV_SSE2 / #elif CV_NEON guards, braces, several
// initializations (`int i = 0, nz = 0;`, table init guard) and loop bodies
// are elided in this view; comments annotate only the visible code.
static int countNonZero8u( const uchar* src, int len )
    __m128i pattern = _mm_setzero_si128 ();   // all-zero vector to compare against
    static uchar tab[256];                    // popcount-of-zero lookup table
    static volatile bool initialized = false; // one-time init guard (elided)
    // we compute inverse popcount table,
    // since we pass (img[x] == 0) mask as index in the table.
    for( int j = 0; j < 256; j++ )
        for( int mask = 1; mask < 256; mask += mask )
            val += (j & mask) == 0;           // count clear bits of j
    // SSE2 main loop: 16 bytes per iteration; movemask packs the per-byte
    // "equals zero" results into 16 bits, split into two table lookups.
    for (; i<=len-16; i+=16)
        __m128i r0 = _mm_loadu_si128((const __m128i*)(src+i));
        int val = _mm_movemask_epi8(_mm_cmpeq_epi8(r0, pattern));
        nz += tab[val & 255] + tab[val >> 8];
    // NEON path: process in nested blocks so the u8 per-lane zero counters
    // (max 255) never overflow before being widened into u32 lanes.
    int len0 = len & -16, blockSize1 = (1 << 8) - 16, blockSize0 = blockSize1 << 6;
    uint32x4_t v_nz = vdupq_n_u32(0u);
    uint8x16_t v_zero = vdupq_n_u8(0), v_1 = vdupq_n_u8(1);
    const uchar * src0 = src;
        int blockSizei = std::min(len0 - i, blockSize0), j = 0;

        while (j < blockSizei)
            int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
            uint8x16_t v_pz = v_zero;

            // Add 1 per lane for every byte equal to zero.
            for( ; k <= blockSizej - 16; k += 16 )
                v_pz = vaddq_u8(v_pz, vandq_u8(vceqq_u8(vld1q_u8(src0 + k), v_zero), v_1));

            // Widen the u8 zero counters and fold into the u32 accumulator.
            uint16x8_t v_p1 = vmovl_u8(vget_low_u8(v_pz)), v_p2 = vmovl_u8(vget_high_u8(v_pz));
            v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_p1), vget_high_u16(v_p1)), v_nz);
            v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_p2), vget_high_u16(v_p2)), v_nz);

    // Non-zero count = processed elements minus the accumulated zero count.
    CV_DECL_ALIGNED(16) unsigned int buf[4];
    vst1q_u32(buf, v_nz);
    nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);
    // Scalar tail for the last (len % 16) elements.
    for( ; i < len; i++ )
// NEON-accelerated non-zero counter for 16-bit data: counts zero elements in
// nested blocks (so the u16 per-lane counters cannot overflow) and subtracts
// from the processed length; the scalar countNonZero_ handles the tail.
// NOTE(review): the #if CV_NEON guard, braces, `int i = 0, nz = 0;` and the
// outer for-loop header over i are elided in this view.
static int countNonZero16u( const ushort* src, int len )
    int len0 = len & -8, blockSize1 = (1 << 15), blockSize0 = blockSize1 << 6;
    uint32x4_t v_nz = vdupq_n_u32(0u);
    uint16x8_t v_zero = vdupq_n_u16(0), v_1 = vdupq_n_u16(1);
        int blockSizei = std::min(len0 - i, blockSize0), j = 0;

        while (j < blockSizei)
            int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
            uint16x8_t v_pz = v_zero;

            // Add 1 per lane for every element equal to zero.
            for( ; k <= blockSizej - 8; k += 8 )
                v_pz = vaddq_u16(v_pz, vandq_u16(vceqq_u16(vld1q_u16(src + k), v_zero), v_1));

            // Widen and fold the per-lane zero counts into the u32 accumulator.
            v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_pz), vget_high_u16(v_pz)), v_nz);

    // Non-zero count = processed elements minus accumulated zero count.
    CV_DECL_ALIGNED(16) unsigned int buf[4];
    vst1q_u32(buf, v_nz);
    nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);
    // Scalar fallback for the remaining (len - i) elements.
    return nz + countNonZero_(src, len - i);
// NEON-accelerated non-zero counter for 32-bit integer data. Compares two
// 4-lane vectors at a time, narrows the compare masks to u16, and counts
// zeros block-wise as in countNonZero16u.
// NOTE(review): the #if CV_NEON guard, braces, `int i = 0, nz = 0;` and the
// outer for-loop header over i are elided in this view.
static int countNonZero32s( const int* src, int len )
    int len0 = len & -8, blockSize1 = (1 << 15), blockSize0 = blockSize1 << 6;
    uint32x4_t v_nz = vdupq_n_u32(0u);
    // NOTE(review): float literal 0.0f passed to the *signed int* dup
    // intrinsic; it implicitly converts to 0 so behavior is correct, but
    // vdupq_n_s32(0) is the intended spelling.
    int32x4_t v_zero = vdupq_n_s32(0.0f);
    uint16x8_t v_1 = vdupq_n_u16(1u), v_zerou = vdupq_n_u16(0u);
        int blockSizei = std::min(len0 - i, blockSize0), j = 0;

        while (j < blockSizei)
            int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
            uint16x8_t v_pz = v_zerou;

            // Two s32 compares per iteration, narrowed and combined into one
            // u16x8 mask; add 1 per lane for every element equal to zero.
            for( ; k <= blockSizej - 8; k += 8 )
                v_pz = vaddq_u16(v_pz, vandq_u16(vcombine_u16(vmovn_u32(vceqq_s32(vld1q_s32(src + k), v_zero)),
                    vmovn_u32(vceqq_s32(vld1q_s32(src + k + 4), v_zero))), v_1));

            // Widen and fold the per-lane zero counts into the u32 accumulator.
            v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_pz), vget_high_u16(v_pz)), v_nz);

    // Non-zero count = processed elements minus accumulated zero count.
    CV_DECL_ALIGNED(16) unsigned int buf[4];
    vst1q_u32(buf, v_nz);
    nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);
    // Scalar fallback for the remaining (len - i) elements.
    return nz + countNonZero_(src, len - i);
// NEON-accelerated non-zero counter for 32-bit float data; identical block
// structure to countNonZero32s but with float compares (note: counts exact
// bitwise-zero compares via vceqq_f32, matching the scalar x != 0 test).
// NOTE(review): the #if CV_NEON guard, braces, `int i = 0, nz = 0;` and the
// outer for-loop header over i are elided in this view.
static int countNonZero32f( const float* src, int len )
    int len0 = len & -8, blockSize1 = (1 << 15), blockSize0 = blockSize1 << 6;
    uint32x4_t v_nz = vdupq_n_u32(0u);
    float32x4_t v_zero = vdupq_n_f32(0.0f);
    uint16x8_t v_1 = vdupq_n_u16(1u), v_zerou = vdupq_n_u16(0u);
        int blockSizei = std::min(len0 - i, blockSize0), j = 0;

        while (j < blockSizei)
            int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
            uint16x8_t v_pz = v_zerou;

            // Two f32 compares per iteration, narrowed and combined into one
            // u16x8 mask; add 1 per lane for every element equal to zero.
            for( ; k <= blockSizej - 8; k += 8 )
                v_pz = vaddq_u16(v_pz, vandq_u16(vcombine_u16(vmovn_u32(vceqq_f32(vld1q_f32(src + k), v_zero)),
                    vmovn_u32(vceqq_f32(vld1q_f32(src + k + 4), v_zero))), v_1));

            // Widen and fold the per-lane zero counts into the u32 accumulator.
            v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_pz), vget_high_u16(v_pz)), v_nz);

    // Non-zero count = processed elements minus accumulated zero count.
    CV_DECL_ALIGNED(16) unsigned int buf[4];
    vst1q_u32(buf, v_nz);
    nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);
    // Scalar fallback for the remaining (len - i) elements.
    return nz + countNonZero_(src, len - i);
579 static int countNonZero64f( const double* src, int len )
580 { return countNonZero_(src, len); }
// Type-erased signature shared by the depth-specific countNonZero kernels.
typedef int (*CountNonZeroFunc)(const uchar*, int);

// Returns the countNonZero kernel for a matrix depth, indexed by depth.
// Note the 8u kernel serves both CV_8U and CV_8S, and the 16u kernel serves
// both CV_16U and CV_16S: non-zero-ness is sign-agnostic at the bit level.
// NOTE(review): surrounding braces are elided in this view.
static CountNonZeroFunc getCountNonZeroTab(int depth)
    static CountNonZeroFunc countNonZeroTab[] =
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u),
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u),
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32s), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32f),
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero64f), 0

    return countNonZeroTab[depth];
// Generic kernel computing, per channel, both the sum (into `sum`) and the
// sum of squares (into `sqsum`) of `len` pixels with `cn` channels. Mirrors
// the structure of sum_(): an unmasked section with specializations for
// cn == 1/2/3 plus a 4-at-a-time loop for larger cn, followed by masked
// variants that only accumulate pixels whose mask byte is non-zero.
// NOTE(review): many original lines (braces, the mask dispatch, `int i, k`,
// nonzero-count bookkeeping and the return) are elided in this view.
template<typename T, typename ST, typename SQT>
static int sumsqr_(const T* src0, const uchar* mask, ST* sum, SQT* sqsum, int len, int cn )
    // Single-channel unmasked path.
    for( i = 0; i < len; i++, src += cn )
        s0 += v; sq0 += (SQT)v*v;   // (SQT) cast before the multiply avoids overflow in T
    // Two-channel unmasked path.
    ST s0 = sum[0], s1 = sum[1];
    SQT sq0 = sqsum[0], sq1 = sqsum[1];
    for( i = 0; i < len; i++, src += cn )
        T v0 = src[0], v1 = src[1];
        s0 += v0; sq0 += (SQT)v0*v0;
        s1 += v1; sq1 += (SQT)v1*v1;
    sum[0] = s0; sum[1] = s1;
    sqsum[0] = sq0; sqsum[1] = sq1;
    // Three-channel unmasked path.
    ST s0 = sum[0], s1 = sum[1], s2 = sum[2];
    SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
    for( i = 0; i < len; i++, src += cn )
        T v0 = src[0], v1 = src[1], v2 = src[2];
        s0 += v0; sq0 += (SQT)v0*v0;
        s1 += v1; sq1 += (SQT)v1*v1;
        s2 += v2; sq2 += (SQT)v2*v2;
    sum[0] = s0; sum[1] = s1; sum[2] = s2;
    sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
    // Remaining channels, four at a time (cn >= 4).
    for( ; k < cn; k += 4 )
        ST s0 = sum[k], s1 = sum[k+1], s2 = sum[k+2], s3 = sum[k+3];
        SQT sq0 = sqsum[k], sq1 = sqsum[k+1], sq2 = sqsum[k+2], sq3 = sqsum[k+3];
        for( i = 0; i < len; i++, src += cn )
            v0 = src[0], v1 = src[1];
            s0 += v0; sq0 += (SQT)v0*v0;
            s1 += v1; sq1 += (SQT)v1*v1;
            v0 = src[2], v1 = src[3];
            s2 += v0; sq2 += (SQT)v0*v0;
            s3 += v1; sq3 += (SQT)v1*v1;
        sum[k] = s0; sum[k+1] = s1;
        sum[k+2] = s2; sum[k+3] = s3;
        sqsum[k] = sq0; sqsum[k+1] = sq1;
        sqsum[k+2] = sq2; sqsum[k+3] = sq3;
    // Masked single-channel path (mask test elided in this view).
    for( i = 0; i < len; i++ )
        s0 += v; sq0 += (SQT)v*v;
    // Masked three-channel path.
    ST s0 = sum[0], s1 = sum[1], s2 = sum[2];
    SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
    for( i = 0; i < len; i++, src += 3 )
        T v0 = src[0], v1 = src[1], v2 = src[2];
        s0 += v0; sq0 += (SQT)v0*v0;
        s1 += v1; sq1 += (SQT)v1*v1;
        s2 += v2; sq2 += (SQT)v2*v2;
    sum[0] = s0; sum[1] = s1; sum[2] = s2;
    sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
    // Masked path for arbitrary channel counts.
    for( i = 0; i < len; i++, src += cn )
        for( int k = 0; k < cn; k++ )
            SQT sq = sqsum[k] + (SQT)v*v;
            sum[k] = s; sqsum[k] = sq;
721 static int sqsum8u( const uchar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
722 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
724 static int sqsum8s( const schar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
725 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
727 static int sqsum16u( const ushort* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
728 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
730 static int sqsum16s( const short* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
731 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
733 static int sqsum32s( const int* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
734 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
736 static int sqsum32f( const float* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
737 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
739 static int sqsum64f( const double* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
740 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
// Type-erased signature shared by the depth-specific sum+sum-of-squares kernels.
typedef int (*SumSqrFunc)(const uchar*, const uchar* mask, uchar*, uchar*, int, int);

// Returns the sum/sqsum kernel for a matrix depth, indexed by depth
// (CV_8U..CV_64F, with a null sentinel for CV_USRTYPE1).
// NOTE(review): surrounding braces are elided in this view.
static SumSqrFunc getSumSqrTab(int depth)
    static SumSqrFunc sumSqrTab[] =
        (SumSqrFunc)GET_OPTIMIZED(sqsum8u), (SumSqrFunc)sqsum8s, (SumSqrFunc)sqsum16u, (SumSqrFunc)sqsum16s,
        (SumSqrFunc)sqsum32s, (SumSqrFunc)GET_OPTIMIZED(sqsum32f), (SumSqrFunc)sqsum64f, 0

    return sumSqrTab[depth];
// Host-side reduction of the per-workgroup partial sums produced by the OpenCL
// "reduce" kernel: folds a 1-row Mat of T values, channel-interleaved, into a
// Scalar (one total per channel).
// NOTE(review): braces, the accumulation statement `s[c] += ptr[x];` and the
// `return s;` are elided in this view.
template <typename T> Scalar ocl_part_sum(Mat m)
    CV_Assert(m.rows == 1);       // kernel output is always a single row

    Scalar s = Scalar::all(0);
    int cn = m.channels();
    const T * const ptr = m.ptr<T>(0);

    // Walk the interleaved values; `x` advances through the row while `c`
    // cycles through the channels.
    for (int x = 0, w = m.cols * cn; x < w; )
        for (int c = 0; c < cn; ++c, ++x)
// Reduction operation selectors understood by the OpenCL "reduce" kernel.
enum { OCL_OP_SUM = 0, OCL_OP_SUM_ABS = 1, OCL_OP_SUM_SQR = 2 };

// OpenCL implementation of sum / sum-of-absolute / sum-of-squares. Builds and
// runs the ocl::core::reduce kernel, which writes one partial result per
// workgroup into `db`; those partials are then reduced on the host via
// ocl_part_sum. Returns false when no suitable device/kernel is available so
// the caller falls back to the CPU path.
// NOTE(review): braces, several early `return false;` lines, the `cvt` buffer
// declaration and the #ifdef __ANDROID__ group-count tweak are elided in this
// view; comments annotate only the visible code.
static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask = noArray(),
                     InputArray _src2 = noArray(), bool calc2 = false, const Scalar & res2 = Scalar() )
    CV_Assert(sum_op == OCL_OP_SUM || sum_op == OCL_OP_SUM_ABS || sum_op == OCL_OP_SUM_SQR);

    const ocl::Device & dev = ocl::Device::getDefault();
    bool doubleSupport = dev.doubleFPConfig() > 0,
        haveMask = _mask.kind() != _InputArray::NONE,
        haveSrc2 = _src2.kind() != _InputArray::NONE;
    // kercn: vector width per work-item; only widened in the simple 1-channel
    // unmasked case. mcn: element width the kernel actually reads.
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
            kercn = cn == 1 && !haveMask ? ocl::predictOptimalVectorWidth(_src, _src2) : 1,
            mcn = std::max(cn, kercn);
    CV_Assert(!haveSrc2 || _src2.type() == type);
    int convert_cn = haveSrc2 ? mcn : cn;

    // Bail out when doubles are required but unsupported, or too many channels.
    if ( (!doubleSupport && depth == CV_64F) || cn > 4 )

    // One partial result per compute unit (doubled when calc2 requests a
    // second simultaneous reduction, e.g. for src - src2).
    int ngroups = dev.maxComputeUnits(), dbsize = ngroups * (calc2 ? 2 : 1);
    size_t wgs = dev.maxWorkGroupSize();

    // Accumulator depth: 32-bit int for integer sums, float and up for squares.
    int ddepth = std::max(sum_op == OCL_OP_SUM_SQR ? CV_32F : CV_32S, depth),
            dtype = CV_MAKE_TYPE(ddepth, cn);
    CV_Assert(!haveMask || _mask.type() == CV_8UC1);

    // Round the workgroup size up to a power of two for the kernel's tree
    // reduction (the shift statement inside the loop is elided in this view).
    int wgs2_aligned = 1;
    while (wgs2_aligned < (int)wgs)

    static const char * const opMap[3] = { "OP_SUM", "OP_SUM_ABS", "OP_SUM_SQR" };

    // Compile-time configuration of the generic reduce kernel: source/dest
    // types, conversion helpers, op selector, workgroup geometry, and flags
    // for mask / second source / contiguous layouts.
    String opts = format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D dstT1=%s -D ddepth=%d -D cn=%d"
                         " -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s%s%s -D kercn=%d%s%s%s -D convertFromU=%s",
                         ocl::typeToStr(CV_MAKE_TYPE(depth, mcn)), ocl::typeToStr(depth),
                         ocl::typeToStr(dtype), ocl::typeToStr(CV_MAKE_TYPE(ddepth, mcn)),
                         ocl::typeToStr(ddepth), ddepth, cn,
                         ocl::convertTypeStr(depth, ddepth, mcn, cvt[0]),
                         opMap[sum_op], (int)wgs, wgs2_aligned,
                         doubleSupport ? " -D DOUBLE_SUPPORT" : "",
                         haveMask ? " -D HAVE_MASK" : "",
                         _src.isContinuous() ? " -D HAVE_SRC_CONT" : "",
                         haveMask && _mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn,
                         haveSrc2 ? " -D HAVE_SRC2" : "", calc2 ? " -D OP_CALC2" : "",
                         haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "",
                         depth <= CV_32S && ddepth == CV_32S ? ocl::convertTypeStr(CV_8U, ddepth, convert_cn, cvt[1]) : "noconvert");

    ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, opts);

    UMat src = _src.getUMat(), src2 = _src2.getUMat(),
        db(1, dbsize, dtype), mask = _mask.getUMat();

    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
            dbarg = ocl::KernelArg::PtrWriteOnly(db),
            maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
            src2arg = ocl::KernelArg::ReadOnlyNoSize(src2);

    // Argument lists for the four mask/src2 combinations (the selecting
    // if/else chain is elided in this view).
    k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg, maskarg, src2arg);
    k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg, maskarg);
    k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg, src2arg);
    k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg);

    size_t globalsize = ngroups * wgs;
    if (k.run(1, &globalsize, &wgs, false))
        // Host-side reduction of the per-group partials; the reducer is
        // chosen by the accumulator depth (int / float / double).
        typedef Scalar (*part_sum)(Mat m);
        part_sum funcs[3] = { ocl_part_sum<int>, ocl_part_sum<float>, ocl_part_sum<double> },
                func = funcs[ddepth - CV_32S];

        Mat mres = db.getMat(ACCESS_READ);
        // const_cast: res2 is declared const& in the signature but is an
        // output when calc2 is set.
        const_cast<Scalar &>(res2) = func(mres.colRange(ngroups, dbsize));

        res = func(mres.colRange(0, ngroups));
// Public cv::sum: per-channel sum of all elements of _src.
// Dispatch order: OpenCL (ocl_sum) -> IPP (2D/continuous only) -> generic CPU
// path via getSumFunc with block-wise accumulation.
// NOTE(review): braces, the `Scalar s;` accumulator, #endif lines, the IPP
// result handling and the int->Scalar spill inside the block loop are elided
// in this view; comments annotate only the visible code.
cv::Scalar cv::sum( InputArray _src )
    // Try the OpenCL path first for UMat inputs of at most 2 dims.
    CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
                ocl_sum(_src, _res, OCL_OP_SUM),

    Mat src = _src.getMat();
    int k, cn = src.channels(), depth = src.depth();

#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
    // IPP fast path: only for 2D mats, or continuous data reshaped to 2D.
    size_t total_size = src.total();
    int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
    if( src.dims == 2 || (src.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
        IppiSize sz = { cols, rows };
        int type = src.type();
        typedef IppStatus (CV_STDCALL* ippiSumFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
        typedef IppStatus (CV_STDCALL* ippiSumFuncNoHint)(const void*, int, IppiSize, double *);
        // 32F variants take an accuracy hint; integer variants do not.
        ippiSumFuncHint ippFuncHint =
            type == CV_32FC1 ? (ippiSumFuncHint)ippiSum_32f_C1R :
            type == CV_32FC3 ? (ippiSumFuncHint)ippiSum_32f_C3R :
            type == CV_32FC4 ? (ippiSumFuncHint)ippiSum_32f_C4R :
        ippiSumFuncNoHint ippFuncNoHint =
            type == CV_8UC1 ? (ippiSumFuncNoHint)ippiSum_8u_C1R :
            type == CV_8UC3 ? (ippiSumFuncNoHint)ippiSum_8u_C3R :
            type == CV_8UC4 ? (ippiSumFuncNoHint)ippiSum_8u_C4R :
            type == CV_16UC1 ? (ippiSumFuncNoHint)ippiSum_16u_C1R :
            type == CV_16UC3 ? (ippiSumFuncNoHint)ippiSum_16u_C3R :
            type == CV_16UC4 ? (ippiSumFuncNoHint)ippiSum_16u_C4R :
            type == CV_16SC1 ? (ippiSumFuncNoHint)ippiSum_16s_C1R :
            type == CV_16SC3 ? (ippiSumFuncNoHint)ippiSum_16s_C3R :
            type == CV_16SC4 ? (ippiSumFuncNoHint)ippiSum_16s_C4R :
        // At most one of the two pointers may be set for a given type.
        CV_Assert(!ippFuncHint || !ippFuncNoHint);
        if( ippFuncHint || ippFuncNoHint )
            IppStatus ret = ippFuncHint ? ippFuncHint(src.ptr(), (int)src.step[0], sz, res, ippAlgHintAccurate) :
                            ippFuncNoHint(src.ptr(), (int)src.step[0], sz, res);
            // On success, copy the per-channel IPP results into the Scalar.
            for( int i = 0; i < cn; i++ )

    // Generic CPU path.
    SumFunc func = getSumFunc(depth);
    CV_Assert( cn <= 4 && func != 0 );

    const Mat* arrays[] = {&src, 0};
    NAryMatIterator it(arrays, ptrs);
    int total = (int)it.size, blockSize = total, intSumBlockSize = 0;

    AutoBuffer<int> _buf;
    int* buf = (int*)&s[0];
    // For depths below 32S the kernel accumulates into int; limit block size
    // so the int partial sums cannot overflow before being spilled into the
    // double-precision Scalar.
    bool blockSum = depth < CV_32S;
        intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
        blockSize = std::min(blockSize, intSumBlockSize);
        for( k = 0; k < cn; k++ )
        esz = src.elemSize();

    for( size_t i = 0; i < it.nplanes; i++, ++it )
        for( j = 0; j < total; j += blockSize )
            int bsz = std::min(total - j, blockSize);
            func( ptrs[0], 0, (uchar*)buf, bsz, cn );
            // Spill int partials into the Scalar when the overflow budget is
            // reached or at the very end of the data.
            if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
                for( k = 0; k < cn; k++ )
// OpenCL implementation of countNonZero: runs the generic "reduce" kernel
// with OP_COUNT_NON_ZERO (one partial count per compute unit, 32-bit int),
// then totals the partials on the host with cv::sum. Returns false when the
// device/kernel is unavailable so the caller falls back to the CPU path.
// NOTE(review): braces, the kernel-validity early return and the shift inside
// the wgs2_aligned loop are elided in this view.
static bool ocl_countNonZero( InputArray _src, int & res )
    int type = _src.type(), depth = CV_MAT_DEPTH(type), kercn = ocl::predictOptimalVectorWidth(_src);
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if (depth == CV_64F && !doubleSupport)

    // One partial count per compute unit.
    int dbsize = ocl::Device::getDefault().maxComputeUnits();
    size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();

    // Round workgroup size up to a power of two for the tree reduction.
    int wgs2_aligned = 1;
    while (wgs2_aligned < (int)wgs)

    ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
                  format("-D srcT=%s -D srcT1=%s -D cn=1 -D OP_COUNT_NON_ZERO"
                         " -D WGS=%d -D kercn=%d -D WGS2_ALIGNED=%d%s%s",
                         ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
                         ocl::typeToStr(depth), (int)wgs, kercn,
                         wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
                         _src.isContinuous() ? " -D HAVE_SRC_CONT" : ""));

    UMat src = _src.getUMat(), db(1, dbsize, CV_32SC1);
    k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
           dbsize, ocl::KernelArg::PtrWriteOnly(db));

    size_t globalsize = dbsize * wgs;
    if (k.run(1, &globalsize, &wgs, true))
        // Total the per-group partial counts on the host.
        return res = saturate_cast<int>(cv::sum(db.getMat(ACCESS_READ))[0]), true;
// Public cv::countNonZero: number of non-zero elements of a single-channel
// array. Dispatch order: OpenCL -> (disabled) IPP countInRange trick ->
// generic CPU path via getCountNonZeroTab.
// NOTE(review): braces, the `int res = -1;` declaration, #endif lines and the
// final `return nz;` are elided in this view.
int cv::countNonZero( InputArray _src )
    int type = _src.type(), cn = CV_MAT_CN(type);
    CV_Assert( cn == 1 );

    CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
                ocl_countNonZero(_src, res),

    Mat src = _src.getMat();

// NOTE: `&& 0` disables this IPP branch entirely; kept for reference.
#if defined HAVE_IPP && !defined HAVE_IPP_ICV_ONLY && 0
    if (src.dims <= 2 || src.isContinuous())
        IppiSize roiSize = { src.cols, src.rows };
        Ipp32s count = 0, srcstep = (Ipp32s)src.step;
        IppStatus status = (IppStatus)-1;

        // Continuous data is treated as one long row.
        if (src.isContinuous())
            roiSize.width = (Ipp32s)src.total();
            srcstep = (Ipp32s)src.total() * CV_ELEM_SIZE(type);

        // ippiCountInRange counts elements inside [0, 0], i.e. the zeros;
        // non-zeros = total - zeros.
        int depth = CV_MAT_DEPTH(type);
            status = ippiCountInRange_8u_C1R((const Ipp8u *)src.data, srcstep, roiSize, &count, 0, 0);
        else if (depth == CV_32F)
            status = ippiCountInRange_32f_C1R((const Ipp32f *)src.data, srcstep, roiSize, &count, 0, 0);
            return (Ipp32s)src.total() - count;
        setIppErrorStatus();

    // Generic CPU path: per-plane counting with the depth-specific kernel.
    CountNonZeroFunc func = getCountNonZeroTab(src.depth());
    CV_Assert( func != 0 );

    const Mat* arrays[] = {&src, 0};
    NAryMatIterator it(arrays, ptrs);
    int total = (int)it.size, nz = 0;

    for( size_t i = 0; i < it.nplanes; i++, ++it )
        nz += func( ptrs[0], total );
// Public cv::mean: per-channel arithmetic mean of _src, optionally restricted
// to pixels where `mask` is non-zero. Dispatch: IPP (masked C1/C3 and
// unmasked variants) -> generic CPU path that reuses the sum kernels and
// divides by the number of accumulated pixels.
// NOTE(review): braces, the `Scalar s;` accumulator, #endif lines, the mask
// emptiness checks around the IPP branches and the per-channel result copies
// are elided in this view; comments annotate only the visible code.
cv::Scalar cv::mean( InputArray _src, InputArray _mask )
    Mat src = _src.getMat(), mask = _mask.getMat();
    CV_Assert( mask.empty() || mask.type() == CV_8U );

    int k, cn = src.channels(), depth = src.depth();

#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
    // IPP fast path: only for 2D mats, or continuous data reshaped to 2D.
    size_t total_size = src.total();
    int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
    if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
        IppiSize sz = { cols, rows };
        int type = src.type();

        // Masked single-channel mean.
        typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
        ippiMaskMeanFuncC1 ippFuncC1 =
        type == CV_8UC1 ? (ippiMaskMeanFuncC1)ippiMean_8u_C1MR :
        type == CV_16UC1 ? (ippiMaskMeanFuncC1)ippiMean_16u_C1MR :
        type == CV_32FC1 ? (ippiMaskMeanFuncC1)ippiMean_32f_C1MR :
            if( ippFuncC1(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &res) >= 0 )
            setIppErrorStatus();

        // Masked three-channel mean: one IPP call per channel (channel index
        // is 1-based in the C3CMR API).
        typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
        ippiMaskMeanFuncC3 ippFuncC3 =
        type == CV_8UC3 ? (ippiMaskMeanFuncC3)ippiMean_8u_C3CMR :
        type == CV_16UC3 ? (ippiMaskMeanFuncC3)ippiMean_16u_C3CMR :
        type == CV_32FC3 ? (ippiMaskMeanFuncC3)ippiMean_32f_C3CMR :
            Ipp64f res1, res2, res3;
            if( ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &res1) >= 0 &&
                ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &res2) >= 0 &&
                ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &res3) >= 0 )
                return Scalar(res1, res2, res3);
            setIppErrorStatus();

        // Unmasked mean: 32F variants take an accuracy hint, integer variants
        // do not.
        typedef IppStatus (CV_STDCALL* ippiMeanFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
        typedef IppStatus (CV_STDCALL* ippiMeanFuncNoHint)(const void*, int, IppiSize, double *);
        ippiMeanFuncHint ippFuncHint =
            type == CV_32FC1 ? (ippiMeanFuncHint)ippiMean_32f_C1R :
            type == CV_32FC3 ? (ippiMeanFuncHint)ippiMean_32f_C3R :
            type == CV_32FC4 ? (ippiMeanFuncHint)ippiMean_32f_C4R :
        ippiMeanFuncNoHint ippFuncNoHint =
            type == CV_8UC1 ? (ippiMeanFuncNoHint)ippiMean_8u_C1R :
            type == CV_8UC3 ? (ippiMeanFuncNoHint)ippiMean_8u_C3R :
            type == CV_8UC4 ? (ippiMeanFuncNoHint)ippiMean_8u_C4R :
            type == CV_16UC1 ? (ippiMeanFuncNoHint)ippiMean_16u_C1R :
            type == CV_16UC3 ? (ippiMeanFuncNoHint)ippiMean_16u_C3R :
            type == CV_16UC4 ? (ippiMeanFuncNoHint)ippiMean_16u_C4R :
            type == CV_16SC1 ? (ippiMeanFuncNoHint)ippiMean_16s_C1R :
            type == CV_16SC3 ? (ippiMeanFuncNoHint)ippiMean_16s_C3R :
            type == CV_16SC4 ? (ippiMeanFuncNoHint)ippiMean_16s_C4R :
        // Make sure only zero or one version of the function pointer is valid
        CV_Assert(!ippFuncHint || !ippFuncNoHint);
        if( ippFuncHint || ippFuncNoHint )
            IppStatus ret = ippFuncHint ? ippFuncHint(src.ptr(), (int)src.step[0], sz, res, ippAlgHintAccurate) :
                            ippFuncNoHint(src.ptr(), (int)src.step[0], sz, res);
            // On success, copy the per-channel IPP results into the Scalar.
            for( int i = 0; i < cn; i++ )
            setIppErrorStatus();

    // Generic CPU path: accumulate with the sum kernels, counting the pixels
    // actually included (nz0), then scale by 1/nz0.
    SumFunc func = getSumFunc(depth);

    CV_Assert( cn <= 4 && func != 0 );

    const Mat* arrays[] = {&src, &mask, 0};
    NAryMatIterator it(arrays, ptrs);
    int total = (int)it.size, blockSize = total, intSumBlockSize = 0;

    AutoBuffer<int> _buf;
    int* buf = (int*)&s[0];
    // Small depths accumulate into int; cap the block size so the int partial
    // sums cannot overflow before being spilled into the double Scalar.
    bool blockSum = depth <= CV_16S;
    size_t esz = 0, nz0 = 0;
        intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
        blockSize = std::min(blockSize, intSumBlockSize);
        for( k = 0; k < cn; k++ )
        esz = src.elemSize();

    for( size_t i = 0; i < it.nplanes; i++, ++it )
        for( j = 0; j < total; j += blockSize )
            int bsz = std::min(total - j, blockSize);
            int nz = func( ptrs[0], ptrs[1], (uchar*)buf, bsz, cn );  // nz = pixels accumulated
            // Spill int partials into the Scalar when the overflow budget is
            // reached or at the very end of the data.
            if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
                for( k = 0; k < cn; k++ )
    // Mean = accumulated sum scaled by 1/pixel-count (0 if nothing matched).
    return s*(nz0 ? 1./nz0 : 0);
// OpenCL implementation of meanStdDev: one kernel pass computes per-workgroup
// partial sums, partial sums of squares and (when masked) partial pixel
// counts into a single byte buffer `db`; the host then reduces those partials,
// converts sum/sqsum into mean/stddev, and writes the results into the
// requested output arrays.
// NOTE(review): braces, the `cvt` buffer, several early `return false;`
// lines, the shift in the wgs2_aligned loop, the mean scaling statement and
// the per-element output copy/zero-fill loop bodies are elided in this view.
static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
    bool haveMask = _mask.kind() != _InputArray::NONE;
    // Without a mask the pixel count is known up front; with a mask it is
    // produced by the kernel (-1 marks "not yet known").
    int nz = haveMask ? -1 : (int)_src.total();
    Scalar mean, stddev;

    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0,
            isContinuous = _src.isContinuous(),
            isMaskContinuous = _mask.isContinuous();
    const ocl::Device &defDev = ocl::Device::getDefault();
    int groups = defDev.maxComputeUnits();
    // Empirical group-count tuning for Intel GPUs (2 groups per sub-slice).
    if (defDev.isIntel())
        static const int subSliceEUCount = 10;
        groups = (groups / subSliceEUCount) * 2;
    size_t wgs = defDev.maxWorkGroupSize();

    // Accumulator depths: at least 32S for sums, at least 32F for squares.
    int ddepth = std::max(CV_32S, depth), sqddepth = std::max(CV_32F, depth),
            dtype = CV_MAKE_TYPE(ddepth, cn),
            sqdtype = CV_MAKETYPE(sqddepth, cn);
    CV_Assert(!haveMask || _mask.type() == CV_8UC1);

    // Round workgroup size up to a power of two for the tree reduction.
    int wgs2_aligned = 1;
    while (wgs2_aligned < (int)wgs)

    // Bail out when doubles are required but unsupported, or too many channels.
    if ( (!doubleSupport && depth == CV_64F) || cn > 4 )

    // Compile-time configuration: source type, sum/sqsum accumulator types and
    // conversions, workgroup geometry, mask/continuity flags.
    String opts = format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D sqddepth=%d"
                         " -D sqdstT=%s -D sqdstT1=%s -D convertToSDT=%s -D cn=%d%s%s"
                         " -D convertToDT=%s -D WGS=%d -D WGS2_ALIGNED=%d%s%s",
                         ocl::typeToStr(type), ocl::typeToStr(depth),
                         ocl::typeToStr(dtype), ocl::typeToStr(ddepth), sqddepth,
                         ocl::typeToStr(sqdtype), ocl::typeToStr(sqddepth),
                         ocl::convertTypeStr(depth, sqddepth, cn, cvt[0]),
                         cn, isContinuous ? " -D HAVE_SRC_CONT" : "",
                         isMaskContinuous ? " -D HAVE_MASK_CONT" : "",
                         ocl::convertTypeStr(depth, ddepth, cn, cvt[1]),
                         (int)wgs, wgs2_aligned, haveMask ? " -D HAVE_MASK" : "",
                         doubleSupport ? " -D DOUBLE_SUPPORT" : "");

    ocl::Kernel k("meanStdDev", ocl::core::meanstddev_oclsrc, opts);

    // db layout per group: [sum (dtype)] [sqsum (sqdtype)] [count (int, masked only)].
    int dbsize = groups * ((haveMask ? CV_ELEM_SIZE1(CV_32S) : 0) +
                           CV_ELEM_SIZE(sqdtype) + CV_ELEM_SIZE(dtype));
    UMat src = _src.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat();

    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
            dbarg = ocl::KernelArg::PtrWriteOnly(db),
            maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);

    // Argument lists for masked vs unmasked (selecting branch elided).
    k.args(srcarg, src.cols, (int)src.total(), groups, dbarg, maskarg);
    k.args(srcarg, src.cols, (int)src.total(), groups, dbarg);

    size_t globalsize = groups * wgs;
    if (!k.run(1, &globalsize, &wgs, false))

    // Host-side reduction of the per-group partials, reducer chosen by depth.
    typedef Scalar (* part_sum)(Mat m);
    part_sum funcs[3] = { ocl_part_sum<int>, ocl_part_sum<float>, ocl_part_sum<double> };
    Mat dbm = db.getMat(ACCESS_READ);

    mean = funcs[ddepth - CV_32S](Mat(1, groups, dtype, dbm.ptr()));
    stddev = funcs[sqddepth - CV_32S](Mat(1, groups, sqdtype, dbm.ptr() + groups * CV_ELEM_SIZE(dtype)));

    // Masked case: the actual pixel count comes from the int section of db.
    nz = saturate_cast<int>(funcs[0](Mat(1, groups, CV_32SC1, dbm.ptr() +
                                         groups * (CV_ELEM_SIZE(dtype) +
                                                   CV_ELEM_SIZE(sqdtype))))[0]);

    // Convert raw sums into mean and standard deviation:
    // stddev = sqrt(E[x^2] - E[x]^2), clamped at 0 against rounding error.
    double total = nz != 0 ? 1.0 / nz : 0;
    int k, j, cn = _src.channels();
    for (int i = 0; i < cn; ++i)
        stddev[i] = std::sqrt(std::max(stddev[i] * total - mean[i] * mean[i] , 0.));

    // Write mean (j == 0) then stddev (j == 1) into their output arrays,
    // padding extra destination channels with zeros.
    for( j = 0; j < 2; j++ )
        const double * const sptr = j == 0 ? &mean[0] : &stddev[0];
        _OutputArray _dst = j == 0 ? _mean : _sdv;
        if( !_dst.needed() )

        if( !_dst.fixedSize() )
            _dst.create(cn, 1, CV_64F, -1, true);
        Mat dst = _dst.getMat();
        int dcn = (int)dst.total();
        CV_Assert( dst.type() == CV_64F && dst.isContinuous() &&
                   (dst.cols == 1 || dst.rows == 1) && dcn >= cn );
        double* dptr = dst.ptr<double>();
        for( k = 0; k < cn; k++ )
        for( ; k < dcn; k++ )
1330 void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
1332 CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
1333 ocl_meanStdDev(_src, _mean, _sdv, _mask))
1335 Mat src = _src.getMat(), mask = _mask.getMat();
1336 CV_Assert( mask.empty() || mask.type() == CV_8UC1 );
1338 int k, cn = src.channels(), depth = src.depth();
1340 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
1341 size_t total_size = src.total();
1342 int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
1343 if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
1345 Ipp64f mean_temp[3];
1346 Ipp64f stddev_temp[3];
1347 Ipp64f *pmean = &mean_temp[0];
1348 Ipp64f *pstddev = &stddev_temp[0];
1351 if( _mean.needed() )
1353 if( !_mean.fixedSize() )
1354 _mean.create(cn, 1, CV_64F, -1, true);
1355 mean = _mean.getMat();
1356 dcn_mean = (int)mean.total();
1357 pmean = mean.ptr<Ipp64f>();
1359 int dcn_stddev = -1;
1362 if( !_sdv.fixedSize() )
1363 _sdv.create(cn, 1, CV_64F, -1, true);
1364 stddev = _sdv.getMat();
1365 dcn_stddev = (int)stddev.total();
1366 pstddev = stddev.ptr<Ipp64f>();
1368 for( int c = cn; c < dcn_mean; c++ )
1370 for( int c = cn; c < dcn_stddev; c++ )
1372 IppiSize sz = { cols, rows };
1373 int type = src.type();
1376 typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *, Ipp64f *);
1377 ippiMaskMeanStdDevFuncC1 ippFuncC1 =
1378 type == CV_8UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_8u_C1MR :
1379 type == CV_16UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_16u_C1MR :
1380 type == CV_32FC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_32f_C1MR :
1384 if( ippFuncC1(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, pmean, pstddev) >= 0 )
1386 setIppErrorStatus();
1388 typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
1389 ippiMaskMeanStdDevFuncC3 ippFuncC3 =
1390 type == CV_8UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CMR :
1391 type == CV_16UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CMR :
1392 type == CV_32FC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CMR :
1396 if( ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
1397 ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
1398 ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
1400 setIppErrorStatus();
1405 typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC1)(const void *, int, IppiSize, Ipp64f *, Ipp64f *);
1406 ippiMeanStdDevFuncC1 ippFuncC1 =
1407 type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R :
1408 type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R :
1409 #if (IPP_VERSION_X100 >= 801)
1410 type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0
1415 if( ippFuncC1(src.ptr(), (int)src.step[0], sz, pmean, pstddev) >= 0 )
1417 setIppErrorStatus();
1419 typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC3)(const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
1420 ippiMeanStdDevFuncC3 ippFuncC3 =
1421 type == CV_8UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CR :
1422 type == CV_16UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CR :
1423 type == CV_32FC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CR :
1427 if( ippFuncC3(src.ptr(), (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
1428 ippFuncC3(src.ptr(), (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
1429 ippFuncC3(src.ptr(), (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
1431 setIppErrorStatus();
1438 SumSqrFunc func = getSumSqrTab(depth);
1440 CV_Assert( func != 0 );
1442 const Mat* arrays[] = {&src, &mask, 0};
1444 NAryMatIterator it(arrays, ptrs);
1445 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
1446 int j, count = 0, nz0 = 0;
1447 AutoBuffer<double> _buf(cn*4);
1448 double *s = (double*)_buf, *sq = s + cn;
1449 int *sbuf = (int*)s, *sqbuf = (int*)sq;
1450 bool blockSum = depth <= CV_16S, blockSqSum = depth <= CV_8S;
1453 for( k = 0; k < cn; k++ )
1458 intSumBlockSize = 1 << 15;
1459 blockSize = std::min(blockSize, intSumBlockSize);
1460 sbuf = (int*)(sq + cn);
1463 for( k = 0; k < cn; k++ )
1464 sbuf[k] = sqbuf[k] = 0;
1465 esz = src.elemSize();
1468 for( size_t i = 0; i < it.nplanes; i++, ++it )
1470 for( j = 0; j < total; j += blockSize )
1472 int bsz = std::min(total - j, blockSize);
1473 int nz = func( ptrs[0], ptrs[1], (uchar*)sbuf, (uchar*)sqbuf, bsz, cn );
1476 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
1478 for( k = 0; k < cn; k++ )
1485 for( k = 0; k < cn; k++ )
1499 double scale = nz0 ? 1./nz0 : 0.;
1500 for( k = 0; k < cn; k++ )
1503 sq[k] = std::sqrt(std::max(sq[k]*scale - s[k]*s[k], 0.));
1506 for( j = 0; j < 2; j++ )
1508 const double* sptr = j == 0 ? s : sq;
1509 _OutputArray _dst = j == 0 ? _mean : _sdv;
1510 if( !_dst.needed() )
1513 if( !_dst.fixedSize() )
1514 _dst.create(cn, 1, CV_64F, -1, true);
1515 Mat dst = _dst.getMat();
1516 int dcn = (int)dst.total();
1517 CV_Assert( dst.type() == CV_64F && dst.isContinuous() &&
1518 (dst.cols == 1 || dst.rows == 1) && dcn >= cn );
1519 double* dptr = dst.ptr<double>();
1520 for( k = 0; k < cn; k++ )
1522 for( ; k < dcn; k++ )
1527 /****************************************************************************************\
1529 \****************************************************************************************/
1534 template<typename T, typename WT> static void
1535 minMaxIdx_( const T* src, const uchar* mask, WT* _minVal, WT* _maxVal,
1536 size_t* _minIdx, size_t* _maxIdx, int len, size_t startIdx )
1538 WT minVal = *_minVal, maxVal = *_maxVal;
1539 size_t minIdx = *_minIdx, maxIdx = *_maxIdx;
1543 for( int i = 0; i < len; i++ )
1549 minIdx = startIdx + i;
1554 maxIdx = startIdx + i;
1560 for( int i = 0; i < len; i++ )
1563 if( mask[i] && val < minVal )
1566 minIdx = startIdx + i;
1568 if( mask[i] && val > maxVal )
1571 maxIdx = startIdx + i;
1582 static void minMaxIdx_8u(const uchar* src, const uchar* mask, int* minval, int* maxval,
1583 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1584 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1586 static void minMaxIdx_8s(const schar* src, const uchar* mask, int* minval, int* maxval,
1587 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1588 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1590 static void minMaxIdx_16u(const ushort* src, const uchar* mask, int* minval, int* maxval,
1591 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1592 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1594 static void minMaxIdx_16s(const short* src, const uchar* mask, int* minval, int* maxval,
1595 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1596 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1598 static void minMaxIdx_32s(const int* src, const uchar* mask, int* minval, int* maxval,
1599 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1600 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1602 static void minMaxIdx_32f(const float* src, const uchar* mask, float* minval, float* maxval,
1603 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1604 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1606 static void minMaxIdx_64f(const double* src, const uchar* mask, double* minval, double* maxval,
1607 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1608 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1610 typedef void (*MinMaxIdxFunc)(const uchar*, const uchar*, int*, int*, size_t*, size_t*, int, size_t);
1612 static MinMaxIdxFunc getMinmaxTab(int depth)
1614 static MinMaxIdxFunc minmaxTab[] =
1616 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8s),
1617 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16s),
1618 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32s),
1619 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32f), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_64f),
1623 return minmaxTab[depth];
1626 static void ofs2idx(const Mat& a, size_t ofs, int* idx)
1632 for( i = d-1; i >= 0; i-- )
1635 idx[i] = (int)(ofs % sz);
1641 for( i = d-1; i >= 0; i-- )
1648 template <typename T>
1649 void getMinMaxRes(const Mat & db, double * minVal, double * maxVal,
1650 int* minLoc, int* maxLoc,
1651 int groupnum, int cols, double * maxVal2)
1653 uint index_max = std::numeric_limits<uint>::max();
1654 T minval = std::numeric_limits<T>::max();
1655 T maxval = std::numeric_limits<T>::min() > 0 ? -std::numeric_limits<T>::max() : std::numeric_limits<T>::min(), maxval2 = maxval;
1656 uint minloc = index_max, maxloc = index_max;
1659 const T * minptr = NULL, * maxptr = NULL, * maxptr2 = NULL;
1660 const uint * minlocptr = NULL, * maxlocptr = NULL;
1661 if (minVal || minLoc)
1663 minptr = db.ptr<T>();
1664 index += sizeof(T) * groupnum;
1666 if (maxVal || maxLoc)
1668 maxptr = (const T *)(db.ptr() + index);
1669 index += sizeof(T) * groupnum;
1673 minlocptr = (const uint *)(db.ptr() + index);
1674 index += sizeof(uint) * groupnum;
1678 maxlocptr = (const uint *)(db.ptr() + index);
1679 index += sizeof(uint) * groupnum;
1682 maxptr2 = (const T *)(db.ptr() + index);
1684 for (int i = 0; i < groupnum; i++)
1686 if (minptr && minptr[i] <= minval)
1688 if (minptr[i] == minval)
1691 minloc = std::min(minlocptr[i], minloc);
1696 minloc = minlocptr[i];
1700 if (maxptr && maxptr[i] >= maxval)
1702 if (maxptr[i] == maxval)
1705 maxloc = std::min(maxlocptr[i], maxloc);
1710 maxloc = maxlocptr[i];
1714 if (maxptr2 && maxptr2[i] > maxval2)
1715 maxval2 = maxptr2[i];
1717 bool zero_mask = (minLoc && minloc == index_max) ||
1718 (maxLoc && maxloc == index_max);
1721 *minVal = zero_mask ? 0 : (double)minval;
1723 *maxVal = zero_mask ? 0 : (double)maxval;
1725 *maxVal2 = zero_mask ? 0 : (double)maxval2;
1729 minLoc[0] = zero_mask ? -1 : minloc / cols;
1730 minLoc[1] = zero_mask ? -1 : minloc % cols;
1734 maxLoc[0] = zero_mask ? -1 : maxloc / cols;
1735 maxLoc[1] = zero_mask ? -1 : maxloc % cols;
1739 typedef void (*getMinMaxResFunc)(const Mat & db, double * minVal, double * maxVal,
1740 int * minLoc, int *maxLoc, int gropunum, int cols, double * maxVal2);
1742 static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc, int* maxLoc, InputArray _mask,
1743 int ddepth = -1, bool absValues = false, InputArray _src2 = noArray(), double * maxVal2 = NULL)
1745 const ocl::Device & dev = ocl::Device::getDefault();
1746 bool doubleSupport = dev.doubleFPConfig() > 0, haveMask = !_mask.empty(),
1747 haveSrc2 = _src2.kind() != _InputArray::NONE;
1748 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
1749 kercn = haveMask ? cn : std::min(4, ocl::predictOptimalVectorWidth(_src, _src2));
1751 // disabled following modes since it occasionally fails on AMD devices (e.g. A10-6800K, sep. 2014)
1752 if ((haveMask || type == CV_32FC1) && dev.isAMD())
1755 CV_Assert( (cn == 1 && (!haveMask || _mask.type() == CV_8U)) ||
1756 (cn >= 1 && !minLoc && !maxLoc) );
1761 CV_Assert(!haveSrc2 || _src2.type() == type);
1763 if (depth == CV_32S)
1766 if ((depth == CV_64F || ddepth == CV_64F) && !doubleSupport)
1769 int groupnum = dev.maxComputeUnits();
1770 size_t wgs = dev.maxWorkGroupSize();
1772 int wgs2_aligned = 1;
1773 while (wgs2_aligned < (int)wgs)
1777 bool needMinVal = minVal || minLoc, needMinLoc = minLoc != NULL,
1778 needMaxVal = maxVal || maxLoc, needMaxLoc = maxLoc != NULL;
1780 // in case of mask we must know whether mask is filled with zeros or not
1781 // so let's calculate min or max location, if it's undefined, so mask is zeros
1782 if (!(needMaxLoc || needMinLoc) && haveMask)
1791 String opts = format("-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s"
1792 " -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s"
1793 " -D dstT1=%s -D dstT=%s -D convertToDT=%s%s%s%s%s -D wdepth=%d -D convertFromU=%s",
1794 depth, ocl::typeToStr(depth), haveMask ? " -D HAVE_MASK" : "", (int)wgs,
1795 ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), wgs2_aligned,
1796 doubleSupport ? " -D DOUBLE_SUPPORT" : "",
1797 _src.isContinuous() ? " -D HAVE_SRC_CONT" : "",
1798 _mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn,
1799 needMinVal ? " -D NEED_MINVAL" : "", needMaxVal ? " -D NEED_MAXVAL" : "",
1800 needMinLoc ? " -D NEED_MINLOC" : "", needMaxLoc ? " -D NEED_MAXLOC" : "",
1801 ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
1802 ocl::convertTypeStr(depth, ddepth, kercn, cvt[0]),
1803 absValues ? " -D OP_ABS" : "",
1804 haveSrc2 ? " -D HAVE_SRC2" : "", maxVal2 ? " -D OP_CALC2" : "",
1805 haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", ddepth,
1806 depth <= CV_32S && ddepth == CV_32S ? ocl::convertTypeStr(CV_8U, ddepth, kercn, cvt[1]) : "noconvert");
1808 ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts);
1812 int esz = CV_ELEM_SIZE(ddepth), esz32s = CV_ELEM_SIZE1(CV_32S),
1813 dbsize = groupnum * ((needMinVal ? esz : 0) + (needMaxVal ? esz : 0) +
1814 (needMinLoc ? esz32s : 0) + (needMaxLoc ? esz32s : 0) +
1815 (maxVal2 ? esz : 0));
1816 UMat src = _src.getUMat(), src2 = _src2.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat();
1818 if (cn > 1 && !haveMask)
1820 src = src.reshape(1);
1821 src2 = src2.reshape(1);
1827 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
1828 groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(src2));
1830 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
1831 groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(mask),
1832 ocl::KernelArg::ReadOnlyNoSize(src2));
1837 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
1838 groupnum, ocl::KernelArg::PtrWriteOnly(db));
1840 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
1841 groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(mask));
1844 size_t globalsize = groupnum * wgs;
1845 if (!k.run(1, &globalsize, &wgs, true))
1848 static const getMinMaxResFunc functab[7] =
1850 getMinMaxRes<uchar>,
1852 getMinMaxRes<ushort>,
1853 getMinMaxRes<short>,
1855 getMinMaxRes<float>,
1856 getMinMaxRes<double>
1859 getMinMaxResFunc func = functab[ddepth];
1862 func(db.getMat(ACCESS_READ), minVal, maxVal,
1863 needMinLoc ? minLoc ? minLoc : locTemp : minLoc,
1864 needMaxLoc ? maxLoc ? maxLoc : locTemp : maxLoc,
1865 groupnum, src.cols, maxVal2);
1874 void cv::minMaxIdx(InputArray _src, double* minVal,
1875 double* maxVal, int* minIdx, int* maxIdx,
1878 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
1879 CV_Assert( (cn == 1 && (_mask.empty() || _mask.type() == CV_8U)) ||
1880 (cn > 1 && _mask.empty() && !minIdx && !maxIdx) );
1882 CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2 && (_mask.empty() || _src.size() == _mask.size()),
1883 ocl_minMaxIdx(_src, minVal, maxVal, minIdx, maxIdx, _mask))
1885 Mat src = _src.getMat(), mask = _mask.getMat();
1887 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
1888 size_t total_size = src.total();
1889 int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
1890 if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
1892 IppiSize sz = { cols * cn, rows };
1896 typedef IppStatus (CV_STDCALL* ippiMaskMinMaxIndxFuncC1)(const void *, int, const void *, int,
1897 IppiSize, Ipp32f *, Ipp32f *, IppiPoint *, IppiPoint *);
1899 CV_SUPPRESS_DEPRECATED_START
1900 ippiMaskMinMaxIndxFuncC1 ippFuncC1 =
1901 type == CV_8UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1MR :
1902 type == CV_8SC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_8s_C1MR :
1903 type == CV_16UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1MR :
1904 type == CV_32FC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1MR : 0;
1905 CV_SUPPRESS_DEPRECATED_END
1910 IppiPoint minp, maxp;
1911 if( ippFuncC1(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &min, &max, &minp, &maxp) >= 0 )
1914 *minVal = (double)min;
1916 *maxVal = (double)max;
1917 if( !minp.x && !minp.y && !maxp.x && !maxp.y && !mask.ptr()[0] )
1918 minp.x = maxp.x = -1;
1921 size_t minidx = minp.y * cols + minp.x + 1;
1922 ofs2idx(src, minidx, minIdx);
1926 size_t maxidx = maxp.y * cols + maxp.x + 1;
1927 ofs2idx(src, maxidx, maxIdx);
1931 setIppErrorStatus();
1936 typedef IppStatus (CV_STDCALL* ippiMinMaxIndxFuncC1)(const void *, int, IppiSize, Ipp32f *, Ipp32f *, IppiPoint *, IppiPoint *);
1938 CV_SUPPRESS_DEPRECATED_START
1939 ippiMinMaxIndxFuncC1 ippFuncC1 =
1940 depth == CV_8U ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1R :
1941 depth == CV_8S ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_8s_C1R :
1942 depth == CV_16U ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1R :
1943 depth == CV_32F ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1R : 0;
1944 CV_SUPPRESS_DEPRECATED_END
1949 IppiPoint minp, maxp;
1950 if( ippFuncC1(src.ptr(), (int)src.step[0], sz, &min, &max, &minp, &maxp) >= 0 )
1953 *minVal = (double)min;
1955 *maxVal = (double)max;
1958 size_t minidx = minp.y * cols + minp.x + 1;
1959 ofs2idx(src, minidx, minIdx);
1963 size_t maxidx = maxp.y * cols + maxp.x + 1;
1964 ofs2idx(src, maxidx, maxIdx);
1968 setIppErrorStatus();
1974 MinMaxIdxFunc func = getMinmaxTab(depth);
1975 CV_Assert( func != 0 );
1977 const Mat* arrays[] = {&src, &mask, 0};
1979 NAryMatIterator it(arrays, ptrs);
1981 size_t minidx = 0, maxidx = 0;
1982 int iminval = INT_MAX, imaxval = INT_MIN;
1983 float fminval = FLT_MAX, fmaxval = -FLT_MAX;
1984 double dminval = DBL_MAX, dmaxval = -DBL_MAX;
1985 size_t startidx = 1;
1986 int *minval = &iminval, *maxval = &imaxval;
1987 int planeSize = (int)it.size*cn;
1989 if( depth == CV_32F )
1990 minval = (int*)&fminval, maxval = (int*)&fmaxval;
1991 else if( depth == CV_64F )
1992 minval = (int*)&dminval, maxval = (int*)&dmaxval;
1994 for( size_t i = 0; i < it.nplanes; i++, ++it, startidx += planeSize )
1995 func( ptrs[0], ptrs[1], minval, maxval, &minidx, &maxidx, planeSize, startidx );
1998 dminval = dmaxval = 0;
1999 else if( depth == CV_32F )
2000 dminval = fminval, dmaxval = fmaxval;
2001 else if( depth <= CV_32S )
2002 dminval = iminval, dmaxval = imaxval;
2010 ofs2idx(src, minidx, minIdx);
2012 ofs2idx(src, maxidx, maxIdx);
2015 void cv::minMaxLoc( InputArray _img, double* minVal, double* maxVal,
2016 Point* minLoc, Point* maxLoc, InputArray mask )
2018 CV_Assert(_img.dims() <= 2);
2020 minMaxIdx(_img, minVal, maxVal, (int*)minLoc, (int*)maxLoc, mask);
2022 std::swap(minLoc->x, minLoc->y);
2024 std::swap(maxLoc->x, maxLoc->y);
2027 /****************************************************************************************\
2029 \****************************************************************************************/
2034 float normL2Sqr_(const float* a, const float* b, int n)
2036 int j = 0; float d = 0.f;
2040 float CV_DECL_ALIGNED(16) buf[4];
2041 __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
2043 for( ; j <= n - 8; j += 8 )
2045 __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
2046 __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
2047 d0 = _mm_add_ps(d0, _mm_mul_ps(t0, t0));
2048 d1 = _mm_add_ps(d1, _mm_mul_ps(t1, t1));
2050 _mm_store_ps(buf, _mm_add_ps(d0, d1));
2051 d = buf[0] + buf[1] + buf[2] + buf[3];
2056 for( ; j <= n - 4; j += 4 )
2058 float t0 = a[j] - b[j], t1 = a[j+1] - b[j+1], t2 = a[j+2] - b[j+2], t3 = a[j+3] - b[j+3];
2059 d += t0*t0 + t1*t1 + t2*t2 + t3*t3;
2065 float t = a[j] - b[j];
2072 float normL1_(const float* a, const float* b, int n)
2074 int j = 0; float d = 0.f;
2078 float CV_DECL_ALIGNED(16) buf[4];
2079 static const int CV_DECL_ALIGNED(16) absbuf[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
2080 __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
2081 __m128 absmask = _mm_load_ps((const float*)absbuf);
2083 for( ; j <= n - 8; j += 8 )
2085 __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
2086 __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
2087 d0 = _mm_add_ps(d0, _mm_and_ps(t0, absmask));
2088 d1 = _mm_add_ps(d1, _mm_and_ps(t1, absmask));
2090 _mm_store_ps(buf, _mm_add_ps(d0, d1));
2091 d = buf[0] + buf[1] + buf[2] + buf[3];
2095 float32x4_t v_sum = vdupq_n_f32(0.0f);
2096 for ( ; j <= n - 4; j += 4)
2097 v_sum = vaddq_f32(v_sum, vabdq_f32(vld1q_f32(a + j), vld1q_f32(b + j)));
2099 float CV_DECL_ALIGNED(16) buf[4];
2100 vst1q_f32(buf, v_sum);
2101 d = buf[0] + buf[1] + buf[2] + buf[3];
2104 for( ; j <= n - 4; j += 4 )
2106 d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
2107 std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
2112 d += std::abs(a[j] - b[j]);
2116 int normL1_(const uchar* a, const uchar* b, int n)
2122 __m128i d0 = _mm_setzero_si128();
2124 for( ; j <= n - 16; j += 16 )
2126 __m128i t0 = _mm_loadu_si128((const __m128i*)(a + j));
2127 __m128i t1 = _mm_loadu_si128((const __m128i*)(b + j));
2129 d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
2132 for( ; j <= n - 4; j += 4 )
2134 __m128i t0 = _mm_cvtsi32_si128(*(const int*)(a + j));
2135 __m128i t1 = _mm_cvtsi32_si128(*(const int*)(b + j));
2137 d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
2139 d = _mm_cvtsi128_si32(_mm_add_epi32(d0, _mm_unpackhi_epi64(d0, d0)));
2143 uint32x4_t v_sum = vdupq_n_u32(0.0f);
2144 for ( ; j <= n - 16; j += 16)
2146 uint8x16_t v_dst = vabdq_u8(vld1q_u8(a + j), vld1q_u8(b + j));
2147 uint16x8_t v_low = vmovl_u8(vget_low_u8(v_dst)), v_high = vmovl_u8(vget_high_u8(v_dst));
2148 v_sum = vaddq_u32(v_sum, vaddl_u16(vget_low_u16(v_low), vget_low_u16(v_high)));
2149 v_sum = vaddq_u32(v_sum, vaddl_u16(vget_high_u16(v_low), vget_high_u16(v_high)));
2152 uint CV_DECL_ALIGNED(16) buf[4];
2153 vst1q_u32(buf, v_sum);
2154 d = buf[0] + buf[1] + buf[2] + buf[3];
2157 for( ; j <= n - 4; j += 4 )
2159 d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
2160 std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
2164 d += std::abs(a[j] - b[j]);
2168 static const uchar popCountTable[] =
2170 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2171 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2172 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2173 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2174 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2175 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2176 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2177 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
2180 static const uchar popCountTable2[] =
2182 0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
2183 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
2184 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
2185 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
2186 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
2187 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
2188 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
2189 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
2192 static const uchar popCountTable4[] =
2194 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2195 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2196 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2197 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2198 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2199 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2200 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2201 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
2204 static int normHamming(const uchar* a, int n)
2206 int i = 0, result = 0;
2209 uint32x4_t bits = vmovq_n_u32(0);
2210 for (; i <= n - 16; i += 16) {
2211 uint8x16_t A_vec = vld1q_u8 (a + i);
2212 uint8x16_t bitsSet = vcntq_u8 (A_vec);
2213 uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
2214 uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
2215 bits = vaddq_u32(bits, bitSet4);
2217 uint64x2_t bitSet2 = vpaddlq_u32 (bits);
2218 result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
2219 result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
2222 for( ; i <= n - 4; i += 4 )
2223 result += popCountTable[a[i]] + popCountTable[a[i+1]] +
2224 popCountTable[a[i+2]] + popCountTable[a[i+3]];
2226 result += popCountTable[a[i]];
2230 int normHamming(const uchar* a, const uchar* b, int n)
2232 int i = 0, result = 0;
2235 uint32x4_t bits = vmovq_n_u32(0);
2236 for (; i <= n - 16; i += 16) {
2237 uint8x16_t A_vec = vld1q_u8 (a + i);
2238 uint8x16_t B_vec = vld1q_u8 (b + i);
2239 uint8x16_t AxorB = veorq_u8 (A_vec, B_vec);
2240 uint8x16_t bitsSet = vcntq_u8 (AxorB);
2241 uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
2242 uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
2243 bits = vaddq_u32(bits, bitSet4);
2245 uint64x2_t bitSet2 = vpaddlq_u32 (bits);
2246 result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
2247 result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
2250 for( ; i <= n - 4; i += 4 )
2251 result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] +
2252 popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]];
2254 result += popCountTable[a[i] ^ b[i]];
2258 static int normHamming(const uchar* a, int n, int cellSize)
2261 return normHamming(a, n);
2262 const uchar* tab = 0;
2264 tab = popCountTable2;
2265 else if( cellSize == 4 )
2266 tab = popCountTable4;
2268 CV_Error( CV_StsBadSize, "bad cell size (not 1, 2 or 4) in normHamming" );
2269 int i = 0, result = 0;
2270 #if CV_ENABLE_UNROLLED
2271 for( ; i <= n - 4; i += 4 )
2272 result += tab[a[i]] + tab[a[i+1]] + tab[a[i+2]] + tab[a[i+3]];
2275 result += tab[a[i]];
2279 int normHamming(const uchar* a, const uchar* b, int n, int cellSize)
2282 return normHamming(a, b, n);
2283 const uchar* tab = 0;
2285 tab = popCountTable2;
2286 else if( cellSize == 4 )
2287 tab = popCountTable4;
2289 CV_Error( CV_StsBadSize, "bad cell size (not 1, 2 or 4) in normHamming" );
2290 int i = 0, result = 0;
2291 #if CV_ENABLE_UNROLLED
2292 for( ; i <= n - 4; i += 4 )
2293 result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] +
2294 tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]];
2297 result += tab[a[i] ^ b[i]];
2302 template<typename T, typename ST> int
2303 normInf_(const T* src, const uchar* mask, ST* _result, int len, int cn)
2305 ST result = *_result;
2308 result = std::max(result, normInf<T, ST>(src, len*cn));
2312 for( int i = 0; i < len; i++, src += cn )
2315 for( int k = 0; k < cn; k++ )
2316 result = std::max(result, ST(std::abs(src[k])));
2323 template<typename T, typename ST> int
2324 normL1_(const T* src, const uchar* mask, ST* _result, int len, int cn)
2326 ST result = *_result;
2329 result += normL1<T, ST>(src, len*cn);
2333 for( int i = 0; i < len; i++, src += cn )
2336 for( int k = 0; k < cn; k++ )
2337 result += std::abs(src[k]);
2344 template<typename T, typename ST> int
2345 normL2_(const T* src, const uchar* mask, ST* _result, int len, int cn)
2347 ST result = *_result;
2350 result += normL2Sqr<T, ST>(src, len*cn);
2354 for( int i = 0; i < len; i++, src += cn )
2357 for( int k = 0; k < cn; k++ )
2368 template<typename T, typename ST> int
2369 normDiffInf_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
2371 ST result = *_result;
2374 result = std::max(result, normInf<T, ST>(src1, src2, len*cn));
2378 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
2381 for( int k = 0; k < cn; k++ )
2382 result = std::max(result, (ST)std::abs(src1[k] - src2[k]));
2389 template<typename T, typename ST> int
2390 normDiffL1_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
2392 ST result = *_result;
2395 result += normL1<T, ST>(src1, src2, len*cn);
2399 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
2402 for( int k = 0; k < cn; k++ )
2403 result += std::abs(src1[k] - src2[k]);
2410 template<typename T, typename ST> int
2411 normDiffL2_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
2413 ST result = *_result;
2416 result += normL2Sqr<T, ST>(src1, src2, len*cn);
2420 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
2423 for( int k = 0; k < cn; k++ )
2425 ST v = src1[k] - src2[k];
// Stamps out a non-template norm##L##_suffix / normDiff##L##_suffix pair
// that forwards to the templates above; the pairs are taken by address in
// getNormFunc / getNormDiffFunc. Dropped the redundant (int)len cast in the
// normDiff wrapper: len is already declared int, and the norm wrapper
// passes it uncast.
#define CV_DEF_NORM_FUNC(L, suffix, type, ntype) \
    static int norm##L##_##suffix(const type* src, const uchar* mask, ntype* r, int len, int cn) \
{ return norm##L##_(src, mask, r, len, cn); } \
    static int normDiff##L##_##suffix(const type* src1, const type* src2, \
    const uchar* mask, ntype* r, int len, int cn) \
{ return normDiff##L##_(src1, src2, mask, r, len, cn); }
2442 #define CV_DEF_NORM_ALL(suffix, type, inftype, l1type, l2type) \
2443 CV_DEF_NORM_FUNC(Inf, suffix, type, inftype) \
2444 CV_DEF_NORM_FUNC(L1, suffix, type, l1type) \
2445 CV_DEF_NORM_FUNC(L2, suffix, type, l2type)
2447 CV_DEF_NORM_ALL(8u, uchar, int, int, int)
2448 CV_DEF_NORM_ALL(8s, schar, int, int, int)
2449 CV_DEF_NORM_ALL(16u, ushort, int, int, double)
2450 CV_DEF_NORM_ALL(16s, short, int, int, double)
2451 CV_DEF_NORM_ALL(32s, int, int, double, double)
2452 CV_DEF_NORM_ALL(32f, float, float, double, double)
2453 CV_DEF_NORM_ALL(64f, double, double, double, double)
// Type-erased signatures matching the macro-generated wrappers above.  The
// dispatch tables cast every depth-specific wrapper to these; pointer and
// accumulator arguments are reinterpreted per-depth at the call site.
2456 typedef int (*NormFunc)(const uchar*, const uchar*, uchar*, int, int);
2457 typedef int (*NormDiffFunc)(const uchar*, const uchar*, const uchar*, uchar*, int, int);
// Looks up the single-array norm kernel for (normType, depth).  Callers pass
// the norm type already remapped to a 0..2 row index via `normType >> 1`
// (see cv::norm below); depth selects the column.  The trailing 0 entry in
// each row means "no kernel for this depth".
// NOTE(review): the table braces and row delimiters were dropped from this
// listing.
2459 static NormFunc getNormFunc(int normType, int depth)
2461 static NormFunc normTab[3][8] =
2464 (NormFunc)GET_OPTIMIZED(normInf_8u), (NormFunc)GET_OPTIMIZED(normInf_8s), (NormFunc)GET_OPTIMIZED(normInf_16u), (NormFunc)GET_OPTIMIZED(normInf_16s),
2465 (NormFunc)GET_OPTIMIZED(normInf_32s), (NormFunc)GET_OPTIMIZED(normInf_32f), (NormFunc)normInf_64f, 0
2468 (NormFunc)GET_OPTIMIZED(normL1_8u), (NormFunc)GET_OPTIMIZED(normL1_8s), (NormFunc)GET_OPTIMIZED(normL1_16u), (NormFunc)GET_OPTIMIZED(normL1_16s),
2469 (NormFunc)GET_OPTIMIZED(normL1_32s), (NormFunc)GET_OPTIMIZED(normL1_32f), (NormFunc)normL1_64f, 0
2472 (NormFunc)GET_OPTIMIZED(normL2_8u), (NormFunc)GET_OPTIMIZED(normL2_8s), (NormFunc)GET_OPTIMIZED(normL2_16u), (NormFunc)GET_OPTIMIZED(normL2_16s),
2473 (NormFunc)GET_OPTIMIZED(normL2_32s), (NormFunc)GET_OPTIMIZED(normL2_32f), (NormFunc)normL2_64f, 0
2477 return normTab[normType][depth];
// Looks up the two-array (difference) norm kernel for (normType, depth) --
// same row/column convention as getNormFunc above.  Only the 8u and 32f
// entries go through GET_OPTIMIZED here; the rest dispatch to the plain
// C++ template instantiations.
// NOTE(review): the table braces and row delimiters were dropped from this
// listing.
2480 static NormDiffFunc getNormDiffFunc(int normType, int depth)
2482 static NormDiffFunc normDiffTab[3][8] =
2485 (NormDiffFunc)GET_OPTIMIZED(normDiffInf_8u), (NormDiffFunc)normDiffInf_8s,
2486 (NormDiffFunc)normDiffInf_16u, (NormDiffFunc)normDiffInf_16s,
2487 (NormDiffFunc)normDiffInf_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffInf_32f),
2488 (NormDiffFunc)normDiffInf_64f, 0
2491 (NormDiffFunc)GET_OPTIMIZED(normDiffL1_8u), (NormDiffFunc)normDiffL1_8s,
2492 (NormDiffFunc)normDiffL1_16u, (NormDiffFunc)normDiffL1_16s,
2493 (NormDiffFunc)normDiffL1_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL1_32f),
2494 (NormDiffFunc)normDiffL1_64f, 0
2497 (NormDiffFunc)GET_OPTIMIZED(normDiffL2_8u), (NormDiffFunc)normDiffL2_8s,
2498 (NormDiffFunc)normDiffL2_16u, (NormDiffFunc)normDiffL2_16s,
2499 (NormDiffFunc)normDiffL2_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL2_32f),
2500 (NormDiffFunc)normDiffL2_64f, 0
2504 return normDiffTab[normType][depth];
// OpenCL path for cv::norm(src, normType[, mask]).  Returns false (on lines
// dropped from this listing) to make the caller fall back to the CPU
// implementation; on success stores the answer in `result`.
2509 static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & result )
2511 const ocl::Device & d = ocl::Device::getDefault();
2512 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
2513 bool doubleSupport = d.doubleFPConfig() > 0,
2514 haveMask = _mask.kind() != _InputArray::NONE;
// Bail out for unsupported norm types, or CV_64F on devices without
// double-precision support.
2516 if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) ||
2517 (!doubleSupport && depth == CV_64F))
2520 UMat src = _src.getUMat();
// NORM_INF reduces to a max-abs reduction via ocl_minMaxIdx.
2522 if (normType == NORM_INF)
2524 if (!ocl_minMaxIdx(_src, NULL, &result, NULL, NULL, _mask,
2525 std::max(depth, CV_32S), depth != CV_8U && depth != CV_16U))
// The summed norms reduce via ocl_sum: squares for L2/L2SQR, plain sum for
// unsigned depths (already non-negative), abs-sum otherwise.  Without a mask
// the image is reshaped to a single channel so all channels sum at once.
2528 else if (normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR)
2531 bool unstype = depth == CV_8U || depth == CV_16U;
2533 if ( !ocl_sum(haveMask ? src : src.reshape(1), sc, normType == NORM_L2 || normType == NORM_L2SQR ?
2534 OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS), _mask) )
// Per-channel partial sums are folded into `s` (accumulation line dropped
// from this listing); only NORM_L2 takes the final square root.
2541 for (int i = 0; i < cn; ++i)
2544 result = normType == NORM_L1 || normType == NORM_L2SQR ? s : std::sqrt(s);
// Computes the absolute norm of a single array.  Dispatch order: OpenCL ->
// IPP (masked C1, then unmasked hint/no-hint) -> contiguous scalar fast
// paths -> generic NAryMatIterator loop with block-wise integer summation.
// NOTE(review): this listing has many lines dropped (braces, declarations
// such as `result`, early returns); comments below describe only what the
// visible lines establish.
2554 double cv::norm( InputArray _src, int normType, InputArray _mask )
2556 normType &= NORM_TYPE_MASK;
2557 CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
2558 normType == NORM_L2 || normType == NORM_L2SQR ||
2559 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && _src.type() == CV_8U) );
// Try the OpenCL implementation for UMat inputs with <= 2 dims.
2563 CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
2564 ocl_norm(_src, normType, _mask, _result),
2568 Mat src = _src.getMat(), mask = _mask.getMat();
2569 int depth = src.depth(), cn = src.channels();
// ---- IPP acceleration (2D or fully continuous layouts only) ----
2571 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
2572 size_t total_size = src.total();
2573 int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
2575 if( (src.dims == 2 || (src.isContinuous() && mask.isContinuous()))
2576 && cols > 0 && (size_t)rows*cols == total_size
2577 && (normType == NORM_INF || normType == NORM_L1 ||
2578 normType == NORM_L2 || normType == NORM_L2SQR) )
2580 IppiSize sz = { cols, rows };
2581 int type = src.type();
// Masked single-channel IPP kernels; null pointer -> fall through.
2584 typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
2585 ippiMaskNormFuncC1 ippFuncC1 =
2586 normType == NORM_INF ?
2587 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8u_C1MR :
2588 type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8s_C1MR :
2589 // type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_16u_C1MR :
2590 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_32f_C1MR :
2592 normType == NORM_L1 ?
2593 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8u_C1MR :
2594 type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8s_C1MR :
2595 type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_16u_C1MR :
2596 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_32f_C1MR :
2598 normType == NORM_L2 || normType == NORM_L2SQR ?
2599 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8u_C1MR :
2600 type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8s_C1MR :
2601 type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_16u_C1MR :
2602 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_32f_C1MR :
// IPP computes L2, so L2SQR is obtained by squaring its result.
2607 if( ippFuncC1(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 )
2608 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
2610 setIppErrorStatus();
2612 /*typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
2613 ippiMaskNormFuncC3 ippFuncC3 =
2614 normType == NORM_INF ?
2615 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8u_C3CMR :
2616 type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8s_C3CMR :
2617 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_16u_C3CMR :
2618 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_32f_C3CMR :
2620 normType == NORM_L1 ?
2621 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8u_C3CMR :
2622 type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8s_C3CMR :
2623 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_16u_C3CMR :
2624 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_32f_C3CMR :
2626 normType == NORM_L2 || normType == NORM_L2SQR ?
2627 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8u_C3CMR :
2628 type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8s_C3CMR :
2629 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_16u_C3CMR :
2630 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_32f_C3CMR :
2634 Ipp64f norm1, norm2, norm3;
2635 if( ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 &&
2636 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 &&
2637 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0)
2640 normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) :
2641 normType == NORM_L1 ? norm1 + norm2 + norm3 :
2642 normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) :
2644 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
2646 setIppErrorStatus();
// Unmasked IPP kernels: the 32f sum-norms take an accuracy hint, the rest
// do not; the assert below guarantees at most one table matched.
2651 typedef IppStatus (CV_STDCALL* ippiNormFuncHint)(const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
2652 typedef IppStatus (CV_STDCALL* ippiNormFuncNoHint)(const void *, int, IppiSize, Ipp64f *);
2653 ippiNormFuncHint ippFuncHint =
2654 normType == NORM_L1 ?
2655 (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L1_32f_C1R :
2656 type == CV_32FC3 ? (ippiNormFuncHint)ippiNorm_L1_32f_C3R :
2657 type == CV_32FC4 ? (ippiNormFuncHint)ippiNorm_L1_32f_C4R :
2659 normType == NORM_L2 || normType == NORM_L2SQR ?
2660 (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L2_32f_C1R :
2661 type == CV_32FC3 ? (ippiNormFuncHint)ippiNorm_L2_32f_C3R :
2662 type == CV_32FC4 ? (ippiNormFuncHint)ippiNorm_L2_32f_C4R :
2664 ippiNormFuncNoHint ippFuncNoHint =
2665 normType == NORM_INF ?
2666 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C1R :
2667 type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C3R :
2668 type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C4R :
2669 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C1R :
2670 type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C3R :
2671 type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C4R :
2672 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C1R :
2673 #if (IPP_VERSION_X100 >= 801)
2674 type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
2675 type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
2677 type == CV_32FC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C1R :
2678 type == CV_32FC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C3R :
2679 type == CV_32FC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C4R :
2681 normType == NORM_L1 ?
2682 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C1R :
2683 type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C3R :
2684 type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C4R :
2685 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C1R :
2686 type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C3R :
2687 type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C4R :
2688 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C1R :
2689 type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C3R :
2690 type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C4R :
2692 normType == NORM_L2 || normType == NORM_L2SQR ?
2693 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C1R :
2694 type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C3R :
2695 type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C4R :
2696 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C1R :
2697 type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C3R :
2698 type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C4R :
2699 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C1R :
2700 type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C3R :
2701 type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C4R :
2703 // Make sure only zero or one version of the function pointer is valid
2704 CV_Assert(!ippFuncHint || !ippFuncNoHint);
2705 if( ippFuncHint || ippFuncNoHint )
2707 Ipp64f norm_array[4];
2708 IppStatus ret = ippFuncHint ? ippFuncHint(src.ptr(), (int)src.step[0], sz, norm_array, ippAlgHintAccurate) :
2709 ippFuncNoHint(src.ptr(), (int)src.step[0], sz, norm_array);
// Fold per-channel results: max for INF, sum for L1, sum of squares for
// L2/L2SQR (sqrt applied only for L2 on the return line).
2712 Ipp64f norm = (normType == NORM_L2 || normType == NORM_L2SQR) ? norm_array[0] * norm_array[0] : norm_array[0];
2713 for( int i = 1; i < cn; i++ )
2716 normType == NORM_INF ? std::max(norm, norm_array[i]) :
2717 normType == NORM_L1 ? norm + norm_array[i] :
2718 normType == NORM_L2 || normType == NORM_L2SQR ? norm + norm_array[i] * norm_array[i] :
2721 return normType == NORM_L2 ? (double)std::sqrt(norm) : (double)norm;
2723 setIppErrorStatus();
// ---- Contiguous, unmasked fast paths (flat array, single call) ----
2729 if( src.isContinuous() && mask.empty() )
2731 size_t len = src.total()*cn;
2732 if( len == (size_t)(int)len )  // kernel APIs take int lengths
2734 if( depth == CV_32F )
2736 const float* data = src.ptr<float>();
2738 if( normType == NORM_L2 )
2741 GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
2742 return std::sqrt(result);
2744 if( normType == NORM_L2SQR )
2747 GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
2750 if( normType == NORM_L1 )
2753 GET_OPTIMIZED(normL1_32f)(data, 0, &result, (int)len, 1);
2756 if( normType == NORM_INF )
2759 GET_OPTIMIZED(normInf_32f)(data, 0, &result, (int)len, 1);
2763 if( depth == CV_8U )
2765 const uchar* data = src.ptr<uchar>();
2767 if( normType == NORM_HAMMING )
2768 return normHamming(data, (int)len);
2770 if( normType == NORM_HAMMING2 )
2771 return normHamming(data, (int)len, 2);
2776 CV_Assert( mask.empty() || mask.type() == CV_8U );
// ---- Hamming norms: a masked input is reduced to the unmasked case by
// AND-ing with the mask and recursing. ----
2778 if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
2783 bitwise_and(src, mask, temp);
2784 return norm(temp, normType);
2786 int cellSize = normType == NORM_HAMMING ? 1 : 2;
2788 const Mat* arrays[] = {&src, 0};
2790 NAryMatIterator it(arrays, ptrs);
2791 int total = (int)it.size;
2794 for( size_t i = 0; i < it.nplanes; i++, ++it )
2795 result += normHamming(ptrs[0], total, cellSize);
// ---- Generic path: plane iterator + type-dispatched kernel.  The norm
// type is remapped to the 0..2 table row via >> 1. ----
2800 NormFunc func = getNormFunc(normType >> 1, depth);
2801 CV_Assert( func != 0 );
2803 const Mat* arrays[] = {&src, &mask, 0};
2813 NAryMatIterator it(arrays, ptrs);
2814 int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
// Small integer depths accumulate in int blocks (fast) that are flushed
// into the double total before the int could overflow.
2815 bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
2816 ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
2818 int *ibuf = &result.i;  // `result` is a union -- declaration dropped from this listing
2823 intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
2824 blockSize = std::min(blockSize, intSumBlockSize);
2826 esz = src.elemSize();
2829 for( size_t i = 0; i < it.nplanes; i++, ++it )
2831 for( j = 0; j < total; j += blockSize )
2833 int bsz = std::min(total - j, blockSize);
2834 func( ptrs[0], ptrs[1], (uchar*)ibuf, bsz, cn );
// Flush the int block into the double accumulator when it is about to
// overflow or at the very last block.
2836 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
// Convert the accumulator field that matches the kernel's output type into
// the double result; only NORM_L2 takes the final square root.
2848 if( normType == NORM_INF )
2850 if( depth == CV_64F )
2852 else if( depth == CV_32F )
2853 result.d = result.f;
2855 result.d = result.i;
2857 else if( normType == NORM_L2 )
2858 result.d = std::sqrt(result.d);
// OpenCL path for cv::norm(src1, src2, normType[, mask]), including the
// NORM_RELATIVE variants: sc2/s2 carry the norm of src2 used as the
// denominator.  NOTE(review): several lines (Scalar declarations, per-channel
// accumulation, early returns) are missing from this listing.
2867 static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask, double & result )
2870 int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
2871 bool relative = (normType & NORM_RELATIVE) != 0;
2872 normType &= ~NORM_RELATIVE;  // strip the flag; plain norm type remains
2873 bool normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR;
// Summed norms reduce the element-wise difference via ocl_sum (squares for
// L2/L2SQR); when `relative`, sc2 receives the matching reduction of src2.
2877 if (!ocl_sum(_src1, sc1, normType == NORM_L2 || normType == NORM_L2SQR ?
2878 OCL_OP_SUM_SQR : OCL_OP_SUM, _mask, _src2, relative, sc2))
// NORM_INF reduces via ocl_minMaxIdx on the difference.
2883 if (!ocl_minMaxIdx(_src1, NULL, &sc1[0], NULL, NULL, _mask, std::max(CV_32S, depth),
2884 false, _src2, relative ? &sc2[0] : NULL))
2890 for (int i = 0; i < cn; ++i)
2897 if (normType == NORM_L2)
2899 result = std::sqrt(result);
// Relative norm: divide by ||src2||, epsilon-guarded against zero.
2905 result /= (s2 + DBL_EPSILON);
// Computes the norm of the difference of two arrays (and, for CV_RELATIVE,
// the ratio against ||src2||).  Dispatch mirrors the single-array overload:
// OpenCL -> IPP relative kernels (when CV_RELATIVE) -> IPP diff kernels ->
// contiguous 32f fast paths -> generic NAryMatIterator loop.
// NOTE(review): many lines (braces, declarations, returns) are dropped from
// this listing; comments describe only what the visible lines establish.
2914 double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
2916 CV_Assert( _src1.sameSize(_src2) && _src1.type() == _src2.type() );
2920 CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src1.isUMat()),
2921 ocl_norm(_src1, _src2, normType, _mask, _result),
// ---- Relative norm: try dedicated IPP ippiNormRel_* kernels first ----
2925 if( normType & CV_RELATIVE )
2927 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
2928 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
2930 normType &= NORM_TYPE_MASK;
2931 CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR ||
2932 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
2933 size_t total_size = src1.total();
2934 int rows = src1.size[0], cols = rows ? (int)(total_size/rows) : 0;
2935 if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
2936 && cols > 0 && (size_t)rows*cols == total_size
2937 && (normType == NORM_INF || normType == NORM_L1 ||
2938 normType == NORM_L2 || normType == NORM_L2SQR) )
2940 IppiSize sz = { cols, rows };
2941 int type = src1.type();
// Masked single-channel relative-norm kernels.
2944 typedef IppStatus (CV_STDCALL* ippiMaskNormRelFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
2945 ippiMaskNormRelFuncC1 ippFuncC1 =
2946 normType == NORM_INF ?
2947 (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8u_C1MR :
2949 type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8s_C1MR :
2951 type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_16u_C1MR :
2952 type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_32f_C1MR :
2954 normType == NORM_L1 ?
2955 (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8u_C1MR :
2957 type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8s_C1MR :
2959 type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_16u_C1MR :
2960 type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_32f_C1MR :
2962 normType == NORM_L2 || normType == NORM_L2SQR ?
2963 (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8u_C1MR :
2964 type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8s_C1MR :
2965 type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_16u_C1MR :
2966 type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_32f_C1MR :
2971 if( ippFuncC1(src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 )
2972 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
2973 setIppErrorStatus();
// Unmasked relative-norm kernels (hint variant only for 32f sum-norms).
2978 typedef IppStatus (CV_STDCALL* ippiNormRelFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *);
2979 typedef IppStatus (CV_STDCALL* ippiNormRelFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
2980 ippiNormRelFuncNoHint ippFuncNoHint =
2981 normType == NORM_INF ?
2982 (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_8u_C1R :
2983 type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16u_C1R :
2984 type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16s_C1R :
2985 type == CV_32FC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_32f_C1R :
2987 normType == NORM_L1 ?
2988 (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_8u_C1R :
2989 type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16u_C1R :
2990 type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16s_C1R :
2992 normType == NORM_L2 || normType == NORM_L2SQR ?
2993 (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_8u_C1R :
2994 type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16u_C1R :
2995 type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16s_C1R :
2997 ippiNormRelFuncHint ippFuncHint =
2998 normType == NORM_L1 ?
2999 (type == CV_32FC1 ? (ippiNormRelFuncHint)ippiNormRel_L1_32f_C1R :
3001 normType == NORM_L2 || normType == NORM_L2SQR ?
3002 (type == CV_32FC1 ? (ippiNormRelFuncHint)ippiNormRel_L2_32f_C1R :
3007 if( ippFuncNoHint(src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm) >= 0 )
3008 return (double)norm;
3009 setIppErrorStatus();
3014 if( ippFuncHint(src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm, ippAlgHintAccurate) >= 0 )
3015 return (double)norm;
3016 setIppErrorStatus();
// Fallback for CV_RELATIVE: two absolute norms, epsilon-guarded divide.
3021 return norm(_src1, _src2, normType & ~CV_RELATIVE, _mask)/(norm(_src2, normType, _mask) + DBL_EPSILON);
3024 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
3025 int depth = src1.depth(), cn = src1.channels();
3028 CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
3029 normType == NORM_L2 || normType == NORM_L2SQR ||
3030 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
// ---- IPP diff-norm kernels (absolute norms) ----
3032 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
3033 size_t total_size = src1.total();
3034 int rows = src1.size[0], cols = rows ? (int)(total_size/rows) : 0;
3035 if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
3036 && cols > 0 && (size_t)rows*cols == total_size
3037 && (normType == NORM_INF || normType == NORM_L1 ||
3038 normType == NORM_L2 || normType == NORM_L2SQR) )
3040 IppiSize sz = { cols, rows };
3041 int type = src1.type();
3044 typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
3045 ippiMaskNormDiffFuncC1 ippFuncC1 =
3046 normType == NORM_INF ?
3047 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8u_C1MR :
3048 type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8s_C1MR :
3049 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_16u_C1MR :
3050 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_32f_C1MR :
3052 normType == NORM_L1 ?
3053 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8u_C1MR :
3055 type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8s_C1MR :
3057 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_16u_C1MR :
3058 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_32f_C1MR :
3060 normType == NORM_L2 || normType == NORM_L2SQR ?
3061 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8u_C1MR :
3062 type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8s_C1MR :
3063 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_16u_C1MR :
3064 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_32f_C1MR :
3069 if( ippFuncC1(src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 )
3070 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
3071 setIppErrorStatus();
// Masked 3-channel kernels: one call per channel (index 1..3), then the
// per-channel values are combined according to the norm type.
3074 typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC3)(const void *, int, const void *, int, const void *, int, IppiSize, int, Ipp64f *);
3075 ippiMaskNormDiffFuncC3 ippFuncC3 =
3076 normType == NORM_INF ?
3077 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8u_C3CMR :
3078 type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8s_C3CMR :
3079 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_16u_C3CMR :
3080 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_32f_C3CMR :
3082 normType == NORM_L1 ?
3083 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8u_C3CMR :
3084 type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8s_C3CMR :
3085 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_16u_C3CMR :
3086 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_32f_C3CMR :
3088 normType == NORM_L2 || normType == NORM_L2SQR ?
3089 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8u_C3CMR :
3090 type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8s_C3CMR :
3091 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_16u_C3CMR :
3092 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_32f_C3CMR :
3096 Ipp64f norm1, norm2, norm3;
3097 if( ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 &&
3098 ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 &&
3099 ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0)
3102 normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) :
3103 normType == NORM_L1 ? norm1 + norm2 + norm3 :
3104 normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) :
3106 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
3108 setIppErrorStatus();
// Unmasked diff-norm kernels; 32f sum-norms take an accuracy hint.
3114 typedef IppStatus (CV_STDCALL* ippiNormDiffFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
3115 typedef IppStatus (CV_STDCALL* ippiNormDiffFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *);
3116 ippiNormDiffFuncHint ippFuncHint =
3117 normType == NORM_L1 ?
3118 (type == CV_32FC1 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C1R :
3119 type == CV_32FC3 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C3R :
3120 type == CV_32FC4 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C4R :
3122 normType == NORM_L2 || normType == NORM_L2SQR ?
3123 (type == CV_32FC1 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C1R :
3124 type == CV_32FC3 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C3R :
3125 type == CV_32FC4 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C4R :
3127 ippiNormDiffFuncNoHint ippFuncNoHint =
3128 normType == NORM_INF ?
3129 (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C1R :
3130 type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C3R :
3131 type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C4R :
3132 type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C1R :
3133 type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C3R :
3134 type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C4R :
3135 type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C1R :
3136 #if (IPP_VERSION_X100 >= 801)
3137 type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
3138 type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
3140 type == CV_32FC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C1R :
3141 type == CV_32FC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C3R :
3142 type == CV_32FC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C4R :
3144 normType == NORM_L1 ?
3145 (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C1R :
3146 type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C3R :
3147 type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C4R :
3148 type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C1R :
3149 type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C3R :
3150 type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C4R :
3151 type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C1R :
3152 type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C3R :
3153 type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C4R :
3155 normType == NORM_L2 || normType == NORM_L2SQR ?
3156 (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C1R :
3157 type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C3R :
3158 type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C4R :
3159 type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C1R :
3160 type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C3R :
3161 type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C4R :
3162 type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C1R :
3163 type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C3R :
3164 type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C4R :
3166 // Make sure only zero or one version of the function pointer is valid
3167 CV_Assert(!ippFuncHint || !ippFuncNoHint)
3168 if( ippFuncHint || ippFuncNoHint )
3170 Ipp64f norm_array[4];
3171 IppStatus ret = ippFuncHint ? ippFuncHint(src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, norm_array, ippAlgHintAccurate) :
3172 ippFuncNoHint(src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, norm_array);
// Fold per-channel results: max / sum / sum of squares by norm type.
3175 Ipp64f norm = (normType == NORM_L2 || normType == NORM_L2SQR) ? norm_array[0] * norm_array[0] : norm_array[0];
3176 for( int i = 1; i < src1.channels(); i++ )
3179 normType == NORM_INF ? std::max(norm, norm_array[i]) :
3180 normType == NORM_L1 ? norm + norm_array[i] :
3181 normType == NORM_L2 || normType == NORM_L2SQR ? norm + norm_array[i] * norm_array[i] :
3184 return normType == NORM_L2 ? (double)std::sqrt(norm) : (double)norm;
3186 setIppErrorStatus();
// ---- Contiguous, unmasked 32f fast paths ----
3192 if( src1.isContinuous() && src2.isContinuous() && mask.empty() )
3194 size_t len = src1.total()*src1.channels();
3195 if( len == (size_t)(int)len )  // kernel APIs take int lengths
3197 if( src1.depth() == CV_32F )
3199 const float* data1 = src1.ptr<float>();
3200 const float* data2 = src2.ptr<float>();
3202 if( normType == NORM_L2 )
3205 GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
3206 return std::sqrt(result);
3208 if( normType == NORM_L2SQR )
3211 GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
3214 if( normType == NORM_L1 )
3217 GET_OPTIMIZED(normDiffL1_32f)(data1, data2, 0, &result, (int)len, 1);
3220 if( normType == NORM_INF )
3223 GET_OPTIMIZED(normDiffInf_32f)(data1, data2, 0, &result, (int)len, 1);
3230 CV_Assert( mask.empty() || mask.type() == CV_8U );
// ---- Hamming: XOR the inputs, apply the mask, recurse into the
// single-array overload. ----
3232 if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
3237 bitwise_xor(src1, src2, temp);
3238 bitwise_and(temp, mask, temp);
3239 return norm(temp, normType);
3241 int cellSize = normType == NORM_HAMMING ? 1 : 2;
3243 const Mat* arrays[] = {&src1, &src2, 0};
3245 NAryMatIterator it(arrays, ptrs);
3246 int total = (int)it.size;
3249 for( size_t i = 0; i < it.nplanes; i++, ++it )
3250 result += normHamming(ptrs[0], ptrs[1], total, cellSize);
// ---- Generic path: plane iterator + depth-dispatched diff kernel. ----
3255 NormDiffFunc func = getNormDiffFunc(normType >> 1, depth);
3256 CV_Assert( func != 0 );
3258 const Mat* arrays[] = {&src1, &src2, &mask, 0};
3269 NAryMatIterator it(arrays, ptrs);
3270 int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
// Small integer depths accumulate in unsigned blocks that get flushed to
// the double total before overflow (note: unsigned here vs int in the
// single-array overload, since diffs are abs()'d).
3271 bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
3272 ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
3274 unsigned *ibuf = &result.u;  // `result` is a union -- declaration dropped from this listing
3279 intSumBlockSize = normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15);
3280 blockSize = std::min(blockSize, intSumBlockSize);
3282 esz = src1.elemSize();
3285 for( size_t i = 0; i < it.nplanes; i++, ++it )
3287 for( j = 0; j < total; j += blockSize )
3289 int bsz = std::min(total - j, blockSize);
3290 func( ptrs[0], ptrs[1], ptrs[2], (uchar*)ibuf, bsz, cn );
// Flush the integer block near overflow or at the final block.
3292 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
// Convert the matching accumulator field to double; sqrt only for NORM_L2.
3305 if( normType == NORM_INF )
3307 if( depth == CV_64F )
3309 else if( depth == CV_32F )
3310 result.d = result.f;
3312 result.d = result.u;
3314 else if( normType == NORM_L2 )
3315 result.d = std::sqrt(result.d);
3321 ///////////////////////////////////// batch distance ///////////////////////////////////////
// Computes the L1 distance from the single vector src1 to each of nvecs
// rows of src2, writing nvecs values into dist.  Masked-out rows receive
// the sentinel numeric_limits<_Rt>::max() so they never win a nearest-
// neighbour search.  NOTE(review): braces and the `if( !mask )` test are
// missing from this listing.
3326 template<typename _Tp, typename _Rt>
3327 void batchDistL1_(const _Tp* src1, const _Tp* src2, size_t step2,
3328 int nvecs, int len, _Rt* dist, const uchar* mask)
3330 step2 /= sizeof(src2[0]);  // byte stride -> element stride
// Unmasked path: one norm per row.
3333 for( int i = 0; i < nvecs; i++ )
3334 dist[i] = normL1<_Tp, _Rt>(src1, src2 + step2*i, len);
// Masked path: sentinel for rejected rows.
3338 _Rt val0 = std::numeric_limits<_Rt>::max();
3339 for( int i = 0; i < nvecs; i++ )
3340 dist[i] = mask[i] ? normL1<_Tp, _Rt>(src1, src2 + step2*i, len) : val0;
// Squared-L2 variant of batchDistL1_ above: distance from src1 to each of
// nvecs rows of src2 without the final sqrt.  Masked-out rows get the max
// sentinel.  NOTE(review): braces and the mask test are missing from this
// listing.
3344 template<typename _Tp, typename _Rt>
3345 void batchDistL2Sqr_(const _Tp* src1, const _Tp* src2, size_t step2,
3346 int nvecs, int len, _Rt* dist, const uchar* mask)
3348 step2 /= sizeof(src2[0]);  // byte stride -> element stride
3351 for( int i = 0; i < nvecs; i++ )
3352 dist[i] = normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len);
3356 _Rt val0 = std::numeric_limits<_Rt>::max();
3357 for( int i = 0; i < nvecs; i++ )
3358 dist[i] = mask[i] ? normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len) : val0;
// True (rooted) L2 variant: same as batchDistL2Sqr_ but applies sqrt per
// row.  Masked-out rows get the max sentinel.  NOTE(review): braces and the
// mask test are missing from this listing.
3362 template<typename _Tp, typename _Rt>
3363 void batchDistL2_(const _Tp* src1, const _Tp* src2, size_t step2,
3364 int nvecs, int len, _Rt* dist, const uchar* mask)
3366 step2 /= sizeof(src2[0]);  // byte stride -> element stride
3369 for( int i = 0; i < nvecs; i++ )
3370 dist[i] = std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len));
3374 _Rt val0 = std::numeric_limits<_Rt>::max();
3375 for( int i = 0; i < nvecs; i++ )
3376 dist[i] = mask[i] ? std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len)) : val0;
// Hamming-distance (cell size 1) batch variant: src1 against each of nvecs
// rows of src2.  NOTE(review): braces, the mask test and the declaration of
// the sentinel `val0` are missing from this listing -- presumably INT_MAX,
// TODO confirm against upstream.
3380 static void batchDistHamming(const uchar* src1, const uchar* src2, size_t step2,
3381 int nvecs, int len, int* dist, const uchar* mask)
3383 step2 /= sizeof(src2[0]);  // no-op for uchar, kept for symmetry
3386 for( int i = 0; i < nvecs; i++ )
3387 dist[i] = normHamming(src1, src2 + step2*i, len);
3392 for( int i = 0; i < nvecs; i++ )
3393 dist[i] = mask[i] ? normHamming(src1, src2 + step2*i, len) : val0;
// Hamming distance with 2-bit cells (NORM_HAMMING2) -- same shape as
// batchDistHamming above but passes cellSize 2 to normHamming.
// NOTE(review): braces, the mask test and the `val0` declaration are
// missing from this listing.
3397 static void batchDistHamming2(const uchar* src1, const uchar* src2, size_t step2,
3398 int nvecs, int len, int* dist, const uchar* mask)
3400 step2 /= sizeof(src2[0]);  // no-op for uchar, kept for symmetry
3403 for( int i = 0; i < nvecs; i++ )
3404 dist[i] = normHamming(src1, src2 + step2*i, len, 2);
3409 for( int i = 0; i < nvecs; i++ )
3410 dist[i] = mask[i] ? normHamming(src1, src2 + step2*i, len, 2) : val0;
// Non-template thunk (uchar input, int distances) for the BatchDistFunc
// dispatch table.
3414 static void batchDistL1_8u32s(const uchar* src1, const uchar* src2, size_t step2,
3415 int nvecs, int len, int* dist, const uchar* mask)
3417 batchDistL1_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask);
// Non-template thunk (uchar input, float distances) for the BatchDistFunc
// dispatch table.
3420 static void batchDistL1_8u32f(const uchar* src1, const uchar* src2, size_t step2,
3421 int nvecs, int len, float* dist, const uchar* mask)
3423 batchDistL1_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
// Non-template thunk (uchar input, int squared-L2 distances) for the
// BatchDistFunc dispatch table.
3426 static void batchDistL2Sqr_8u32s(const uchar* src1, const uchar* src2, size_t step2,
3427 int nvecs, int len, int* dist, const uchar* mask)
3429 batchDistL2Sqr_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask);
3432 static void batchDistL2Sqr_8u32f(const uchar* src1, const uchar* src2, size_t step2,
3433 int nvecs, int len, float* dist, const uchar* mask)
3435 batchDistL2Sqr_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
3438 static void batchDistL2_8u32f(const uchar* src1, const uchar* src2, size_t step2,
3439 int nvecs, int len, float* dist, const uchar* mask)
3441 batchDistL2_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
3444 static void batchDistL1_32f(const float* src1, const float* src2, size_t step2,
3445 int nvecs, int len, float* dist, const uchar* mask)
3447 batchDistL1_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
3450 static void batchDistL2Sqr_32f(const float* src1, const float* src2, size_t step2,
3451 int nvecs, int len, float* dist, const uchar* mask)
3453 batchDistL2Sqr_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
3456 static void batchDistL2_32f(const float* src1, const float* src2, size_t step2,
3457 int nvecs, int len, float* dist, const uchar* mask)
3459 batchDistL2_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
3462 typedef void (*BatchDistFunc)(const uchar* src1, const uchar* src2, size_t step2,
3463 int nvecs, int len, uchar* dist, const uchar* mask);
3466 struct BatchDistInvoker : public ParallelLoopBody
3468 BatchDistInvoker( const Mat& _src1, const Mat& _src2,
3469 Mat& _dist, Mat& _nidx, int _K,
3470 const Mat& _mask, int _update,
3471 BatchDistFunc _func)
3483 void operator()(const Range& range) const
3485 AutoBuffer<int> buf(src2->rows);
3488 for( int i = range.start; i < range.end; i++ )
3490 func(src1->ptr(i), src2->ptr(), src2->step, src2->rows, src2->cols,
3491 K > 0 ? (uchar*)bufptr : dist->ptr(i), mask->data ? mask->ptr(i) : 0);
3495 int* nidxptr = nidx->ptr<int>(i);
3496 // since positive float's can be compared just like int's,
3497 // we handle both CV_32S and CV_32F cases with a single branch
3498 int* distptr = (int*)dist->ptr(i);
3502 for( j = 0; j < src2->rows; j++ )
3505 if( d < distptr[K-1] )
3507 for( k = K-2; k >= 0 && distptr[k] > d; k-- )
3509 nidxptr[k+1] = nidxptr[k];
3510 distptr[k+1] = distptr[k];
3512 nidxptr[k+1] = j + update;
3532 void cv::batchDistance( InputArray _src1, InputArray _src2,
3533 OutputArray _dist, int dtype, OutputArray _nidx,
3534 int normType, int K, InputArray _mask,
3535 int update, bool crosscheck )
3537 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
3538 int type = src1.type();
3539 CV_Assert( type == src2.type() && src1.cols == src2.cols &&
3540 (type == CV_32F || type == CV_8U));
3541 CV_Assert( _nidx.needed() == (K > 0) );
3545 dtype = normType == NORM_HAMMING || normType == NORM_HAMMING2 ? CV_32S : CV_32F;
3547 CV_Assert( (type == CV_8U && dtype == CV_32S) || dtype == CV_32F);
3549 K = std::min(K, src2.rows);
3551 _dist.create(src1.rows, (K > 0 ? K : src2.rows), dtype);
3552 Mat dist = _dist.getMat(), nidx;
3553 if( _nidx.needed() )
3555 _nidx.create(dist.size(), CV_32S);
3556 nidx = _nidx.getMat();
3559 if( update == 0 && K > 0 )
3561 dist = Scalar::all(dtype == CV_32S ? (double)INT_MAX : (double)FLT_MAX);
3562 nidx = Scalar::all(-1);
3567 CV_Assert( K == 1 && update == 0 && mask.empty() );
3569 batchDistance(src2, src1, tdist, dtype, tidx, normType, K, mask, 0, false);
3571 // if an idx-th element from src1 appeared to be the nearest to i-th element of src2,
3572 // we update the minimum mutual distance between idx-th element of src1 and the whole src2 set.
3573 // As a result, if nidx[idx] = i*, it means that idx-th element of src1 is the nearest
3574 // to i*-th element of src2 and i*-th element of src2 is the closest to idx-th element of src1.
3575 // If nidx[idx] = -1, it means that there is no such ideal couple for it in src2.
3576 // This O(N) procedure is called cross-check and it helps to eliminate some false matches.
3577 if( dtype == CV_32S )
3579 for( int i = 0; i < tdist.rows; i++ )
3581 int idx = tidx.at<int>(i);
3582 int d = tdist.at<int>(i), d0 = dist.at<int>(idx);
3585 dist.at<int>(idx) = d;
3586 nidx.at<int>(idx) = i + update;
3592 for( int i = 0; i < tdist.rows; i++ )
3594 int idx = tidx.at<int>(i);
3595 float d = tdist.at<float>(i), d0 = dist.at<float>(idx);
3598 dist.at<float>(idx) = d;
3599 nidx.at<int>(idx) = i + update;
3606 BatchDistFunc func = 0;
3609 if( normType == NORM_L1 && dtype == CV_32S )
3610 func = (BatchDistFunc)batchDistL1_8u32s;
3611 else if( normType == NORM_L1 && dtype == CV_32F )
3612 func = (BatchDistFunc)batchDistL1_8u32f;
3613 else if( normType == NORM_L2SQR && dtype == CV_32S )
3614 func = (BatchDistFunc)batchDistL2Sqr_8u32s;
3615 else if( normType == NORM_L2SQR && dtype == CV_32F )
3616 func = (BatchDistFunc)batchDistL2Sqr_8u32f;
3617 else if( normType == NORM_L2 && dtype == CV_32F )
3618 func = (BatchDistFunc)batchDistL2_8u32f;
3619 else if( normType == NORM_HAMMING && dtype == CV_32S )
3620 func = (BatchDistFunc)batchDistHamming;
3621 else if( normType == NORM_HAMMING2 && dtype == CV_32S )
3622 func = (BatchDistFunc)batchDistHamming2;
3624 else if( type == CV_32F && dtype == CV_32F )
3626 if( normType == NORM_L1 )
3627 func = (BatchDistFunc)batchDistL1_32f;
3628 else if( normType == NORM_L2SQR )
3629 func = (BatchDistFunc)batchDistL2Sqr_32f;
3630 else if( normType == NORM_L2 )
3631 func = (BatchDistFunc)batchDistL2_32f;
3635 CV_Error_(CV_StsUnsupportedFormat,
3636 ("The combination of type=%d, dtype=%d and normType=%d is not supported",
3637 type, dtype, normType));
3639 parallel_for_(Range(0, src1.rows),
3640 BatchDistInvoker(src1, src2, dist, nidx, K, mask, update, func));
3644 void cv::findNonZero( InputArray _src, OutputArray _idx )
3646 Mat src = _src.getMat();
3647 CV_Assert( src.type() == CV_8UC1 );
3648 int n = countNonZero(src);
3649 if( _idx.kind() == _InputArray::MAT && !_idx.getMatRef().isContinuous() )
3651 _idx.create(n, 1, CV_32SC2);
3652 Mat idx = _idx.getMat();
3653 CV_Assert(idx.isContinuous());
3654 Point* idx_ptr = idx.ptr<Point>();
3656 for( int i = 0; i < src.rows; i++ )
3658 const uchar* bin_ptr = src.ptr(i);
3659 for( int j = 0; j < src.cols; j++ )
3661 *idx_ptr++ = Point(j, i);
3665 double cv::PSNR(InputArray _src1, InputArray _src2)
3667 CV_Assert( _src1.depth() == CV_8U );
3668 double diff = std::sqrt(norm(_src1, _src2, NORM_L2SQR)/(_src1.total()*_src1.channels()));
3669 return 20*log10(255./(diff+DBL_EPSILON));
3673 CV_IMPL CvScalar cvSum( const CvArr* srcarr )
3675 cv::Scalar sum = cv::sum(cv::cvarrToMat(srcarr, false, true, 1));
3676 if( CV_IS_IMAGE(srcarr) )
3678 int coi = cvGetImageCOI((IplImage*)srcarr);
3681 CV_Assert( 0 < coi && coi <= 4 );
3682 sum = cv::Scalar(sum[coi-1]);
3688 CV_IMPL int cvCountNonZero( const CvArr* imgarr )
3690 cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
3691 if( img.channels() > 1 )
3692 cv::extractImageCOI(imgarr, img);
3693 return countNonZero(img);
3698 cvAvg( const void* imgarr, const void* maskarr )
3700 cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
3701 cv::Scalar mean = !maskarr ? cv::mean(img) : cv::mean(img, cv::cvarrToMat(maskarr));
3702 if( CV_IS_IMAGE(imgarr) )
3704 int coi = cvGetImageCOI((IplImage*)imgarr);
3707 CV_Assert( 0 < coi && coi <= 4 );
3708 mean = cv::Scalar(mean[coi-1]);
3716 cvAvgSdv( const CvArr* imgarr, CvScalar* _mean, CvScalar* _sdv, const void* maskarr )
3718 cv::Scalar mean, sdv;
3722 mask = cv::cvarrToMat(maskarr);
3724 cv::meanStdDev(cv::cvarrToMat(imgarr, false, true, 1), mean, sdv, mask );
3726 if( CV_IS_IMAGE(imgarr) )
3728 int coi = cvGetImageCOI((IplImage*)imgarr);
3731 CV_Assert( 0 < coi && coi <= 4 );
3732 mean = cv::Scalar(mean[coi-1]);
3733 sdv = cv::Scalar(sdv[coi-1]);
3738 *(cv::Scalar*)_mean = mean;
3740 *(cv::Scalar*)_sdv = sdv;
3745 cvMinMaxLoc( const void* imgarr, double* _minVal, double* _maxVal,
3746 CvPoint* _minLoc, CvPoint* _maxLoc, const void* maskarr )
3748 cv::Mat mask, img = cv::cvarrToMat(imgarr, false, true, 1);
3750 mask = cv::cvarrToMat(maskarr);
3751 if( img.channels() > 1 )
3752 cv::extractImageCOI(imgarr, img);
3754 cv::minMaxLoc( img, _minVal, _maxVal,
3755 (cv::Point*)_minLoc, (cv::Point*)_maxLoc, mask );
3760 cvNorm( const void* imgA, const void* imgB, int normType, const void* maskarr )
3769 a = cv::cvarrToMat(imgA, false, true, 1);
3771 mask = cv::cvarrToMat(maskarr);
3773 if( a.channels() > 1 && CV_IS_IMAGE(imgA) && cvGetImageCOI((const IplImage*)imgA) > 0 )
3774 cv::extractImageCOI(imgA, a);
3777 return !maskarr ? cv::norm(a, normType) : cv::norm(a, normType, mask);
3779 cv::Mat b = cv::cvarrToMat(imgB, false, true, 1);
3780 if( b.channels() > 1 && CV_IS_IMAGE(imgB) && cvGetImageCOI((const IplImage*)imgB) > 0 )
3781 cv::extractImageCOI(imgB, b);
3783 return !maskarr ? cv::norm(a, b, normType) : cv::norm(a, b, normType, mask);