1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "precomp.hpp"
47 #include "opencl_kernels_core.hpp"
// Convert a raw multi-channel pixel value of type T into a cv::Scalar by
// copying each channel out of v's storage.
// NOTE(review): interior lines appear elided in this chunk (the declaration
// of the Scalar `s`, the function braces and the return statement are not
// visible here).
52 template<typename T> static inline Scalar rawToScalar(const T& v)
55 typedef typename DataType<T>::channel_type T1;
56 int i, n = DataType<T>::channels;
// Reinterpret v as an array of channel_type values and copy channel-by-channel.
57 for( i = 0; i < n; i++ )
58 s.val[i] = ((T1*)&v)[i];
62 /****************************************************************************************\
64 \****************************************************************************************/
// Generic per-channel sum accumulator: adds `len` pixels (each with `cn`
// interleaved channels) from src0 into dst[0..cn-1].
// T = element type, ST = accumulator type.
// NOTE(review): many interior lines (branch headers, loop bodies, braces and
// the return statement) appear elided in this chunk; comments below describe
// only the visible fragments.
66 template<typename T, typename ST>
67 static int sum_(const T* src0, const uchar* mask, ST* dst, int len, int cn )
// Single-channel fast path: unroll by 4 when enabled, then a scalar tail loop.
78 #if CV_ENABLE_UNROLLED
79 for(; i <= len - 4; i += 4, src += cn*4 )
80 s0 += src[0] + src[cn] + src[cn*2] + src[cn*3];
82 for( ; i < len; i++, src += cn )
// Two-channel specialization: keep both running sums in locals.
88 ST s0 = dst[0], s1 = dst[1];
89 for( i = 0; i < len; i++, src += cn )
// Three-channel specialization.
99 ST s0 = dst[0], s1 = dst[1], s2 = dst[2];
100 for( i = 0; i < len; i++, src += cn )
// General case: process channels four at a time across the whole row.
111 for( ; k < cn; k += 4 )
114 ST s0 = dst[k], s1 = dst[k+1], s2 = dst[k+2], s3 = dst[k+3];
115 for( i = 0; i < len; i++, src += cn )
117 s0 += src[0]; s1 += src[1];
118 s2 += src[2]; s3 += src[3];
// Masked paths follow: only pixels with a non-zero mask byte are accumulated.
// Single-channel masked loop.
132 for( i = 0; i < len; i++ )
// Three-channel masked loop.
142 ST s0 = dst[0], s1 = dst[1], s2 = dst[2];
143 for( i = 0; i < len; i++, src += 3 )
// General masked loop over all cn channels, unrolled by 4 when enabled.
157 for( i = 0; i < len; i++, src += cn )
161 #if CV_ENABLE_UNROLLED
162 for( ; k <= cn - 4; k += 4 )
165 s0 = dst[k] + src[k];
166 s1 = dst[k+1] + src[k+1];
167 dst[k] = s0; dst[k+1] = s1;
168 s0 = dst[k+2] + src[k+2];
169 s1 = dst[k+3] + src[k+3];
170 dst[k+2] = s0; dst[k+3] = s1;
182 static int sum8u( const uchar* src, const uchar* mask, int* dst, int len, int cn )
183 { return sum_(src, mask, dst, len, cn); }
185 static int sum8s( const schar* src, const uchar* mask, int* dst, int len, int cn )
186 { return sum_(src, mask, dst, len, cn); }
188 static int sum16u( const ushort* src, const uchar* mask, int* dst, int len, int cn )
189 { return sum_(src, mask, dst, len, cn); }
191 static int sum16s( const short* src, const uchar* mask, int* dst, int len, int cn )
192 { return sum_(src, mask, dst, len, cn); }
194 static int sum32s( const int* src, const uchar* mask, double* dst, int len, int cn )
195 { return sum_(src, mask, dst, len, cn); }
197 static int sum32f( const float* src, const uchar* mask, double* dst, int len, int cn )
198 { return sum_(src, mask, dst, len, cn); }
200 static int sum64f( const double* src, const uchar* mask, double* dst, int len, int cn )
201 { return sum_(src, mask, dst, len, cn); }
// Uniform signature for the per-depth sum workers above (buffers are passed
// as uchar* and cast inside the worker).
203 typedef int (*SumFunc)(const uchar*, const uchar* mask, uchar*, int, int);
// Return the sum worker for a given CV_MAT_DEPTH value.
// NOTE(review): the table appears elided here -- the 32s entry and the
// surrounding braces are not visible in this chunk.
205 static SumFunc getSumFunc(int depth)
207 static SumFunc sumTab[] =
209 (SumFunc)GET_OPTIMIZED(sum8u), (SumFunc)sum8s,
210 (SumFunc)sum16u, (SumFunc)sum16s,
212 (SumFunc)GET_OPTIMIZED(sum32f), (SumFunc)sum64f,
// The table is indexed directly by depth.
216 return sumTab[depth];
// Generic counter: return how many of the `len` elements of src compare
// unequal to zero.
// NOTE(review): the template header, the declarations of i/nz and the return
// statement are not visible in this chunk.
220 static int countNonZero_(const T* src, int len )
223 #if CV_ENABLE_UNROLLED
// Unrolled-by-4 main loop: each (x != 0) comparison is summed as 0/1.
224 for(; i <= len - 4; i += 4 )
225 nz += (src[i] != 0) + (src[i+1] != 0) + (src[i+2] != 0) + (src[i+3] != 0);
// Scalar tail loop for the remaining elements.
227 for( ; i < len; i++ )
// 8-bit specialization of countNonZero with an SSE2 fast path.
// NOTE(review): the SSE2 guard (#if), the lazy-init check around the table
// construction and several braces appear elided in this chunk.
232 static int countNonZero8u( const uchar* src, int len )
238 __m128i pattern = _mm_setzero_si128 ();
// 256-entry lookup table built once: tab[m] is the number of ZERO bits in m,
// i.e. the number of non-zero source bytes for an "is-zero" movemask byte m.
239 static uchar tab[256];
240 static volatile bool initialized = false;
243 // we compute inverse popcount table,
244 // since we pass (img[x] == 0) mask as index in the table.
245 for( int j = 0; j < 256; j++ )
// mask += mask walks over the 8 single-bit masks 1,2,4,...,128.
248 for( int mask = 1; mask < 256; mask += mask )
249 val += (j & mask) == 0;
// Main loop: 16 bytes per iteration.  Compare against zero, compress the
// comparison result into a 16-bit movemask, and look up both halves.
255 for (; i<=len-16; i+=16)
257 __m128i r0 = _mm_loadu_si128((const __m128i*)(src+i));
258 int val = _mm_movemask_epi8(_mm_cmpeq_epi8(r0, pattern));
259 nz += tab[val & 255] + tab[val >> 8];
// Scalar tail for the last (len % 16) bytes.
263 for( ; i < len; i++ )
268 static int countNonZero16u( const ushort* src, int len )
269 { return countNonZero_(src, len); }
271 static int countNonZero32s( const int* src, int len )
272 { return countNonZero_(src, len); }
274 static int countNonZero32f( const float* src, int len )
275 { return countNonZero_(src, len); }
277 static int countNonZero64f( const double* src, int len )
278 { return countNonZero_(src, len); }
// Uniform signature for the per-depth non-zero counters (source passed as
// uchar* and cast inside the worker).
280 typedef int (*CountNonZeroFunc)(const uchar*, int);
// Return the non-zero counter for a given CV_MAT_DEPTH value.
// NOTE(review): braces around the table/body are not visible in this chunk.
282 static CountNonZeroFunc getCountNonZeroTab(int depth)
284 static CountNonZeroFunc countNonZeroTab[] =
// 8s reuses the 8u worker and 16s the 16u one -- presumably because the
// (x != 0) test is the same at the byte/short representation level; verify
// against the full source.
286 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u),
287 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u),
288 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32s), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32f),
289 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero64f), 0
292 return countNonZeroTab[depth];
// Generic worker computing per-channel sum AND sum of squares over `len`
// pixels of `cn` interleaved channels.  ST is the sum accumulator type,
// SQT the squared-sum accumulator type.
// NOTE(review): many interior lines (mask/no-mask branch headers, braces,
// the nonzero-pixel counting and the return statement) are elided in this
// chunk; comments describe only the visible fragments.
295 template<typename T, typename ST, typename SQT>
296 static int sumsqr_(const T* src0, const uchar* mask, ST* sum, SQT* sqsum, int len, int cn )
// Single-channel path: v accumulates into s0, v*v (widened to SQT) into sq0.
309 for( i = 0; i < len; i++, src += cn )
312 s0 += v; sq0 += (SQT)v*v;
// Two-channel specialization.
319 ST s0 = sum[0], s1 = sum[1];
320 SQT sq0 = sqsum[0], sq1 = sqsum[1];
321 for( i = 0; i < len; i++, src += cn )
323 T v0 = src[0], v1 = src[1];
324 s0 += v0; sq0 += (SQT)v0*v0;
325 s1 += v1; sq1 += (SQT)v1*v1;
327 sum[0] = s0; sum[1] = s1;
328 sqsum[0] = sq0; sqsum[1] = sq1;
// Three-channel specialization.
332 ST s0 = sum[0], s1 = sum[1], s2 = sum[2];
333 SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
334 for( i = 0; i < len; i++, src += cn )
336 T v0 = src[0], v1 = src[1], v2 = src[2];
337 s0 += v0; sq0 += (SQT)v0*v0;
338 s1 += v1; sq1 += (SQT)v1*v1;
339 s2 += v2; sq2 += (SQT)v2*v2;
341 sum[0] = s0; sum[1] = s1; sum[2] = s2;
342 sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
// General case: four channels per pass over the row.
345 for( ; k < cn; k += 4 )
348 ST s0 = sum[k], s1 = sum[k+1], s2 = sum[k+2], s3 = sum[k+3];
349 SQT sq0 = sqsum[k], sq1 = sqsum[k+1], sq2 = sqsum[k+2], sq3 = sqsum[k+3];
350 for( i = 0; i < len; i++, src += cn )
353 v0 = src[0], v1 = src[1];
354 s0 += v0; sq0 += (SQT)v0*v0;
355 s1 += v1; sq1 += (SQT)v1*v1;
356 v0 = src[2], v1 = src[3];
357 s2 += v0; sq2 += (SQT)v0*v0;
358 s3 += v1; sq3 += (SQT)v1*v1;
360 sum[k] = s0; sum[k+1] = s1;
361 sum[k+2] = s2; sum[k+3] = s3;
362 sqsum[k] = sq0; sqsum[k+1] = sq1;
363 sqsum[k+2] = sq2; sqsum[k+3] = sq3;
// Masked paths: only pixels with a non-zero mask byte contribute.
// Single-channel masked loop.
374 for( i = 0; i < len; i++ )
378 s0 += v; sq0 += (SQT)v*v;
// Three-channel masked loop.
386 ST s0 = sum[0], s1 = sum[1], s2 = sum[2];
387 SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
388 for( i = 0; i < len; i++, src += 3 )
391 T v0 = src[0], v1 = src[1], v2 = src[2];
392 s0 += v0; sq0 += (SQT)v0*v0;
393 s1 += v1; sq1 += (SQT)v1*v1;
394 s2 += v2; sq2 += (SQT)v2*v2;
397 sum[0] = s0; sum[1] = s1; sum[2] = s2;
398 sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
// General masked loop, writing accumulators straight back per channel.
402 for( i = 0; i < len; i++, src += cn )
405 for( int k = 0; k < cn; k++ )
409 SQT sq = sqsum[k] + (SQT)v*v;
410 sum[k] = s; sqsum[k] = sq;
419 static int sqsum8u( const uchar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
420 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
422 static int sqsum8s( const schar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
423 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
425 static int sqsum16u( const ushort* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
426 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
428 static int sqsum16s( const short* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
429 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
431 static int sqsum32s( const int* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
432 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
434 static int sqsum32f( const float* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
435 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
437 static int sqsum64f( const double* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
438 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
// Uniform signature for the sum+square-sum workers above (both output
// buffers passed as uchar* and cast inside the worker).
440 typedef int (*SumSqrFunc)(const uchar*, const uchar* mask, uchar*, uchar*, int, int);
// Return the sum/sqsum worker for a given CV_MAT_DEPTH value.
// NOTE(review): braces around the table/body are not visible in this chunk.
442 static SumSqrFunc getSumSqrTab(int depth)
444 static SumSqrFunc sumSqrTab[] =
446 (SumSqrFunc)GET_OPTIMIZED(sqsum8u), (SumSqrFunc)sqsum8s, (SumSqrFunc)sqsum16u, (SumSqrFunc)sqsum16s,
447 (SumSqrFunc)sqsum32s, (SumSqrFunc)GET_OPTIMIZED(sqsum32f), (SumSqrFunc)sqsum64f, 0
450 return sumSqrTab[depth];
// Reduce a 1 x N row of per-work-group partial results (element type T)
// into a single Scalar by summing channel-wise.
// NOTE(review): the inner accumulation statement and the return are not
// visible in this chunk.
455 template <typename T> Scalar ocl_part_sum(Mat m)
457 CV_Assert(m.rows == 1);
459 Scalar s = Scalar::all(0);
460 int cn = m.channels();
461 const T * const ptr = m.ptr<T>(0);
// Walk the interleaved row, folding element x into channel slot c.
463 for (int x = 0, w = m.cols * cn; x < w; )
464 for (int c = 0; c < cn; ++c, ++x)
// Reduction-operation selectors passed to ocl_sum(): plain sum, sum of
// absolute values, and sum of squares.
470 enum { OCL_OP_SUM = 0, OCL_OP_SUM_ABS = 1, OCL_OP_SUM_SQR = 2 };
// OpenCL implementation of the sum-style reductions.  Builds and runs the
// "reduce" kernel, then folds the per-work-group partial sums on the host.
// Returns true on success (res, and optionally res2, filled in).
// NOTE(review): several interior lines (early returns, wgs2_aligned update,
// the cvt buffer declaration, branch structure and braces) are elided in
// this chunk.
472 static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask = noArray(),
473 InputArray _src2 = noArray(), bool calc2 = false, const Scalar & res2 = Scalar() )
475 CV_Assert(sum_op == OCL_OP_SUM || sum_op == OCL_OP_SUM_ABS || sum_op == OCL_OP_SUM_SQR);
477 const ocl::Device & dev = ocl::Device::getDefault();
478 bool doubleSupport = dev.doubleFPConfig() > 0,
479 haveMask = _mask.kind() != _InputArray::NONE,
480 haveSrc2 = _src2.kind() != _InputArray::NONE;
// kercn: vector width used by the kernel; only widened for unmasked
// single-channel input.
481 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
482 kercn = cn == 1 && !haveMask ? ocl::predictOptimalVectorWidth(_src, _src2) : 1,
483 mcn = std::max(cn, kercn);
484 CV_Assert(!haveSrc2 || _src2.type() == type);
485 int convert_cn = haveSrc2 ? mcn : cn;
// Bail out when the device can't do doubles for CV_64F, or cn exceeds Scalar.
487 if ( (!doubleSupport && depth == CV_64F) || cn > 4 )
// One partial result per compute unit; doubled when calc2 is requested.
490 int ngroups = dev.maxComputeUnits(), dbsize = ngroups * (calc2 ? 2 : 1);
491 size_t wgs = dev.maxWorkGroupSize();
// Accumulator depth: at least CV_32S (CV_32F for square sums).
493 int ddepth = std::max(sum_op == OCL_OP_SUM_SQR ? CV_32F : CV_32S, depth),
494 dtype = CV_MAKE_TYPE(ddepth, cn);
495 CV_Assert(!haveMask || _mask.type() == CV_8UC1);
// Round the work-group size up to a power of two for the kernel's reduction.
497 int wgs2_aligned = 1;
498 while (wgs2_aligned < (int)wgs)
502 static const char * const opMap[3] = { "OP_SUM", "OP_SUM_ABS", "OP_SUM_SQR" };
// Assemble the kernel build options describing types, operation and layout.
504 String opts = format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D dstT1=%s -D ddepth=%d -D cn=%d"
505 " -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s%s%s -D kercn=%d%s%s%s -D convertFromU=%s",
506 ocl::typeToStr(CV_MAKE_TYPE(depth, mcn)), ocl::typeToStr(depth),
507 ocl::typeToStr(dtype), ocl::typeToStr(CV_MAKE_TYPE(ddepth, mcn)),
508 ocl::typeToStr(ddepth), ddepth, cn,
509 ocl::convertTypeStr(depth, ddepth, mcn, cvt[0]),
510 opMap[sum_op], (int)wgs, wgs2_aligned,
511 doubleSupport ? " -D DOUBLE_SUPPORT" : "",
512 haveMask ? " -D HAVE_MASK" : "",
513 _src.isContinuous() ? " -D HAVE_SRC_CONT" : "",
514 haveMask && _mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn,
515 haveSrc2 ? " -D HAVE_SRC2" : "", calc2 ? " -D OP_CALC2" : "",
516 haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "",
517 depth <= CV_32S && ddepth == CV_32S ? ocl::convertTypeStr(CV_8U, ddepth, convert_cn, cvt[1]) : "noconvert")Z;
519 ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, opts);
// db holds the per-work-group partial sums read back afterwards.
523 UMat src = _src.getUMat(), src2 = _src2.getUMat(),
524 db(1, dbsize, dtype), mask = _mask.getUMat();
526 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
527 dbarg = ocl::KernelArg::PtrWriteOnly(db),
528 maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
529 src2arg = ocl::KernelArg::ReadOnlyNoSize(src2);
// Four argument layouts: with/without mask x with/without src2.
534 k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg, maskarg, src2arg);
536 k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg, maskarg);
541 k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg, src2arg);
543 k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg);
546 size_t globalsize = ngroups * wgs;
547 if (k.run(1, &globalsize, &wgs, false))
// Host-side final reduction over the partial sums, dispatched on ddepth.
549 typedef Scalar (*part_sum)(Mat m);
550 part_sum funcs[3] = { ocl_part_sum<int>, ocl_part_sum<float>, ocl_part_sum<double> },
551 func = funcs[ddepth - CV_32S];
553 Mat mres = db.getMat(ACCESS_READ);
// res2 arrives as const& with a default; const_cast writes through it when
// the caller asked for the second sum.
555 const_cast<Scalar &>(res2) = func(mres.colRange(ngroups, dbsize));
557 res = func(mres.colRange(0, ngroups));
// cv::sum: per-channel sum of all matrix elements.
// Tries the OpenCL path first, then IPP, then the portable SumFunc workers.
// NOTE(review): many interior lines (the _res declaration, the Scalar s
// accumulator, block-merge body, return statements and braces) are elided
// in this chunk.
567 cv::Scalar cv::sum( InputArray _src )
// OpenCL fast path for UMat inputs with <= 2 dims.
571 CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
572 ocl_sum(_src, _res, OCL_OP_SUM),
576 Mat src = _src.getMat();
577 int k, cn = src.channels(), depth = src.depth();
// IPP fast path: only for 2-D or continuous data reshaped to rows x cols.
579 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
580 size_t total_size = src.total();
581 int rows = src.size[0], cols = (int)(total_size/rows);
582 if( src.dims == 2 || (src.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
584 IppiSize sz = { cols, rows };
585 int type = src.type();
586 typedef IppStatus (CV_STDCALL* ippiSumFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
587 typedef IppStatus (CV_STDCALL* ippiSumFuncNoHint)(const void*, int, IppiSize, double *);
// Float types use the "hint" variant (algorithm-accuracy hint parameter).
588 ippiSumFuncHint ippFuncHint =
589 type == CV_32FC1 ? (ippiSumFuncHint)ippiSum_32f_C1R :
590 type == CV_32FC3 ? (ippiSumFuncHint)ippiSum_32f_C3R :
591 type == CV_32FC4 ? (ippiSumFuncHint)ippiSum_32f_C4R :
593 ippiSumFuncNoHint ippFuncNoHint =
594 type == CV_8UC1 ? (ippiSumFuncNoHint)ippiSum_8u_C1R :
595 type == CV_8UC3 ? (ippiSumFuncNoHint)ippiSum_8u_C3R :
596 type == CV_8UC4 ? (ippiSumFuncNoHint)ippiSum_8u_C4R :
597 type == CV_16UC1 ? (ippiSumFuncNoHint)ippiSum_16u_C1R :
598 type == CV_16UC3 ? (ippiSumFuncNoHint)ippiSum_16u_C3R :
599 type == CV_16UC4 ? (ippiSumFuncNoHint)ippiSum_16u_C4R :
600 type == CV_16SC1 ? (ippiSumFuncNoHint)ippiSum_16s_C1R :
601 type == CV_16SC3 ? (ippiSumFuncNoHint)ippiSum_16s_C3R :
602 type == CV_16SC4 ? (ippiSumFuncNoHint)ippiSum_16s_C4R :
// At most one of the two function pointers may be selected.
604 CV_Assert(!ippFuncHint || !ippFuncNoHint);
605 if( ippFuncHint || ippFuncNoHint )
608 IppStatus ret = ippFuncHint ? ippFuncHint(src.data, (int)src.step[0], sz, res, ippAlgHintAccurate) :
609 ippFuncNoHint(src.data, (int)src.step[0], sz, res);
// Copy the per-channel IPP results into the returned Scalar.
613 for( int i = 0; i < cn; i++ )
// Portable fallback: dispatch on depth.
622 SumFunc func = getSumFunc(depth);
624 CV_Assert( cn <= 4 && func != 0 );
626 const Mat* arrays[] = {&src, 0};
628 NAryMatIterator it(arrays, ptrs);
630 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
632 AutoBuffer<int> _buf;
633 int* buf = (int*)&s[0];
// For depths below CV_32S, accumulate in int blocks and periodically flush
// into the double Scalar to avoid overflow.
635 bool blockSum = depth < CV_32S;
639 intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
640 blockSize = std::min(blockSize, intSumBlockSize);
644 for( k = 0; k < cn; k++ )
646 esz = src.elemSize();
// Iterate over planes, summing blockSize elements at a time.
649 for( size_t i = 0; i < it.nplanes; i++, ++it )
651 for( j = 0; j < total; j += blockSize )
653 int bsz = std::min(total - j, blockSize);
654 func( ptrs[0], 0, (uchar*)buf, bsz, cn );
// Flush the int block accumulators when near overflow or at the end.
656 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
658 for( k = 0; k < cn; k++ )
// OpenCL implementation of countNonZero: runs the "reduce" kernel with
// OP_COUNT_NON_ZERO, then sums the per-group counts on the host.
// NOTE(review): early-return lines, the wgs2_aligned update and braces are
// elided in this chunk.
675 static bool ocl_countNonZero( InputArray _src, int & res )
677 int type = _src.type(), depth = CV_MAT_DEPTH(type), kercn = ocl::predictOptimalVectorWidth(_src);
678 bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
680 if (depth == CV_64F && !doubleSupport)
// One partial count per compute unit.
683 int dbsize = ocl::Device::getDefault().maxComputeUnits();
684 size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
// Round the work-group size up to a power of two for the in-kernel reduction.
686 int wgs2_aligned = 1;
687 while (wgs2_aligned < (int)wgs)
691 ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
692 format("-D srcT=%s -D srcT1=%s -D cn=1 -D OP_COUNT_NON_ZERO"
693 " -D WGS=%d -D kercn=%d -D WGS2_ALIGNED=%d%s%s",
694 ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
695 ocl::typeToStr(depth), (int)wgs, kercn,
696 wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
697 _src.isContinuous() ? " -D HAVE_SRC_CONT" : ""));
701 UMat src = _src.getUMat(), db(1, dbsize, CV_32SC1);
702 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
703 dbsize, ocl::KernelArg::PtrWriteOnly(db));
705 size_t globalsize = dbsize * wgs;
// Final host-side reduction: sum the per-group counts with cv::sum.
706 if (k.run(1, &globalsize, &wgs, true))
707 return res = saturate_cast<int>(cv::sum(db.getMat(ACCESS_READ))[0]), true;
// cv::countNonZero: number of non-zero elements of a single-channel array.
// Tries OpenCL, then an (currently disabled, `&& 0`) IPP path, then the
// portable per-depth counters.
// NOTE(review): the res declaration, several branch/brace lines and the
// final return are elided in this chunk.
715 int cv::countNonZero( InputArray _src )
717 int type = _src.type(), cn = CV_MAT_CN(type);
718 CV_Assert( cn == 1 );
722 CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
723 ocl_countNonZero(_src, res),
727 Mat src = _src.getMat();
// NOTE: the trailing `&& 0` disables this IPP block entirely.
729 #if defined HAVE_IPP && !defined HAVE_IPP_ICV_ONLY && 0
730 if (src.dims <= 2 || src.isContinuous())
732 IppiSize roiSize = { src.cols, src.rows };
733 Ipp32s count = 0, srcstep = (Ipp32s)src.step;
734 IppStatus status = (IppStatus)-1;
// Continuous data is treated as one long row.
736 if (src.isContinuous())
738 roiSize.width = (Ipp32s)src.total();
740 srcstep = (Ipp32s)src.total() * CV_ELEM_SIZE(type);
743 int depth = CV_MAT_DEPTH(type);
// Count elements inside [0, 0] (i.e. zeros), then subtract from the total.
745 status = ippiCountInRange_8u_C1R((const Ipp8u *)src.data, srcstep, roiSize, &count, 0, 0);
746 else if (depth == CV_32F)
747 status = ippiCountInRange_32f_C1R((const Ipp32f *)src.data, srcstep, roiSize, &count, 0, 0);
750 return (Ipp32s)src.total() - count;
// Portable fallback: sum the per-plane counts.
755 CountNonZeroFunc func = getCountNonZeroTab(src.depth());
756 CV_Assert( func != 0 );
758 const Mat* arrays[] = {&src, 0};
760 NAryMatIterator it(arrays, ptrs);
761 int total = (int)it.size, nz = 0;
763 for( size_t i = 0; i < it.nplanes; i++, ++it )
764 nz += func( ptrs[0], total );
// cv::mean: per-channel mean over the (optionally masked) elements.
// Tries IPP (masked C1/C3 variants, then unmasked), then the portable
// SumFunc workers with a final division by the non-zero-mask count.
// NOTE(review): interior lines (Scalar accumulators, setIppErrorStatus()
// calls, block-flush bodies, braces) are elided in this chunk.
769 cv::Scalar cv::mean( InputArray _src, InputArray _mask )
771 Mat src = _src.getMat(), mask = _mask.getMat();
772 CV_Assert( mask.empty() || mask.type() == CV_8U );
774 int k, cn = src.channels(), depth = src.depth();
// IPP fast path for 2-D or continuous-reshapable data.
776 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
777 size_t total_size = src.total();
778 int rows = src.size[0], cols = (int)(total_size/rows);
779 if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
781 IppiSize sz = { cols, rows };
782 int type = src.type();
// Masked single-channel mean.
785 typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC1)(const void *, int, void *, int, IppiSize, Ipp64f *);
786 ippiMaskMeanFuncC1 ippFuncC1 =
787 type == CV_8UC1 ? (ippiMaskMeanFuncC1)ippiMean_8u_C1MR :
788 type == CV_16UC1 ? (ippiMaskMeanFuncC1)ippiMean_16u_C1MR :
789 type == CV_32FC1 ? (ippiMaskMeanFuncC1)ippiMean_32f_C1MR :
794 if( ippFuncC1(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, &res) >= 0 )
// Masked three-channel mean: one IPP call per channel (channel index 1..3).
798 typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC3)(const void *, int, void *, int, IppiSize, int, Ipp64f *);
799 ippiMaskMeanFuncC3 ippFuncC3 =
800 type == CV_8UC3 ? (ippiMaskMeanFuncC3)ippiMean_8u_C3CMR :
801 type == CV_16UC3 ? (ippiMaskMeanFuncC3)ippiMean_16u_C3CMR :
802 type == CV_32FC3 ? (ippiMaskMeanFuncC3)ippiMean_32f_C3CMR :
806 Ipp64f res1, res2, res3;
807 if( ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &res1) >= 0 &&
808 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &res2) >= 0 &&
809 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &res3) >= 0 )
811 return Scalar(res1, res2, res3);
// Unmasked mean: float types use the hint variant, integer types do not.
818 typedef IppStatus (CV_STDCALL* ippiMeanFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
819 typedef IppStatus (CV_STDCALL* ippiMeanFuncNoHint)(const void*, int, IppiSize, double *);
820 ippiMeanFuncHint ippFuncHint =
821 type == CV_32FC1 ? (ippiMeanFuncHint)ippiMean_32f_C1R :
822 type == CV_32FC3 ? (ippiMeanFuncHint)ippiMean_32f_C3R :
823 type == CV_32FC4 ? (ippiMeanFuncHint)ippiMean_32f_C4R :
825 ippiMeanFuncNoHint ippFuncNoHint =
826 type == CV_8UC1 ? (ippiMeanFuncNoHint)ippiMean_8u_C1R :
827 type == CV_8UC3 ? (ippiMeanFuncNoHint)ippiMean_8u_C3R :
828 type == CV_8UC4 ? (ippiMeanFuncNoHint)ippiMean_8u_C4R :
829 type == CV_16UC1 ? (ippiMeanFuncNoHint)ippiMean_16u_C1R :
830 type == CV_16UC3 ? (ippiMeanFuncNoHint)ippiMean_16u_C3R :
831 type == CV_16UC4 ? (ippiMeanFuncNoHint)ippiMean_16u_C4R :
832 type == CV_16SC1 ? (ippiMeanFuncNoHint)ippiMean_16s_C1R :
833 type == CV_16SC3 ? (ippiMeanFuncNoHint)ippiMean_16s_C3R :
834 type == CV_16SC4 ? (ippiMeanFuncNoHint)ippiMean_16s_C4R :
836 // Make sure only zero or one version of the function pointer is valid
837 CV_Assert(!ippFuncHint || !ippFuncNoHint);
838 if( ippFuncHint || ippFuncNoHint )
841 IppStatus ret = ippFuncHint ? ippFuncHint(src.data, (int)src.step[0], sz, res, ippAlgHintAccurate) :
842 ippFuncNoHint(src.data, (int)src.step[0], sz, res);
846 for( int i = 0; i < cn; i++ )
// Portable fallback: masked sum, then divide by the number of counted pixels.
856 SumFunc func = getSumFunc(depth);
858 CV_Assert( cn <= 4 && func != 0 );
860 const Mat* arrays[] = {&src, &mask, 0};
862 NAryMatIterator it(arrays, ptrs);
864 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
866 AutoBuffer<int> _buf;
867 int* buf = (int*)&s[0];
// Depths up to 16-bit accumulate in int blocks flushed before overflow.
868 bool blockSum = depth <= CV_16S;
869 size_t esz = 0, nz0 = 0;
873 intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
874 blockSize = std::min(blockSize, intSumBlockSize);
878 for( k = 0; k < cn; k++ )
880 esz = src.elemSize();
883 for( size_t i = 0; i < it.nplanes; i++, ++it )
885 for( j = 0; j < total; j += blockSize )
887 int bsz = std::min(total - j, blockSize);
// func returns the number of pixels actually accumulated (mask-aware).
888 int nz = func( ptrs[0], ptrs[1], (uchar*)buf, bsz, cn );
891 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
893 for( k = 0; k < cn; k++ )
// Divide by the pixel count; an all-zero mask yields Scalar of zeros.
905 return s*(nz0 ? 1./nz0 : 0);
// OpenCL implementation of meanStdDev: one kernel pass produces per-group
// sums, squared sums and (if masked) pixel counts; the host reduces them
// and converts to mean / standard deviation.
// NOTE(review): early returns, the cvt buffer, mean/stddev Scalar
// declarations, the per-channel mean computation and trailing copy-out
// lines are elided in this chunk.
912 static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
// Without a mask the divisor is simply the element count.
914 bool haveMask = _mask.kind() != _InputArray::NONE;
915 int nz = haveMask ? -1 : (int)_src.total();
919 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
920 bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0,
921 isContinuous = _src.isContinuous(),
922 isMaskContinuous = _mask.isContinuous();
923 const ocl::Device &defDev = ocl::Device::getDefault();
924 int groups = defDev.maxComputeUnits();
// Intel-specific group-count heuristic (two groups per 10-EU subslice).
925 if (defDev.isIntel())
927 static const int subSliceEUCount = 10;
928 groups = (groups / subSliceEUCount) * 2;
930 size_t wgs = defDev.maxWorkGroupSize();
// Sums accumulate at >= CV_32S, squared sums at >= CV_32F.
932 int ddepth = std::max(CV_32S, depth), sqddepth = std::max(CV_32F, depth),
933 dtype = CV_MAKE_TYPE(ddepth, cn),
934 sqdtype = CV_MAKETYPE(sqddepth, cn);
935 CV_Assert(!haveMask || _mask.type() == CV_8UC1);
// Power-of-two work-group size for the in-kernel reduction.
937 int wgs2_aligned = 1;
938 while (wgs2_aligned < (int)wgs)
942 if ( (!doubleSupport && depth == CV_64F) || cn > 4 )
946 String opts = format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D sqddepth=%d"
947 " -D sqdstT=%s -D sqdstT1=%s -D convertToSDT=%s -D cn=%d%s%s"
948 " -D convertToDT=%s -D WGS=%d -D WGS2_ALIGNED=%d%s%s",
949 ocl::typeToStr(type), ocl::typeToStr(depth),
950 ocl::typeToStr(dtype), ocl::typeToStr(ddepth), sqddepth,
951 ocl::typeToStr(sqdtype), ocl::typeToStr(sqddepth),
952 ocl::convertTypeStr(depth, sqddepth, cn, cvt[0]),
953 cn, isContinuous ? " -D HAVE_SRC_CONT" : "",
954 isMaskContinuous ? " -D HAVE_MASK_CONT" : "",
955 ocl::convertTypeStr(depth, ddepth, cn, cvt[1]),
956 (int)wgs, wgs2_aligned, haveMask ? " -D HAVE_MASK" : "",
957 doubleSupport ? " -D DOUBLE_SUPPORT" : "");
959 ocl::Kernel k("meanStdDev", ocl::core::meanstddev_oclsrc, opts);
// Raw byte buffer laid out as [sums | squared sums | counts-if-masked],
// one slot per work-group.
963 int dbsize = groups * ((haveMask ? CV_ELEM_SIZE1(CV_32S) : 0) +
964 CV_ELEM_SIZE(sqdtype) + CV_ELEM_SIZE(dtype));
965 UMat src = _src.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat();
967 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
968 dbarg = ocl::KernelArg::PtrWriteOnly(db),
969 maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);
972 k.args(srcarg, src.cols, (int)src.total(), groups, dbarg, maskarg);
974 k.args(srcarg, src.cols, (int)src.total(), groups, dbarg);
976 size_t globalsize = groups * wgs;
977 if (!k.run(1, &globalsize, &wgs, false))
// Host-side reduction of the three partial-result sections.
980 typedef Scalar (* part_sum)(Mat m);
981 part_sum funcs[3] = { ocl_part_sum<int>, ocl_part_sum<float>, ocl_part_sum<double> };
982 Mat dbm = db.getMat(ACCESS_READ);
984 mean = funcs[ddepth - CV_32S](Mat(1, groups, dtype, dbm.data));
985 stddev = funcs[sqddepth - CV_32S](Mat(1, groups, sqdtype, dbm.data + groups * CV_ELEM_SIZE(dtype)));
// Masked case: recover the pixel count from the final section.
988 nz = saturate_cast<int>(funcs[0](Mat(1, groups, CV_32SC1, dbm.data +
989 groups * (CV_ELEM_SIZE(dtype) +
990 CV_ELEM_SIZE(sqdtype))))[0]);
993 double total = nz != 0 ? 1.0 / nz : 0;
994 int k, j, cn = _src.channels();
// stddev = sqrt(E[x^2] - E[x]^2), clamped at 0 against rounding error.
995 for (int i = 0; i < cn; ++i)
998 stddev[i] = std::sqrt(std::max(stddev[i] * total - mean[i] * mean[i] , 0.));
// Copy mean (j==0) then stddev (j==1) into the requested output arrays,
// zero-padding any extra destination channels.
1001 for( j = 0; j < 2; j++ )
1003 const double * const sptr = j == 0 ? &mean[0] : &stddev[0];
1004 _OutputArray _dst = j == 0 ? _mean : _sdv;
1005 if( !_dst.needed() )
1008 if( !_dst.fixedSize() )
1009 _dst.create(cn, 1, CV_64F, -1, true);
1010 Mat dst = _dst.getMat();
1011 int dcn = (int)dst.total();
1012 CV_Assert( dst.type() == CV_64F && dst.isContinuous() &&
1013 (dst.cols == 1 || dst.rows == 1) && dcn >= cn );
1014 double* dptr = dst.ptr<double>();
1015 for( k = 0; k < cn; k++ )
1017 for( ; k < dcn; k++ )
// cv::meanStdDev: per-channel mean and standard deviation, optionally
// masked.  Tries OpenCL, then IPP (masked and unmasked C1/C3 variants),
// then the portable SumSqrFunc workers.
// NOTE(review): many interior lines (Mat mean/stddev declarations, dcn_mean
// init, early returns, zero-padding loop bodies, block-flush bodies, final
// copy-out statements and braces) are elided in this chunk.
1028 void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
1030 CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
1031 ocl_meanStdDev(_src, _mean, _sdv, _mask))
1033 Mat src = _src.getMat(), mask = _mask.getMat();
1034 CV_Assert( mask.empty() || mask.type() == CV_8UC1 );
1036 int k, cn = src.channels(), depth = src.depth();
// IPP fast path for 2-D or continuous-reshapable data.
1038 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
1039 size_t total_size = src.total();
1040 int rows = src.size[0], cols = (int)(total_size/rows);
1041 if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
// IPP writes either into local temporaries or directly into the caller's
// output Mats, depending on whether each output is needed.
1043 Ipp64f mean_temp[3];
1044 Ipp64f stddev_temp[3];
1045 Ipp64f *pmean = &mean_temp[0];
1046 Ipp64f *pstddev = &stddev_temp[0];
1049 if( _mean.needed() )
1051 if( !_mean.fixedSize() )
1052 _mean.create(cn, 1, CV_64F, -1, true);
1053 mean = _mean.getMat();
1054 dcn_mean = (int)mean.total();
1055 pmean = (Ipp64f *)mean.data;
1057 int dcn_stddev = -1;
1060 if( !_sdv.fixedSize() )
1061 _sdv.create(cn, 1, CV_64F, -1, true);
1062 stddev = _sdv.getMat();
1063 dcn_stddev = (int)stddev.total();
1064 pstddev = (Ipp64f *)stddev.data;
// Zero-pad any destination channels beyond cn.
1066 for( int c = cn; c < dcn_mean; c++ )
1068 for( int c = cn; c < dcn_stddev; c++ )
1070 IppiSize sz = { cols, rows };
1071 int type = src.type();
// Masked single-channel variant.
1074 typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC1)(const void *, int, void *, int, IppiSize, Ipp64f *, Ipp64f *);
1075 ippiMaskMeanStdDevFuncC1 ippFuncC1 =
1076 type == CV_8UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_8u_C1MR :
1077 type == CV_16UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_16u_C1MR :
1078 type == CV_32FC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_32f_C1MR :
1082 if( ippFuncC1(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, pmean, pstddev) >= 0 )
// On IPP failure, record it and fall through to the portable path.
1084 setIppErrorStatus();
// Masked three-channel variant: one call per channel (index 1..3).
1086 typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC3)(const void *, int, void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
1087 ippiMaskMeanStdDevFuncC3 ippFuncC3 =
1088 type == CV_8UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CMR :
1089 type == CV_16UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CMR :
1090 type == CV_32FC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CMR :
1094 if( ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
1095 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
1096 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
1098 setIppErrorStatus();
// Unmasked single-channel variant.
1103 typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC1)(const void *, int, IppiSize, Ipp64f *, Ipp64f *);
1104 ippiMeanStdDevFuncC1 ippFuncC1 =
1105 type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R :
1106 type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R :
// 32f C1 only enabled for IPP >= 8.0.1 because of an upstream IPP bug.
1107 #if (IPP_VERSION_X100 >= 801)
1108 type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0
1113 if( ippFuncC1(src.data, (int)src.step[0], sz, pmean, pstddev) >= 0 )
1115 setIppErrorStatus();
// Unmasked three-channel variant.
1117 typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC3)(const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
1118 ippiMeanStdDevFuncC3 ippFuncC3 =
1119 type == CV_8UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CR :
1120 type == CV_16UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CR :
1121 type == CV_32FC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CR :
1125 if( ippFuncC3(src.data, (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
1126 ippFuncC3(src.data, (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
1127 ippFuncC3(src.data, (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
1129 setIppErrorStatus();
// Portable fallback: accumulate sum and squared sum, then derive the stats.
1136 SumSqrFunc func = getSumSqrTab(depth);
1138 CV_Assert( func != 0 );
1140 const Mat* arrays[] = {&src, &mask, 0};
1142 NAryMatIterator it(arrays, ptrs);
1143 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
1144 int j, count = 0, nz0 = 0;
// _buf holds [s | sq] doubles, optionally followed by int block buffers.
1145 AutoBuffer<double> _buf(cn*4);
1146 double *s = (double*)_buf, *sq = s + cn;
1147 int *sbuf = (int*)s, *sqbuf = (int*)sq;
// Small depths use int block accumulators flushed before overflow;
// squared sums only fit in int for 8-bit depths.
1148 bool blockSum = depth <= CV_16S, blockSqSum = depth <= CV_8S;
1151 for( k = 0; k < cn; k++ )
1156 intSumBlockSize = 1 << 15;
1157 blockSize = std::min(blockSize, intSumBlockSize);
1158 sbuf = (int*)(sq + cn);
1161 for( k = 0; k < cn; k++ )
1162 sbuf[k] = sqbuf[k] = 0;
1163 esz = src.elemSize();
1166 for( size_t i = 0; i < it.nplanes; i++, ++it )
1168 for( j = 0; j < total; j += blockSize )
1170 int bsz = std::min(total - j, blockSize);
// func returns the number of pixels accumulated (mask-aware).
1171 int nz = func( ptrs[0], ptrs[1], (uchar*)sbuf, (uchar*)sqbuf, bsz, cn );
1174 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
1176 for( k = 0; k < cn; k++ )
1183 for( k = 0; k < cn; k++ )
// mean = sum/count; stddev = sqrt(E[x^2] - mean^2), clamped at 0.
1197 double scale = nz0 ? 1./nz0 : 0.;
1198 for( k = 0; k < cn; k++ )
1201 sq[k] = std::sqrt(std::max(sq[k]*scale - s[k]*s[k], 0.));
// Copy mean (j==0) then stddev (j==1) into the requested outputs,
// zero-padding extra destination channels.
1204 for( j = 0; j < 2; j++ )
1206 const double* sptr = j == 0 ? s : sq;
1207 _OutputArray _dst = j == 0 ? _mean : _sdv;
1208 if( !_dst.needed() )
1211 if( !_dst.fixedSize() )
1212 _dst.create(cn, 1, CV_64F, -1, true);
1213 Mat dst = _dst.getMat();
1214 int dcn = (int)dst.total();
1215 CV_Assert( dst.type() == CV_64F && dst.isContinuous() &&
1216 (dst.cols == 1 || dst.rows == 1) && dcn >= cn );
1217 double* dptr = dst.ptr<double>();
1218 for( k = 0; k < cn; k++ )
1220 for( ; k < dcn; k++ )
1225 /****************************************************************************************\
1227 \****************************************************************************************/
// Generic min/max scan with flat-index tracking.  Reads the running
// min/max and their indices through the in/out pointers, scans `len`
// elements (optionally masked), and writes the updated values back.
// startIdx lets the caller pass a global offset so indices stay flat
// across planes.
// NOTE(review): the no-mask/mask branch headers, value loads, write-back of
// the results and braces appear elided in this chunk.
1232 template<typename T, typename WT> static void
1233 minMaxIdx_( const T* src, const uchar* mask, WT* _minVal, WT* _maxVal,
1234 size_t* _minIdx, size_t* _maxIdx, int len, size_t startIdx )
1236 WT minVal = *_minVal, maxVal = *_maxVal;
1237 size_t minIdx = *_minIdx, maxIdx = *_maxIdx;
// Unmasked scan.
1241 for( int i = 0; i < len; i++ )
1247 minIdx = startIdx + i;
1252 maxIdx = startIdx + i;
// Masked scan: only elements with a non-zero mask byte are considered.
1258 for( int i = 0; i < len; i++ )
1261 if( mask[i] && val < minVal )
1264 minIdx = startIdx + i;
1266 if( mask[i] && val > maxVal )
1269 maxIdx = startIdx + i;
1280 static void minMaxIdx_8u(const uchar* src, const uchar* mask, int* minval, int* maxval,
1281 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1282 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1284 static void minMaxIdx_8s(const schar* src, const uchar* mask, int* minval, int* maxval,
1285 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1286 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1288 static void minMaxIdx_16u(const ushort* src, const uchar* mask, int* minval, int* maxval,
1289 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1290 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1292 static void minMaxIdx_16s(const short* src, const uchar* mask, int* minval, int* maxval,
1293 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1294 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1296 static void minMaxIdx_32s(const int* src, const uchar* mask, int* minval, int* maxval,
1297 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1298 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1300 static void minMaxIdx_32f(const float* src, const uchar* mask, float* minval, float* maxval,
1301 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1302 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1304 static void minMaxIdx_64f(const double* src, const uchar* mask, double* minval, double* maxval,
1305 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1306 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
// Unified function-pointer type for the depth-specific kernels above; the
// value pointers are erased to int* and re-cast per depth by the caller.
1308 typedef void (*MinMaxIdxFunc)(const uchar*, const uchar*, int*, int*, size_t*, size_t*, int, size_t);
// Returns the min/max kernel matching the given element depth.
1310 static MinMaxIdxFunc getMinmaxTab(int depth)
// Table indexed by depth; GET_OPTIMIZED may substitute an accelerated build.
1312     static MinMaxIdxFunc minmaxTab[] =
1314         (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8s),
1315         (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16s),
1316         (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32s),
1317         (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32f), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_64f),
1321     return minmaxTab[depth];
// Converts a linear element offset into an nd-index of matrix `a`, writing one
// coordinate per dimension into idx[].  The offset appears to be 1-based here
// (callers pass ofs+1) — presumably 0 encodes "no extremum found"; the branch
// for that case is not visible in this extract — TODO confirm.
1324 static void ofs2idx(const Mat& a, size_t ofs, int* idx)
// Peel coordinates from the innermost dimension outward.
1330     for( i = d-1; i >= 0; i-- )
1333         idx[i] = (int)(ofs % sz);
// Second loop (branch condition not visible) — likely fills idx with a
// sentinel when ofs is 0; verify against the full source.
1339     for( i = d-1; i >= 0; i-- )
// Reduces the per-workgroup partial results produced by the OpenCL
// "minmaxloc" kernel (packed into buffer `db`) into final scalar min/max
// values and 2D locations.  Output pointers may be NULL when the caller does
// not need that result.  `groupnum` is the number of workgroups; `cols` is
// used to convert a linear location into (row, col).
// NOTE(review): incomplete extract — braces/else-branches are missing.
1346 template <typename T>
1347 void getMinMaxRes(const Mat & db, double * minVal, double * maxVal,
1348                   int* minLoc, int* maxLoc,
1349                   int groupnum, int cols, double * maxVal2)
// index_max doubles as the "location never set" sentinel.
1351     uint index_max = std::numeric_limits<uint>::max();
1352     T minval = std::numeric_limits<T>::max();
// For unsigned T (min() > 0 never true is false... min()==0), start max at the
// lowest representable value; for floats use -max() since min() is the
// smallest positive value.
1353     T maxval = std::numeric_limits<T>::min() > 0 ? -std::numeric_limits<T>::max() : std::numeric_limits<T>::min(), maxval2 = maxval;
1354     uint minloc = index_max, maxloc = index_max;
// The db buffer is a concatenation of optional sections, laid out in the same
// order/conditions used when dbsize was computed in ocl_minMaxIdx:
// [min values][max values][min locations][max locations][secondary max].
1357     const T * minptr = NULL, * maxptr = NULL, * maxptr2 = NULL;
1358     const uint * minlocptr = NULL, * maxlocptr = NULL;
1359     if (minVal || minLoc)
1361         minptr = (const T *)db.data;
1362         index += sizeof(T) * groupnum;
1364     if (maxVal || maxLoc)
1366         maxptr = (const T *)(db.data + index);
1367         index += sizeof(T) * groupnum;
1371         minlocptr = (uint *)(db.data + index);
1372         index += sizeof(uint) * groupnum;
1376         maxlocptr = (uint *)(db.data + index);
1377         index += sizeof(uint) * groupnum;
1380         maxptr2 = (const T *)(db.data + index);
// Combine the per-group partials; ties on value resolve to the smallest
// (earliest) location to match the single-threaded scan order.
1382     for (int i = 0; i < groupnum; i++)
1384         if (minptr && minptr[i] <= minval)
1386             if (minptr[i] == minval)
1389                 minloc = std::min(minlocptr[i], minloc);
1394                 minloc = minlocptr[i];
1398         if (maxptr && maxptr[i] >= maxval)
1400             if (maxptr[i] == maxval)
1403                 maxloc = std::min(maxlocptr[i], maxloc);
1408                 maxloc = maxlocptr[i];
1412         if (maxptr2 && maxptr2[i] > maxval2)
1413             maxval2 = maxptr2[i];
// A still-unset location means the mask rejected every element: report zeros
// for values and (-1, -1) for locations.
1415     bool zero_mask = (minLoc && minloc == index_max) ||
1416                      (maxLoc && maxloc == index_max);
1419         *minVal = zero_mask ? 0 : (double)minval;
1421         *maxVal = zero_mask ? 0 : (double)maxval;
1423         *maxVal2 = zero_mask ? 0 : (double)maxval2;
// Locations are emitted as (row, col) pairs; cv::minMaxLoc swaps into (x, y).
1427         minLoc[0] = zero_mask ? -1 : minloc / cols;
1428         minLoc[1] = zero_mask ? -1 : minloc % cols;
1432         maxLoc[0] = zero_mask ? -1 : maxloc / cols;
1433         maxLoc[1] = zero_mask ? -1 : maxloc % cols;
1437 typedef void (*getMinMaxResFunc)(const Mat & db, double * minVal, double * maxVal,
1438 int * minLoc, int *maxLoc, int gropunum, int cols, double * maxVal2);
// OpenCL implementation of minMaxIdx.  Builds and runs the "minmaxloc"
// reduction kernel, then folds the per-workgroup partials with
// getMinMaxRes<T>.  Returns false when the device/type combination is not
// supported so the caller can fall back to the CPU path.  absValues selects
// |x| (used by NORM_INF on signed types); _src2/maxVal2 support the
// two-source / secondary-maximum variants.
// NOTE(review): incomplete extract — braces, early returns and some
// statements are missing between the visible lines.
1440 static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc, int* maxLoc, InputArray _mask,
1441                            int ddepth = -1, bool absValues = false, InputArray _src2 = noArray(), double * maxVal2 = NULL)
1443     const ocl::Device & dev = ocl::Device::getDefault();
1444     bool doubleSupport = dev.doubleFPConfig() > 0, haveMask = !_mask.empty(),
1445         haveSrc2 = _src2.kind() != _InputArray::NONE;
1446     int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
// With a mask the kernel must address channels individually; otherwise pick a
// vector width for better memory throughput.
1447         kercn = haveMask ? cn : std::min(4, ocl::predictOptimalVectorWidth(_src));
// Locations are only defined for single-channel input.
1449     CV_Assert( (cn == 1 && (!haveMask || _mask.type() == CV_8U)) ||
1450                (cn >= 1 && !minLoc && !maxLoc) );
1455     CV_Assert(!haveSrc2 || _src2.type() == type);
1457     if (depth == CV_32S || depth == CV_32F)
// 64-bit accumulation requires device double support.
1460     if ((depth == CV_64F || ddepth == CV_64F) && !doubleSupport)
1463     int groupnum = dev.maxComputeUnits();
1464     size_t wgs = dev.maxWorkGroupSize();
// Round the workgroup size up to a power of two for the tree reduction.
1466     int wgs2_aligned = 1;
1467     while (wgs2_aligned < (int)wgs)
1471     bool needMinVal = minVal || minLoc, needMinLoc = minLoc != NULL,
1472         needMaxVal = maxVal || maxLoc, needMaxLoc = maxLoc != NULL;
1474     // in case of mask we must know whether mask is filled with zeros or not
1475     // so let's calculate min or max location, if it's undefined, so mask is zeros
1476     if (!(needMaxLoc || needMinLoc) && haveMask)
// Assemble the kernel's compile-time configuration as -D defines.
1485     String opts = format("-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s"
1486                          " -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s"
1487                          " -D dstT1=%s -D dstT=%s -D convertToDT=%s%s%s%s%s -D wdepth=%d -D convertFromU=%s",
1488                          depth, ocl::typeToStr(depth), haveMask ? " -D HAVE_MASK" : "", (int)wgs,
1489                          ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), wgs2_aligned,
1490                          doubleSupport ? " -D DOUBLE_SUPPORT" : "",
1491                          _src.isContinuous() ? " -D HAVE_SRC_CONT" : "",
1492                          _mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn,
1493                          needMinVal ? " -D NEED_MINVAL" : "", needMaxVal ? " -D NEED_MAXVAL" : "",
1494                          needMinLoc ? " -D NEED_MINLOC" : "", needMaxLoc ? " -D NEED_MAXLOC" : "",
1495                          ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
1496                          ocl::convertTypeStr(depth, ddepth, kercn, cvt[0]),
1497                          absValues ? " -D OP_ABS" : "",
1498                          haveSrc2 ? " -D HAVE_SRC2" : "", maxVal2 ? " -D OP_CALC2" : "",
1499                          haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", ddepth,
1500                          depth <= CV_32S && ddepth == CV_32S ? ocl::convertTypeStr(CV_8U, ddepth, kercn, cvt[1]) : "noconvert");
1502     ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts);
// Size the partial-results buffer: one slot per workgroup for each section
// that was requested (values, locations, secondary max).
1506     int esz = CV_ELEM_SIZE(ddepth), esz32s = CV_ELEM_SIZE1(CV_32S),
1507         dbsize = groupnum * ((needMinVal ? esz : 0) + (needMaxVal ? esz : 0) +
1508                              (needMinLoc ? esz32s : 0) + (needMaxLoc ? esz32s : 0) +
1509                              (maxVal2 ? esz : 0));
1510     UMat src = _src.getUMat(), src2 = _src2.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat();
// Multi-channel without mask was asserted above to not need locations, so
// reinterpret as single-channel for the reduction.
1512     if (cn > 1 && !haveMask)
1514         src = src.reshape(1);
1515         src2 = src2.reshape(1);
// Argument lists differ per (mask, src2) combination.
1521             k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
1522                    groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(src2));
1524             k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
1525                    groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(mask),
1526                    ocl::KernelArg::ReadOnlyNoSize(src2));
1531             k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
1532                    groupnum, ocl::KernelArg::PtrWriteOnly(db));
1534             k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
1535                    groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(mask));
1538     size_t globalsize = groupnum * wgs;
1539     if (!k.run(1, &globalsize, &wgs, false))
// Dispatch the host-side reduction by accumulator depth.
1542     static const getMinMaxResFunc functab[7] =
1544         getMinMaxRes<uchar>,
1546         getMinMaxRes<ushort>,
1547         getMinMaxRes<short>,
1549         getMinMaxRes<float>,
1550         getMinMaxRes<double>
1553     getMinMaxResFunc func = functab[ddepth];
// locTemp stands in when a location is needed internally (mask-all-zero
// detection) but the caller did not ask for it.
1556     func(db.getMat(ACCESS_READ), minVal, maxVal,
1557          needMinLoc ? minLoc ? minLoc : locTemp : minLoc,
1558          needMaxLoc ? maxLoc ? maxLoc : locTemp : maxLoc,
1559          groupnum, src.cols, maxVal2);
// Public minMaxIdx: finds the global minimum/maximum (and optionally their
// nd-indices) of an array.  Tries OpenCL, then IPP (masked and unmasked 2D
// paths), then the generic per-plane CPU kernels.  Any output pointer may be
// NULL.  NOTE(review): incomplete extract — braces, null-pointer guards and
// some statements are missing between the visible lines.
1568 void cv::minMaxIdx(InputArray _src, double* minVal,
1569                    double* maxVal, int* minIdx, int* maxIdx,
1572     int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
// Indices/mask are only supported for single-channel input.
1573     CV_Assert( (cn == 1 && (_mask.empty() || _mask.type() == CV_8U)) ||
1574                (cn > 1 && _mask.empty() && !minIdx && !maxIdx) );
1576     CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2 && (_mask.empty() || _src.size() == _mask.size()),
1577                ocl_minMaxIdx(_src, minVal, maxVal, minIdx, maxIdx, _mask))
1579     Mat src = _src.getMat(), mask = _mask.getMat();
1581 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
// IPP path handles 2D (or continuous nd viewed as rows x cols) data.
1582     size_t total_size = src.total();
1583     int rows = src.size[0], cols = (int)(total_size/rows);
1584     if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
1586         IppiSize sz = { cols * cn, rows };
// Masked IPP variant.
1590         typedef IppStatus (CV_STDCALL* ippiMaskMinMaxIndxFuncC1)(const void *, int, const void *, int,
1591                                                                  IppiSize, Ipp32f *, Ipp32f *, IppiPoint *, IppiPoint *);
1593         CV_SUPPRESS_DEPRECATED_START
1594         ippiMaskMinMaxIndxFuncC1 ippFuncC1 =
1595             type == CV_8UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1MR :
1596             type == CV_8SC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_8s_C1MR :
1597             type == CV_16UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1MR :
1598             type == CV_32FC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1MR : 0;
1599         CV_SUPPRESS_DEPRECATED_END
1604             IppiPoint minp, maxp;
1605             if( ippFuncC1(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, &min, &max, &minp, &maxp) >= 0 )
1608                 *minVal = (double)min;
1610                 *maxVal = (double)max;
// IPP reports (0,0) both when the first pixel is the extremum and when the
// mask is empty; disambiguate via the first mask byte.
1611                 if( !minp.x && !minp.y && !maxp.x && !maxp.y && !mask.data[0] )
1612                     minp.x = maxp.x = -1;
// +1 converts to the 1-based linear offset expected by ofs2idx.
1615                     size_t minidx = minp.y * cols + minp.x + 1;
1616                     ofs2idx(src, minidx, minIdx);
1620                     size_t maxidx = maxp.y * cols + maxp.x + 1;
1621                     ofs2idx(src, maxidx, maxIdx);
1625             setIppErrorStatus();
// Unmasked IPP variant.
1630         typedef IppStatus (CV_STDCALL* ippiMinMaxIndxFuncC1)(const void *, int, IppiSize, Ipp32f *, Ipp32f *, IppiPoint *, IppiPoint *);
1632         CV_SUPPRESS_DEPRECATED_START
1633         ippiMinMaxIndxFuncC1 ippFuncC1 =
1634             depth == CV_8U ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1R :
1635             depth == CV_8S ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_8s_C1R :
1636             depth == CV_16U ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1R :
1637             depth == CV_32F ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1R : 0;
1638         CV_SUPPRESS_DEPRECATED_END
1643             IppiPoint minp, maxp;
1644             if( ippFuncC1(src.data, (int)src.step[0], sz, &min, &max, &minp, &maxp) >= 0 )
1647                 *minVal = (double)min;
1649                 *maxVal = (double)max;
1652                 size_t minidx = minp.y * cols + minp.x + 1;
1653                 ofs2idx(src, minidx, minIdx);
1657                 size_t maxidx = maxp.y * cols + maxp.x + 1;
1658                 ofs2idx(src, maxidx, maxIdx);
1662             setIppErrorStatus();
// Generic CPU fallback: per-plane scan with depth-dispatched kernels.
1668     MinMaxIdxFunc func = getMinmaxTab(depth);
1669     CV_Assert( func != 0 );
1671     const Mat* arrays[] = {&src, &mask, 0};
1673     NAryMatIterator it(arrays, ptrs);
1675     size_t minidx = 0, maxidx = 0;
1676     int iminval = INT_MAX, imaxval = INT_MIN;
1677     float fminval = FLT_MAX, fmaxval = -FLT_MAX;
1678     double dminval = DBL_MAX, dmaxval = -DBL_MAX;
1679     size_t startidx = 1;
// The table erases value-pointer types to int*; re-point at the accumulator
// matching the actual depth before calling.
1680     int *minval = &iminval, *maxval = &imaxval;
1681     int planeSize = (int)it.size*cn;
1683     if( depth == CV_32F )
1684         minval = (int*)&fminval, maxval = (int*)&fmaxval;
1685     else if( depth == CV_64F )
1686         minval = (int*)&dminval, maxval = (int*)&dmaxval;
1688     for( size_t i = 0; i < it.nplanes; i++, ++it, startidx += planeSize )
1689         func( ptrs[0], ptrs[1], minval, maxval, &minidx, &maxidx, planeSize, startidx );
// Normalize the typed accumulators to double for the output pointers;
// a still-zero index means nothing matched (all-zero mask).
1692         dminval = dmaxval = 0;
1693     else if( depth == CV_32F )
1694         dminval = fminval, dmaxval = fmaxval;
1695     else if( depth <= CV_32S )
1696         dminval = iminval, dmaxval = imaxval;
1704         ofs2idx(src, minidx, minIdx);
1706         ofs2idx(src, maxidx, maxIdx);
// 2D-only convenience wrapper over minMaxIdx.  minMaxIdx writes (row, col)
// pairs, while Point stores (x, y) = (col, row), hence the swaps below.
1709 void cv::minMaxLoc( InputArray _img, double* minVal, double* maxVal,
1710                     Point* minLoc, Point* maxLoc, InputArray mask )
1712     CV_Assert(_img.dims() <= 2);
// Point's two ints are reinterpreted as the int[2] index buffers.
1714     minMaxIdx(_img, minVal, maxVal, (int*)minLoc, (int*)maxLoc, mask);
// Swap row/col into Point's x/y order (the null-pointer guards around these
// swaps are not visible in this extract — present in the full source).
1716         std::swap(minLoc->x, minLoc->y);
1718         std::swap(maxLoc->x, maxLoc->y);
1721 /****************************************************************************************\
1723 \****************************************************************************************/
// Pairwise distance kernels with SSE2 fast paths (guard #if lines are not
// visible in this extract) and scalar tails.
// normL2Sqr_: sum of squared differences of two float vectors of length n.
1728 float normL2Sqr_(const float* a, const float* b, int n)
1730     int j = 0; float d = 0.f;
// SSE2 path: two accumulators, 8 floats per iteration.
1734         float CV_DECL_ALIGNED(16) buf[4];
1735         __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
1737         for( ; j <= n - 8; j += 8 )
1739             __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
1740             __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
1741             d0 = _mm_add_ps(d0, _mm_mul_ps(t0, t0));
1742             d1 = _mm_add_ps(d1, _mm_mul_ps(t1, t1));
// Horizontal sum of the two vector accumulators.
1744         _mm_store_ps(buf, _mm_add_ps(d0, d1));
1745         d = buf[0] + buf[1] + buf[2] + buf[3];
// Scalar path, unrolled by four.
1750         for( ; j <= n - 4; j += 4 )
1752             float t0 = a[j] - b[j], t1 = a[j+1] - b[j+1], t2 = a[j+2] - b[j+2], t3 = a[j+3] - b[j+3];
1753             d += t0*t0 + t1*t1 + t2*t2 + t3*t3;
1759         float t = a[j] - b[j];
// normL1_ (float): sum of absolute differences; abs is done by masking off
// the IEEE-754 sign bit with 0x7fffffff.
1766 float normL1_(const float* a, const float* b, int n)
1768     int j = 0; float d = 0.f;
1772         float CV_DECL_ALIGNED(16) buf[4];
1773         static const int CV_DECL_ALIGNED(16) absbuf[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
1774         __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
1775         __m128 absmask = _mm_load_ps((const float*)absbuf);
1777         for( ; j <= n - 8; j += 8 )
1779             __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
1780             __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
1781             d0 = _mm_add_ps(d0, _mm_and_ps(t0, absmask));
1782             d1 = _mm_add_ps(d1, _mm_and_ps(t1, absmask));
1784         _mm_store_ps(buf, _mm_add_ps(d0, d1));
1785         d = buf[0] + buf[1] + buf[2] + buf[3];
1790         for( ; j <= n - 4; j += 4 )
1792             d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
1793                  std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
1798         d += std::abs(a[j] - b[j]);
// normL1_ (uchar): sum of absolute differences of two byte vectors using the
// PSADBW (sum of absolute differences) instruction, 16 bytes per iteration.
1802 int normL1_(const uchar* a, const uchar* b, int n)
1808         __m128i d0 = _mm_setzero_si128();
1810         for( ; j <= n - 16; j += 16 )
1812             __m128i t0 = _mm_loadu_si128((const __m128i*)(a + j));
1813             __m128i t1 = _mm_loadu_si128((const __m128i*)(b + j));
1815             d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
// Tail handled 4 bytes at a time via a 32-bit scalar load into an xmm lane.
1818         for( ; j <= n - 4; j += 4 )
1820             __m128i t0 = _mm_cvtsi32_si128(*(const int*)(a + j));
1821             __m128i t1 = _mm_cvtsi32_si128(*(const int*)(b + j));
1823             d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
// PSADBW leaves partial sums in the low/high 64-bit halves; fold them.
1825         d = _mm_cvtsi128_si32(_mm_add_epi32(d0, _mm_unpackhi_epi64(d0, d0)));
1830         for( ; j <= n - 4; j += 4 )
1832             d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
1833                  std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
1837         d += std::abs(a[j] - b[j]);
// popCountTable[b]: number of set bits in byte b (plain Hamming weight).
1841 static const uchar popCountTable[] =
1843     0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1844     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1845     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1846     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1847     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1848     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1849     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1850     3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
// popCountTable2[b]: byte viewed as four 2-bit cells; counts cells that have
// at least one bit set (used by NORM_HAMMING2-style descriptors).
1853 static const uchar popCountTable2[] =
1855     0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
1856     1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
1857     1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1858     2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1859     1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1860     2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1861     1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1862     2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
// popCountTable4[b]: byte viewed as two 4-bit cells; counts cells with at
// least one bit set.
1865 static const uchar popCountTable4[] =
1867     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1868     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1869     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1870     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1871     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1872     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1873     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1874     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
// Hamming-norm kernels.  NOTE(review): incomplete extract — the CV_NEON /
// fallback preprocessor guards and several braces are missing between lines.
// normHamming(a, n): total number of set bits in an n-byte buffer.
1877 static int normHamming(const uchar* a, int n)
1879     int i = 0, result = 0;
// NEON path: per-byte popcount (vcntq_u8) with pairwise widening adds.
1882     uint32x4_t bits = vmovq_n_u32(0);
1883     for (; i <= n - 16; i += 16) {
1884         uint8x16_t A_vec = vld1q_u8 (a + i);
1885         uint8x16_t bitsSet = vcntq_u8 (A_vec);
1886         uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
1887         uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
1888         bits = vaddq_u32(bits, bitSet4);
1890     uint64x2_t bitSet2 = vpaddlq_u32 (bits);
1891     result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
1892     result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
// Table-driven scalar path, unrolled by four.
1895     for( ; i <= n - 4; i += 4 )
1896         result += popCountTable[a[i]] + popCountTable[a[i+1]] +
1897                   popCountTable[a[i+2]] + popCountTable[a[i+3]];
1899         result += popCountTable[a[i]];
// normHamming(a, b, n): Hamming distance between two n-byte buffers
// (popcount of a XOR b).
1903 int normHamming(const uchar* a, const uchar* b, int n)
1905     int i = 0, result = 0;
1908     uint32x4_t bits = vmovq_n_u32(0);
1909     for (; i <= n - 16; i += 16) {
1910         uint8x16_t A_vec = vld1q_u8 (a + i);
1911         uint8x16_t B_vec = vld1q_u8 (b + i);
1912         uint8x16_t AxorB = veorq_u8 (A_vec, B_vec);
1913         uint8x16_t bitsSet = vcntq_u8 (AxorB);
1914         uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
1915         uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
1916         bits = vaddq_u32(bits, bitSet4);
1918     uint64x2_t bitSet2 = vpaddlq_u32 (bits);
1919     result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
1920     result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
1923     for( ; i <= n - 4; i += 4 )
1924         result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] +
1925                   popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]];
1927         result += popCountTable[a[i] ^ b[i]];
// Cell-based variant: bits are grouped into 1-, 2- or 4-bit cells and each
// non-zero cell counts once (NORM_HAMMING2-style).  cellSize selects a table.
1931 static int normHamming(const uchar* a, int n, int cellSize)
1934         return normHamming(a, n);
1935     const uchar* tab = 0;
1937         tab = popCountTable2;
1938     else if( cellSize == 4 )
1939         tab = popCountTable4;
// Any other cell size is a usage error.
1941         CV_Error( CV_StsBadSize, "bad cell size (not 1, 2 or 4) in normHamming" );
1942     int i = 0, result = 0;
1943 #if CV_ENABLE_UNROLLED
1944     for( ; i <= n - 4; i += 4 )
1945         result += tab[a[i]] + tab[a[i+1]] + tab[a[i+2]] + tab[a[i+3]];
1948         result += tab[a[i]];
// Cell-based distance between two buffers: table lookup on the XOR of bytes.
1952 int normHamming(const uchar* a, const uchar* b, int n, int cellSize)
1955         return normHamming(a, b, n);
1956     const uchar* tab = 0;
1958         tab = popCountTable2;
1959     else if( cellSize == 4 )
1960         tab = popCountTable4;
1962         CV_Error( CV_StsBadSize, "bad cell size (not 1, 2 or 4) in normHamming" );
1963     int i = 0, result = 0;
1964 #if CV_ENABLE_UNROLLED
1965     for( ; i <= n - 4; i += 4 )
1966         result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] +
1967                   tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]];
1970         result += tab[a[i] ^ b[i]];
// Masked norm kernels used through the function-pointer tables below.  Each
// accumulates into *_result (in/out) so values aggregate across planes; the
// int return value is apparently unused by callers (pointer tables cast it
// away) — TODO confirm.  With a NULL mask they delegate to the fast unmasked
// hal-style helpers; otherwise they walk `len` pixels of `cn` channels,
// skipping pixels whose mask byte is zero.
// NOTE(review): incomplete extract — braces/returns are missing between lines.
// L-infinity (max absolute value).
1975 template<typename T, typename ST> int
1976 normInf_(const T* src, const uchar* mask, ST* _result, int len, int cn)
1978     ST result = *_result;
1981         result = std::max(result, normInf<T, ST>(src, len*cn));
1985         for( int i = 0; i < len; i++, src += cn )
1988                 for( int k = 0; k < cn; k++ )
1989                     result = std::max(result, ST(std::abs(src[k])));
// L1 (sum of absolute values).
1996 template<typename T, typename ST> int
1997 normL1_(const T* src, const uchar* mask, ST* _result, int len, int cn)
1999     ST result = *_result;
2002         result += normL1<T, ST>(src, len*cn);
2006         for( int i = 0; i < len; i++, src += cn )
2009                 for( int k = 0; k < cn; k++ )
2010                     result += std::abs(src[k]);
// Squared L2 (sum of squares); the caller takes the square root at the end.
2017 template<typename T, typename ST> int
2018 normL2_(const T* src, const uchar* mask, ST* _result, int len, int cn)
2020     ST result = *_result;
2023         result += normL2Sqr<T, ST>(src, len*cn);
2027         for( int i = 0; i < len; i++, src += cn )
2030             for( int k = 0; k < cn; k++ )
// Two-source (difference) variants of the same three norms.
2041 template<typename T, typename ST> int
2042 normDiffInf_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
2044     ST result = *_result;
2047         result = std::max(result, normInf<T, ST>(src1, src2, len*cn));
2051         for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
2054                 for( int k = 0; k < cn; k++ )
2055                     result = std::max(result, (ST)std::abs(src1[k] - src2[k]));
2062 template<typename T, typename ST> int
2063 normDiffL1_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
2065     ST result = *_result;
2068         result += normL1<T, ST>(src1, src2, len*cn);
2072         for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
2075                 for( int k = 0; k < cn; k++ )
2076                     result += std::abs(src1[k] - src2[k]);
2083 template<typename T, typename ST> int
2084 normDiffL2_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
2086     ST result = *_result;
2089         result += normL2Sqr<T, ST>(src1, src2, len*cn);
2093         for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
2096                 for( int k = 0; k < cn; k++ )
2098                     ST v = src1[k] - src2[k];
// Generates the non-template wrappers (norm<L>_<suffix>, normDiff<L>_<suffix>)
// that instantiate the kernels above for a concrete element/accumulator type
// so they can be stored in the function-pointer tables below.
// (The (int)len cast in the normDiff wrapper is redundant — len is already
// int — but kept here byte-for-byte.)
2108 #define CV_DEF_NORM_FUNC(L, suffix, type, ntype) \
2109     static int norm##L##_##suffix(const type* src, const uchar* mask, ntype* r, int len, int cn) \
2110 { return norm##L##_(src, mask, r, len, cn); } \
2111     static int normDiff##L##_##suffix(const type* src1, const type* src2, \
2112     const uchar* mask, ntype* r, int len, int cn) \
2113 { return normDiff##L##_(src1, src2, mask, r, (int)len, cn); }
// Expands all three norms (Inf, L1, L2) for one element type, allowing a
// different accumulator type per norm.
2115 #define CV_DEF_NORM_ALL(suffix, type, inftype, l1type, l2type) \
2116     CV_DEF_NORM_FUNC(Inf, suffix, type, inftype) \
2117     CV_DEF_NORM_FUNC(L1, suffix, type, l1type) \
2118     CV_DEF_NORM_FUNC(L2, suffix, type, l2type)
// Instantiations: wider accumulators (double) where int would overflow,
// e.g. L2 of 16-bit data or L1/L2 of 32-bit and float data.
2120 CV_DEF_NORM_ALL(8u, uchar, int, int, int)
2121 CV_DEF_NORM_ALL(8s, schar, int, int, int)
2122 CV_DEF_NORM_ALL(16u, ushort, int, int, double)
2123 CV_DEF_NORM_ALL(16s, short, int, int, double)
2124 CV_DEF_NORM_ALL(32s, int, int, double, double)
2125 CV_DEF_NORM_ALL(32f, float, float, double, double)
2126 CV_DEF_NORM_ALL(64f, double, double, double, double)
// Type-erased signatures for the generated wrappers: data and result pointers
// are stored as uchar* and re-cast according to the depth by the caller.
2129 typedef int (*NormFunc)(const uchar*, const uchar*, uchar*, int, int);
2130 typedef int (*NormDiffFunc)(const uchar*, const uchar*, const uchar*, uchar*, int, int);
// Single-source kernel lookup: rows are norm type (Inf, L1, L2), columns are
// element depth (CV_8U..CV_64F; index 7 unused).
2132 static NormFunc getNormFunc(int normType, int depth)
2134     static NormFunc normTab[3][8] =
2137         (NormFunc)GET_OPTIMIZED(normInf_8u), (NormFunc)GET_OPTIMIZED(normInf_8s), (NormFunc)GET_OPTIMIZED(normInf_16u), (NormFunc)GET_OPTIMIZED(normInf_16s),
2138         (NormFunc)GET_OPTIMIZED(normInf_32s), (NormFunc)GET_OPTIMIZED(normInf_32f), (NormFunc)normInf_64f, 0
2141         (NormFunc)GET_OPTIMIZED(normL1_8u), (NormFunc)GET_OPTIMIZED(normL1_8s), (NormFunc)GET_OPTIMIZED(normL1_16u), (NormFunc)GET_OPTIMIZED(normL1_16s),
2142         (NormFunc)GET_OPTIMIZED(normL1_32s), (NormFunc)GET_OPTIMIZED(normL1_32f), (NormFunc)normL1_64f, 0
2145         (NormFunc)GET_OPTIMIZED(normL2_8u), (NormFunc)GET_OPTIMIZED(normL2_8s), (NormFunc)GET_OPTIMIZED(normL2_16u), (NormFunc)GET_OPTIMIZED(normL2_16s),
2146         (NormFunc)GET_OPTIMIZED(normL2_32s), (NormFunc)GET_OPTIMIZED(normL2_32f), (NormFunc)normL2_64f, 0
2150     return normTab[normType][depth];
// Two-source (difference) kernel lookup, same [normType][depth] layout.
2153 static NormDiffFunc getNormDiffFunc(int normType, int depth)
2155     static NormDiffFunc normDiffTab[3][8] =
2158         (NormDiffFunc)GET_OPTIMIZED(normDiffInf_8u), (NormDiffFunc)normDiffInf_8s,
2159         (NormDiffFunc)normDiffInf_16u, (NormDiffFunc)normDiffInf_16s,
2160         (NormDiffFunc)normDiffInf_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffInf_32f),
2161         (NormDiffFunc)normDiffInf_64f, 0
2164         (NormDiffFunc)GET_OPTIMIZED(normDiffL1_8u), (NormDiffFunc)normDiffL1_8s,
2165         (NormDiffFunc)normDiffL1_16u, (NormDiffFunc)normDiffL1_16s,
2166         (NormDiffFunc)normDiffL1_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL1_32f),
2167         (NormDiffFunc)normDiffL1_64f, 0
2170         (NormDiffFunc)GET_OPTIMIZED(normDiffL2_8u), (NormDiffFunc)normDiffL2_8s,
2171         (NormDiffFunc)normDiffL2_16u, (NormDiffFunc)normDiffL2_16s,
2172         (NormDiffFunc)normDiffL2_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL2_32f),
2173         (NormDiffFunc)normDiffL2_64f, 0
2177     return normDiffTab[normType][depth];
// OpenCL implementation of cv::norm (single source).  NORM_INF reduces via
// ocl_minMaxIdx on absolute values; L1/L2/L2SQR reduce via ocl_sum and
// post-process on the host.  Returns false to request the CPU fallback.
// NOTE(review): incomplete extract — early returns/braces missing.
2182 static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & result )
2184     const ocl::Device & d = ocl::Device::getDefault();
2185     int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
2186     bool doubleSupport = d.doubleFPConfig() > 0,
2187         haveMask = _mask.kind() != _InputArray::NONE;
// Unsupported norm type, or doubles without device support: fall back.
2189     if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) ||
2190         (!doubleSupport && depth == CV_64F))
// Guard against reduced-precision results for float input in these cases
// (rationale lives in the omitted surrounding lines — verify in full source).
2193     if( depth == CV_32F && (!_mask.empty() || normType == NORM_INF) )
2196     UMat src = _src.getUMat();
2198     if (normType == NORM_INF)
// Max of |x| via the min/max kernel; abs only needed for signed depths.
2200         if (!ocl_minMaxIdx(_src, NULL, &result, NULL, NULL, _mask,
2201                            std::max(depth, CV_32S), depth != CV_8U && depth != CV_16U))
2204     else if (normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR)
// Unsigned types need no abs for L1; L2 paths sum squares.
2207         bool unstype = depth == CV_8U || depth == CV_16U;
2209         if ( !ocl_sum(haveMask ? src : src.reshape(1), sc, normType == NORM_L2 || normType == NORM_L2SQR ?
2210                     OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS), _mask) )
// Fold the per-channel sums, then take sqrt only for plain NORM_L2.
2217         for (int i = 0; i < cn; ++i)
2220         result = normType == NORM_L1 || normType == NORM_L2SQR ? s : std::sqrt(s);
2230 double cv::norm( InputArray _src, int normType, InputArray _mask )
2232 normType &= NORM_TYPE_MASK;
2233 CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
2234 normType == NORM_L2 || normType == NORM_L2SQR ||
2235 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && _src.type() == CV_8U) );
2239 CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
2240 ocl_norm(_src, normType, _mask, _result),
2244 Mat src = _src.getMat(), mask = _mask.getMat();
2245 int depth = src.depth(), cn = src.channels();
2247 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
2248 size_t total_size = src.total();
2249 int rows = src.size[0], cols = (int)(total_size/rows);
2251 if( (src.dims == 2 || (src.isContinuous() && mask.isContinuous()))
2252 && cols > 0 && (size_t)rows*cols == total_size
2253 && (normType == NORM_INF || normType == NORM_L1 ||
2254 normType == NORM_L2 || normType == NORM_L2SQR) )
2256 IppiSize sz = { cols, rows };
2257 int type = src.type();
2260 typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
2261 ippiMaskNormFuncC1 ippFuncC1 =
2262 normType == NORM_INF ?
2263 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8u_C1MR :
2264 type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8s_C1MR :
2265 // type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_16u_C1MR :
2266 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_32f_C1MR :
2268 normType == NORM_L1 ?
2269 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8u_C1MR :
2270 type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8s_C1MR :
2271 type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_16u_C1MR :
2272 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_32f_C1MR :
2274 normType == NORM_L2 || normType == NORM_L2SQR ?
2275 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8u_C1MR :
2276 type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8s_C1MR :
2277 type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_16u_C1MR :
2278 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_32f_C1MR :
2283 if( ippFuncC1(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, &norm) >= 0 )
2284 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
2286 setIppErrorStatus();
2288 /*typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
2289 ippiMaskNormFuncC3 ippFuncC3 =
2290 normType == NORM_INF ?
2291 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8u_C3CMR :
2292 type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8s_C3CMR :
2293 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_16u_C3CMR :
2294 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_32f_C3CMR :
2296 normType == NORM_L1 ?
2297 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8u_C3CMR :
2298 type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8s_C3CMR :
2299 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_16u_C3CMR :
2300 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_32f_C3CMR :
2302 normType == NORM_L2 || normType == NORM_L2SQR ?
2303 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8u_C3CMR :
2304 type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8s_C3CMR :
2305 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_16u_C3CMR :
2306 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_32f_C3CMR :
2310 Ipp64f norm1, norm2, norm3;
2311 if( ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 &&
2312 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 &&
2313 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0)
2316 normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) :
2317 normType == NORM_L1 ? norm1 + norm2 + norm3 :
2318 normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) :
2320 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
2322 setIppErrorStatus();
2327 typedef IppStatus (CV_STDCALL* ippiNormFuncHint)(const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
2328 typedef IppStatus (CV_STDCALL* ippiNormFuncNoHint)(const void *, int, IppiSize, Ipp64f *);
2329 ippiNormFuncHint ippFuncHint =
2330 normType == NORM_L1 ?
2331 (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L1_32f_C1R :
2332 type == CV_32FC3 ? (ippiNormFuncHint)ippiNorm_L1_32f_C3R :
2333 type == CV_32FC4 ? (ippiNormFuncHint)ippiNorm_L1_32f_C4R :
2335 normType == NORM_L2 || normType == NORM_L2SQR ?
2336 (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L2_32f_C1R :
2337 type == CV_32FC3 ? (ippiNormFuncHint)ippiNorm_L2_32f_C3R :
2338 type == CV_32FC4 ? (ippiNormFuncHint)ippiNorm_L2_32f_C4R :
2340 ippiNormFuncNoHint ippFuncNoHint =
2341 normType == NORM_INF ?
2342 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C1R :
2343 type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C3R :
2344 type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C4R :
2345 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C1R :
2346 type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C3R :
2347 type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C4R :
2348 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C1R :
2349 #if (IPP_VERSION_X100 >= 801)
2350 type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
2351 type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
2353 type == CV_32FC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C1R :
2354 type == CV_32FC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C3R :
2355 type == CV_32FC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C4R :
2357 normType == NORM_L1 ?
2358 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C1R :
2359 type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C3R :
2360 type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C4R :
2361 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C1R :
2362 type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C3R :
2363 type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C4R :
2364 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C1R :
2365 type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C3R :
2366 type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C4R :
2368 normType == NORM_L2 || normType == NORM_L2SQR ?
2369 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C1R :
2370 type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C3R :
2371 type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C4R :
2372 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C1R :
2373 type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C3R :
2374 type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C4R :
2375 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C1R :
2376 type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C3R :
2377 type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C4R :
2379 // Make sure only zero or one version of the function pointer is valid
2380 CV_Assert(!ippFuncHint || !ippFuncNoHint);
2381 if( ippFuncHint || ippFuncNoHint )
2383 Ipp64f norm_array[4];
2384 IppStatus ret = ippFuncHint ? ippFuncHint(src.data, (int)src.step[0], sz, norm_array, ippAlgHintAccurate) :
2385 ippFuncNoHint(src.data, (int)src.step[0], sz, norm_array);
2388 Ipp64f norm = (normType == NORM_L2 || normType == NORM_L2SQR) ? norm_array[0] * norm_array[0] : norm_array[0];
2389 for( int i = 1; i < cn; i++ )
2392 normType == NORM_INF ? std::max(norm, norm_array[i]) :
2393 normType == NORM_L1 ? norm + norm_array[i] :
2394 normType == NORM_L2 || normType == NORM_L2SQR ? norm + norm_array[i] * norm_array[i] :
2397 return normType == NORM_L2 ? (double)std::sqrt(norm) : (double)norm;
2399 setIppErrorStatus();
2405 if( src.isContinuous() && mask.empty() )
2407 size_t len = src.total()*cn;
2408 if( len == (size_t)(int)len )
2410 if( depth == CV_32F )
2412 const float* data = src.ptr<float>();
2414 if( normType == NORM_L2 )
2417 GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
2418 return std::sqrt(result);
2420 if( normType == NORM_L2SQR )
2423 GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
2426 if( normType == NORM_L1 )
2429 GET_OPTIMIZED(normL1_32f)(data, 0, &result, (int)len, 1);
2432 if( normType == NORM_INF )
2435 GET_OPTIMIZED(normInf_32f)(data, 0, &result, (int)len, 1);
2439 if( depth == CV_8U )
2441 const uchar* data = src.ptr<uchar>();
2443 if( normType == NORM_HAMMING )
2444 return normHamming(data, (int)len);
2446 if( normType == NORM_HAMMING2 )
2447 return normHamming(data, (int)len, 2);
2452 CV_Assert( mask.empty() || mask.type() == CV_8U );
2454 if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
2459 bitwise_and(src, mask, temp);
2460 return norm(temp, normType);
2462 int cellSize = normType == NORM_HAMMING ? 1 : 2;
2464 const Mat* arrays[] = {&src, 0};
2466 NAryMatIterator it(arrays, ptrs);
2467 int total = (int)it.size;
2470 for( size_t i = 0; i < it.nplanes; i++, ++it )
2471 result += normHamming(ptrs[0], total, cellSize);
2476 NormFunc func = getNormFunc(normType >> 1, depth);
2477 CV_Assert( func != 0 );
2479 const Mat* arrays[] = {&src, &mask, 0};
2489 NAryMatIterator it(arrays, ptrs);
2490 int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
2491 bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
2492 ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
2494 int *ibuf = &result.i;
2499 intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
2500 blockSize = std::min(blockSize, intSumBlockSize);
2502 esz = src.elemSize();
2505 for( size_t i = 0; i < it.nplanes; i++, ++it )
2507 for( j = 0; j < total; j += blockSize )
2509 int bsz = std::min(total - j, blockSize);
2510 func( ptrs[0], ptrs[1], (uchar*)ibuf, bsz, cn );
2512 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
2524 if( normType == NORM_INF )
2526 if( depth == CV_64F )
2528 else if( depth == CV_32F )
2529 result.d = result.f;
2531 result.d = result.i;
2533 else if( normType == NORM_L2 )
2534 result.d = std::sqrt(result.d);
// OpenCL path for the two-array norm (and relative norm). Returns false to
// make the caller fall back to the CPU implementation.
// NOTE(review): this extract is lossy — the embedded line numbers (2543..2584)
// show gaps, so statements between the visible lines are missing here.
2543 static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask, double & result )
2546     int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
// NORM_RELATIVE is a flag bit OR-ed onto the base norm type; strip it after
// recording it so the switch below sees only the base type.
2547     bool relative = (normType & NORM_RELATIVE) != 0;
2548     normType &= ~NORM_RELATIVE;
2549     bool normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR;
2551     if ( !normsum || !_mask.empty() )
// L1/L2/L2SQR reduce via a (squared) per-channel sum on the device...
2556     if (!ocl_sum(_src1, sc1, normType == NORM_L2 || normType == NORM_L2SQR ?
2557                  OCL_OP_SUM_SQR : OCL_OP_SUM, _mask, _src2, relative, sc2))
// ...while NORM_INF reduces via a minMaxIdx pass instead.
2562     if (!ocl_minMaxIdx(_src1, NULL, &sc1[0], NULL, NULL, _mask, std::max(CV_32S, depth),
2563                        false, _src2, relative ? &sc2[0] : NULL))
// Accumulate the per-channel partial results into the scalar output.
2569     for (int i = 0; i < cn; ++i)
2576     if (normType == NORM_L2)
2578         result = std::sqrt(result);
// Relative norm: divide by the norm of the second array, epsilon-guarded
// against division by zero.
2584     result /= (s2 + DBL_EPSILON);
// Norm of the difference of two arrays, or the relative norm ||a-b||/||b||
// when CV_RELATIVE is set in normType. Strategy: OpenCL first, then IPP
// fast paths (masked C1, then unmasked hint/no-hint variants), then a
// continuous-float shortcut, then the generic NAryMatIterator fallback.
// NOTE(review): this extract is lossy — the embedded numbering (2593..2986)
// shows interior lines are missing; all visible code kept byte-identical.
2593 double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
2595 CV_Assert( _src1.sameSize(_src2) && _src1.type() == _src2.type() );
// Try the OpenCL implementation when the inputs are UMats.
2599 CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src1.isUMat()),
2600 ocl_norm(_src1, _src2, normType, _mask, _result),
// ---- Relative norm branch: ||src1 - src2|| / ||src2|| ----
2604 if( normType & CV_RELATIVE )
2606 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
2607 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
2609 normType &= NORM_TYPE_MASK;
2610 CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR ||
2611 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
// IPP works on 2D images, so a multi-dim array is only eligible when
// continuous and reshapeable to rows x cols.
2612 size_t total_size = src1.total();
2613 int rows = src1.size[0], cols = (int)(total_size/rows);
2614 if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
2615 && cols > 0 && (size_t)rows*cols == total_size
2616 && (normType == NORM_INF || normType == NORM_L1 ||
2617 normType == NORM_L2 || normType == NORM_L2SQR) )
2619 IppiSize sz = { cols, rows };
2620 int type = src1.type();
// Masked single-channel IPP relative-norm dispatch table (chained ternary,
// falls through to 0 for unsupported type/norm combinations).
2623 typedef IppStatus (CV_STDCALL* ippiMaskNormRelFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
2624 ippiMaskNormRelFuncC1 ippFuncC1 =
2625 normType == NORM_INF ?
2626 (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8u_C1MR :
2627 type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8s_C1MR :
2628 type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_16u_C1MR :
2629 type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_32f_C1MR :
2631 normType == NORM_L1 ?
2632 (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8u_C1MR :
2633 type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8s_C1MR :
2634 type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_16u_C1MR :
2635 type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_32f_C1MR :
2637 normType == NORM_L2 || normType == NORM_L2SQR ?
2638 (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8u_C1MR :
2639 type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8s_C1MR :
2640 type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_16u_C1MR :
2641 type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_32f_C1MR :
// IPP returns the L2 norm; square it when the caller asked for L2SQR.
// On IPP failure, note the error and fall through to the generic path.
2646 if( ippFuncC1(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, &norm) >= 0 )
2647 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
2648 setIppErrorStatus();
// Unmasked IPP relative-norm dispatch: integer types use the no-hint
// variant, 32f uses the hinted variant (accuracy hint passed at the call).
2653 typedef IppStatus (CV_STDCALL* ippiNormRelFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *);
2654 typedef IppStatus (CV_STDCALL* ippiNormRelFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
2655 ippiNormRelFuncNoHint ippFuncNoHint =
2656 normType == NORM_INF ?
2657 (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_8u_C1R :
2658 type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16u_C1R :
2659 type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16s_C1R :
2660 type == CV_32FC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_32f_C1R :
2662 normType == NORM_L1 ?
2663 (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_8u_C1R :
2664 type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16u_C1R :
2665 type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16s_C1R :
2667 normType == NORM_L2 || normType == NORM_L2SQR ?
2668 (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_8u_C1R :
2669 type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16u_C1R :
2670 type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16s_C1R :
2672 ippiNormRelFuncHint ippFuncHint =
2673 normType == NORM_L1 ?
2674 (type == CV_32FC1 ? (ippiNormRelFuncHint)ippiNormRel_L1_32f_C1R :
2676 normType == NORM_L2 || normType == NORM_L2SQR ?
2677 (type == CV_32FC1 ? (ippiNormRelFuncHint)ippiNormRel_L2_32f_C1R :
2682 if( ippFuncNoHint(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], sz, &norm) >= 0 )
2683 return (double)norm;
2684 setIppErrorStatus();
2689 if( ippFuncHint(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], sz, &norm, ippAlgHintAccurate) >= 0 )
2690 return (double)norm;
2691 setIppErrorStatus();
// Generic relative-norm fallback: two recursive norm() calls with an
// epsilon-guarded denominator.
2696 return norm(_src1, _src2, normType & ~CV_RELATIVE, _mask)/(norm(_src2, normType, _mask) + DBL_EPSILON);
// ---- Absolute norm of the difference from here on ----
2699 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
2700 int depth = src1.depth(), cn = src1.channels();
2703 CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
2704 normType == NORM_L2 || normType == NORM_L2SQR ||
2705 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
// IPP fast path for the difference norm (same 2D/continuous eligibility
// check as the relative branch above).
2707 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
2708 size_t total_size = src1.total();
2709 int rows = src1.size[0], cols = (int)(total_size/rows);
2710 if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
2711 && cols > 0 && (size_t)rows*cols == total_size
2712 && (normType == NORM_INF || normType == NORM_L1 ||
2713 normType == NORM_L2 || normType == NORM_L2SQR) )
2715 IppiSize sz = { cols, rows };
2716 int type = src1.type();
// Masked single-channel IPP NormDiff dispatch. Note the 8s L1 entry is
// deliberately commented out in upstream.
2719 typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
2720 ippiMaskNormDiffFuncC1 ippFuncC1 =
2721 normType == NORM_INF ?
2722 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8u_C1MR :
2723 type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8s_C1MR :
2724 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_16u_C1MR :
2725 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_32f_C1MR :
2727 normType == NORM_L1 ?
2728 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8u_C1MR :
2729 //type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8s_C1MR :
2730 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_16u_C1MR :
2731 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_32f_C1MR :
2733 normType == NORM_L2 || normType == NORM_L2SQR ?
2734 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8u_C1MR :
2735 type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8s_C1MR :
2736 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_16u_C1MR :
2737 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_32f_C1MR :
2742 if( ippFuncC1(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, &norm) >= 0 )
2743 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
2744 setIppErrorStatus();
// The masked 3-channel variant below is disabled upstream (kept inside a
// block comment); preserved verbatim.
2746 /*typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC3)(const void *, int, const void *, int, const void *, int, IppiSize, int, Ipp64f *);
2747 ippiMaskNormDiffFuncC3 ippFuncC3 =
2748 normType == NORM_INF ?
2749 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8u_C3CMR :
2750 type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8s_C3CMR :
2751 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_16u_C3CMR :
2752 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_32f_C3CMR :
2754 normType == NORM_L1 ?
2755 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8u_C3CMR :
2756 type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8s_C3CMR :
2757 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_16u_C3CMR :
2758 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_32f_C3CMR :
2760 normType == NORM_L2 || normType == NORM_L2SQR ?
2761 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8u_C3CMR :
2762 type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8s_C3CMR :
2763 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_16u_C3CMR :
2764 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_32f_C3CMR :
2768 Ipp64f norm1, norm2, norm3;
2769 if( ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 &&
2770 ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 &&
2771 ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0)
2774 normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) :
2775 normType == NORM_L1 ? norm1 + norm2 + norm3 :
2776 normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) :
2778 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
2780 setIppErrorStatus();
// Unmasked IPP NormDiff dispatch (32f hinted; integer types no-hint).
// The 16s Inf C3/C4 entries are gated on IPP >= 8.0.1 because older IPP
// returned -32768 for them (see upstream comments on those lines).
2785 typedef IppStatus (CV_STDCALL* ippiNormDiffFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
2786 typedef IppStatus (CV_STDCALL* ippiNormDiffFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *);
2787 ippiNormDiffFuncHint ippFuncHint =
2788 normType == NORM_L1 ?
2789 (type == CV_32FC1 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C1R :
2790 type == CV_32FC3 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C3R :
2791 type == CV_32FC4 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C4R :
2793 normType == NORM_L2 || normType == NORM_L2SQR ?
2794 (type == CV_32FC1 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C1R :
2795 type == CV_32FC3 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C3R :
2796 type == CV_32FC4 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C4R :
2798 ippiNormDiffFuncNoHint ippFuncNoHint =
2799 normType == NORM_INF ?
2800 (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C1R :
2801 type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C3R :
2802 type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C4R :
2803 type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C1R :
2804 type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C3R :
2805 type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C4R :
2806 type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C1R :
2807 #if (IPP_VERSION_X100 >= 801)
2808 type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
2809 type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
2811 type == CV_32FC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C1R :
2812 type == CV_32FC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C3R :
2813 type == CV_32FC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C4R :
2815 normType == NORM_L1 ?
2816 (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C1R :
2817 type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C3R :
2818 type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C4R :
2819 type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C1R :
2820 type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C3R :
2821 type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C4R :
2822 type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C1R :
2823 type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C3R :
2824 type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C4R :
2826 normType == NORM_L2 || normType == NORM_L2SQR ?
2827 (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C1R :
2828 type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C3R :
2829 type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C4R :
2830 type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C1R :
2831 type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C3R :
2832 type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C4R :
2833 type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C1R :
2834 type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C3R :
2835 type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C4R :
2837 // Make sure only zero or one version of the function pointer is valid
2838 CV_Assert(!ippFuncHint || !ippFuncNoHint)
2839 if( ippFuncHint || ippFuncNoHint )
// IPP returns per-channel norms; combine them below according to the
// norm type (max for INF, sum for L1, sum of squares for L2/L2SQR).
2841 Ipp64f norm_array[4];
2842 IppStatus ret = ippFuncHint ? ippFuncHint(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], sz, norm_array, ippAlgHintAccurate) :
2843 ippFuncNoHint(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], sz, norm_array);
2846 Ipp64f norm = (normType == NORM_L2 || normType == NORM_L2SQR) ? norm_array[0] * norm_array[0] : norm_array[0];
2847 for( int i = 1; i < src1.channels(); i++ )
2850 normType == NORM_INF ? std::max(norm, norm_array[i]) :
2851 normType == NORM_L1 ? norm + norm_array[i] :
2852 normType == NORM_L2 || normType == NORM_L2SQR ? norm + norm_array[i] * norm_array[i] :
2855 return normType == NORM_L2 ? (double)std::sqrt(norm) : (double)norm;
2857 setIppErrorStatus();
// Fast path: continuous unmasked float data goes straight to the
// optimized flat kernels.
2863 if( src1.isContinuous() && src2.isContinuous() && mask.empty() )
2865 size_t len = src1.total()*src1.channels();
2866 if( len == (size_t)(int)len )
2868 if( src1.depth() == CV_32F )
2870 const float* data1 = src1.ptr<float>();
2871 const float* data2 = src2.ptr<float>();
2873 if( normType == NORM_L2 )
2876 GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
2877 return std::sqrt(result);
2879 if( normType == NORM_L2SQR )
2882 GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
2885 if( normType == NORM_L1 )
2888 GET_OPTIMIZED(normDiffL1_32f)(data1, data2, 0, &result, (int)len, 1);
2891 if( normType == NORM_INF )
2894 GET_OPTIMIZED(normDiffInf_32f)(data1, data2, 0, &result, (int)len, 1);
2901 CV_Assert( mask.empty() || mask.type() == CV_8U );
// Hamming distance: xor the inputs, mask via bitwise_and, then count bits
// (delegated to the single-array norm of the temporary).
2903 if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
2908 bitwise_xor(src1, src2, temp);
2909 bitwise_and(temp, mask, temp);
2910 return norm(temp, normType);
2912 int cellSize = normType == NORM_HAMMING ? 1 : 2;
2914 const Mat* arrays[] = {&src1, &src2, 0};
2916 NAryMatIterator it(arrays, ptrs);
2917 int total = (int)it.size;
2920 for( size_t i = 0; i < it.nplanes; i++, ++it )
2921 result += normHamming(ptrs[0], ptrs[1], total, cellSize);
// Generic fallback: plane-by-plane accumulation; small integer depths
// accumulate into an int buffer in blocks to avoid overflow, flushing the
// partial sum when intSumBlockSize is reached.
2926 NormDiffFunc func = getNormDiffFunc(normType >> 1, depth);
2927 CV_Assert( func != 0 );
2929 const Mat* arrays[] = {&src1, &src2, &mask, 0};
2940 NAryMatIterator it(arrays, ptrs);
2941 int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
2942 bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
2943 ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
2945 unsigned *ibuf = &result.u;
2950 intSumBlockSize = normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15);
2951 blockSize = std::min(blockSize, intSumBlockSize);
2953 esz = src1.elemSize();
2956 for( size_t i = 0; i < it.nplanes; i++, ++it )
2958 for( j = 0; j < total; j += blockSize )
2960 int bsz = std::min(total - j, blockSize);
2961 func( ptrs[0], ptrs[1], ptrs[2], (uchar*)ibuf, bsz, cn );
2963 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
// Convert the accumulated union field to double and finish: INF may have
// been tracked as double/float/unsigned depending on depth; L2 needs a
// final square root.
2976 if( normType == NORM_INF )
2978 if( depth == CV_64F )
2980 else if( depth == CV_32F )
2981 result.d = result.f;
2983 result.d = result.u;
2985 else if( normType == NORM_L2 )
2986 result.d = std::sqrt(result.d);
2992 ///////////////////////////////////// batch distance ///////////////////////////////////////
// Computes the L1 distance from one query vector (src1) to each of nvecs
// row vectors in src2 (row stride step2, in bytes). When a mask is given,
// masked-out rows receive the max representable distance so they never win.
// NOTE(review): lossy extract — the if(!mask)/else framing lines are missing.
2997 template<typename _Tp, typename _Rt>
2998 void batchDistL1_(const _Tp* src1, const _Tp* src2, size_t step2,
2999 int nvecs, int len, _Rt* dist, const uchar* mask)
// Convert the byte stride to an element stride.
3001 step2 /= sizeof(src2[0]);
3004 for( int i = 0; i < nvecs; i++ )
3005 dist[i] = normL1<_Tp, _Rt>(src1, src2 + step2*i, len);
// Masked variant: sentinel value for rejected rows.
3009 _Rt val0 = std::numeric_limits<_Rt>::max();
3010 for( int i = 0; i < nvecs; i++ )
3011 dist[i] = mask[i] ? normL1<_Tp, _Rt>(src1, src2 + step2*i, len) : val0;
// Squared-L2 batch distance: one query vector against nvecs rows of src2.
// Masked-out rows get the max representable value as a sentinel.
3015 template<typename _Tp, typename _Rt>
3016 void batchDistL2Sqr_(const _Tp* src1, const _Tp* src2, size_t step2,
3017 int nvecs, int len, _Rt* dist, const uchar* mask)
// Byte stride -> element stride.
3019 step2 /= sizeof(src2[0]);
3022 for( int i = 0; i < nvecs; i++ )
3023 dist[i] = normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len);
// Masked variant.
3027 _Rt val0 = std::numeric_limits<_Rt>::max();
3028 for( int i = 0; i < nvecs; i++ )
3029 dist[i] = mask[i] ? normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len) : val0;
// Euclidean (L2) batch distance: like batchDistL2Sqr_ but applies sqrt to
// each result. Masked-out rows get the max representable sentinel.
3033 template<typename _Tp, typename _Rt>
3034 void batchDistL2_(const _Tp* src1, const _Tp* src2, size_t step2,
3035 int nvecs, int len, _Rt* dist, const uchar* mask)
// Byte stride -> element stride.
3037 step2 /= sizeof(src2[0]);
3040 for( int i = 0; i < nvecs; i++ )
3041 dist[i] = std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len));
// Masked variant.
3045 _Rt val0 = std::numeric_limits<_Rt>::max();
3046 for( int i = 0; i < nvecs; i++ )
3047 dist[i] = mask[i] ? std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len)) : val0;
// Hamming (cell size 1) batch distance over packed bit descriptors.
// Masked-out rows receive a sentinel (val0, declared in a missing line).
3051 static void batchDistHamming(const uchar* src1, const uchar* src2, size_t step2,
3052 int nvecs, int len, int* dist, const uchar* mask)
3054 step2 /= sizeof(src2[0]);
3057 for( int i = 0; i < nvecs; i++ )
3058 dist[i] = normHamming(src1, src2 + step2*i, len);
// Masked variant.
3063 for( int i = 0; i < nvecs; i++ )
3064 dist[i] = mask[i] ? normHamming(src1, src2 + step2*i, len) : val0;
// Hamming distance with 2-bit cells (NORM_HAMMING2) — same layout as
// batchDistHamming but each pair of bits counts as one unit.
3068 static void batchDistHamming2(const uchar* src1, const uchar* src2, size_t step2,
3069 int nvecs, int len, int* dist, const uchar* mask)
3071 step2 /= sizeof(src2[0]);
3074 for( int i = 0; i < nvecs; i++ )
3075 dist[i] = normHamming(src1, src2 + step2*i, len, 2);
// Masked variant.
3080 for( int i = 0; i < nvecs; i++ )
3081 dist[i] = mask[i] ? normHamming(src1, src2 + step2*i, len, 2) : val0;
// Non-template instantiation wrapper: L1 distance, 8u input -> 32s output.
3085 static void batchDistL1_8u32s(const uchar* src1, const uchar* src2, size_t step2,
3086 int nvecs, int len, int* dist, const uchar* mask)
3088 batchDistL1_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask);
// Non-template instantiation wrapper: L1 distance, 8u input -> 32f output.
3091 static void batchDistL1_8u32f(const uchar* src1, const uchar* src2, size_t step2,
3092 int nvecs, int len, float* dist, const uchar* mask)
3094 batchDistL1_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
// Non-template instantiation wrapper: squared L2, 8u input -> 32s output.
3097 static void batchDistL2Sqr_8u32s(const uchar* src1, const uchar* src2, size_t step2,
3098 int nvecs, int len, int* dist, const uchar* mask)
3100 batchDistL2Sqr_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask);
// Non-template instantiation wrapper: squared L2, 8u input -> 32f output.
3103 static void batchDistL2Sqr_8u32f(const uchar* src1, const uchar* src2, size_t step2,
3104 int nvecs, int len, float* dist, const uchar* mask)
3106 batchDistL2Sqr_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
// Non-template instantiation wrapper: L2 (sqrt) distance, 8u -> 32f.
3109 static void batchDistL2_8u32f(const uchar* src1, const uchar* src2, size_t step2,
3110 int nvecs, int len, float* dist, const uchar* mask)
3112 batchDistL2_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
// Non-template instantiation wrapper: L1 distance, 32f input -> 32f output.
3115 static void batchDistL1_32f(const float* src1, const float* src2, size_t step2,
3116 int nvecs, int len, float* dist, const uchar* mask)
3118 batchDistL1_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
// Non-template instantiation wrapper: squared L2, 32f input -> 32f output.
3121 static void batchDistL2Sqr_32f(const float* src1, const float* src2, size_t step2,
3122 int nvecs, int len, float* dist, const uchar* mask)
3124 batchDistL2Sqr_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
// Non-template instantiation wrapper: L2 (sqrt) distance, 32f -> 32f.
3127 static void batchDistL2_32f(const float* src1, const float* src2, size_t step2,
3128 int nvecs, int len, float* dist, const uchar* mask)
3130 batchDistL2_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
// Uniform function-pointer type used to dispatch the batchDist* kernels;
// element and distance types are erased to uchar* and cast back per kernel.
3133 typedef void (*BatchDistFunc)(const uchar* src1, const uchar* src2, size_t step2,
3134 int nvecs, int len, uchar* dist, const uchar* mask);
// Parallel body for cv::batchDistance: each invocation processes a range of
// src1 rows, computing that row's distance to every src2 row via func, and
// (when K > 0) maintaining the K best matches per row by sorted insertion.
// NOTE(review): lossy extract — ctor initializer list, loop bodies and the
// struct's member declarations fall in missing lines.
3137 struct BatchDistInvoker : public ParallelLoopBody
3139     BatchDistInvoker( const Mat& _src1, const Mat& _src2,
3140                       Mat& _dist, Mat& _nidx, int _K,
3141                       const Mat& _mask, int _update,
3142                       BatchDistFunc _func)
3154     void operator()(const Range& range) const
// Scratch distances for one src1 row against all src2 rows (K > 0 case).
3156         AutoBuffer<int> buf(src2->rows);
3159         for( int i = range.start; i < range.end; i++ )
// K > 0: write distances into the scratch buffer, else directly into dist.
3161             func(src1->ptr(i), src2->ptr(), src2->step, src2->rows, src2->cols,
3162                  K > 0 ? (uchar*)bufptr : dist->ptr(i), mask->data ? mask->ptr(i) : 0);
3166                 int* nidxptr = nidx->ptr<int>(i);
3167                 // since positive float's can be compared just like int's,
3168                 // we handle both CV_32S and CV_32F cases with a single branch
3169                 int* distptr = (int*)dist->ptr(i);
3173                 for( j = 0; j < src2->rows; j++ )
// Sorted insertion of candidate j into the current K-best list.
3176                     if( d < distptr[K-1] )
3178                         for( k = K-2; k >= 0 && distptr[k] > d; k-- )
3180                             nidxptr[k+1] = nidxptr[k];
3181                             distptr[k+1] = distptr[k];
3183                         nidxptr[k+1] = j + update;
// Computes distances from every row of src1 to rows of src2 (optionally the
// K nearest with indices in _nidx), with optional mask, incremental update
// offset, and a cross-check mode that keeps only mutual nearest neighbours.
// NOTE(review): lossy extract — embedded numbering (3203..3311) shows
// missing interior lines; visible code kept byte-identical.
3203 void cv::batchDistance( InputArray _src1, InputArray _src2,
3204                         OutputArray _dist, int dtype, OutputArray _nidx,
3205                         int normType, int K, InputArray _mask,
3206                         int update, bool crosscheck )
3208 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
3209 int type = src1.type();
3210 CV_Assert( type == src2.type() && src1.cols == src2.cols &&
3211            (type == CV_32F || type == CV_8U));
// _nidx must be requested exactly when K-nearest output is requested.
3212 CV_Assert( _nidx.needed() == (K > 0) );
// Default dtype: integer for Hamming norms, float otherwise.
3216 dtype = normType == NORM_HAMMING || normType == NORM_HAMMING2 ? CV_32S : CV_32F;
3218 CV_Assert( (type == CV_8U && dtype == CV_32S) || dtype == CV_32F);
3220 K = std::min(K, src2.rows);
3222 _dist.create(src1.rows, (K > 0 ? K : src2.rows), dtype);
3223 Mat dist = _dist.getMat(), nidx;
3224 if( _nidx.needed() )
3226     _nidx.create(dist.size(), CV_32S);
3227     nidx = _nidx.getMat();
// Fresh K-NN search: initialize distances to +inf sentinel and indices to -1.
3230 if( update == 0 && K > 0 )
3232     dist = Scalar::all(dtype == CV_32S ? (double)INT_MAX : (double)FLT_MAX);
3233     nidx = Scalar::all(-1);
// Cross-check mode: run the search in the reverse direction first, then keep
// only mutual best matches (see the upstream explanation below).
3238 CV_Assert( K == 1 && update == 0 && mask.empty() );
3240 batchDistance(src2, src1, tdist, dtype, tidx, normType, K, mask, 0, false);
3242 // if an idx-th element from src1 appeared to be the nearest to i-th element of src2,
3243 // we update the minimum mutual distance between idx-th element of src1 and the whole src2 set.
3244 // As a result, if nidx[idx] = i*, it means that idx-th element of src1 is the nearest
3245 // to i*-th element of src2 and i*-th element of src2 is the closest to idx-th element of src1.
3246 // If nidx[idx] = -1, it means that there is no such ideal couple for it in src2.
3247 // This O(N) procedure is called cross-check and it helps to eliminate some false matches.
3248 if( dtype == CV_32S )
3250     for( int i = 0; i < tdist.rows; i++ )
3252         int idx = tidx.at<int>(i);
3253         int d = tdist.at<int>(i), d0 = dist.at<int>(idx);
3256             dist.at<int>(idx) = d;
3257             nidx.at<int>(idx) = i + update;
// Same cross-check merge for the float dtype.
3263     for( int i = 0; i < tdist.rows; i++ )
3265         int idx = tidx.at<int>(i);
3266         float d = tdist.at<float>(i), d0 = dist.at<float>(idx);
3269             dist.at<float>(idx) = d;
3270             nidx.at<int>(idx) = i + update;
// Select the concrete kernel for the (input type, dtype, norm) combination.
3277 BatchDistFunc func = 0;
3280     if( normType == NORM_L1 && dtype == CV_32S )
3281         func = (BatchDistFunc)batchDistL1_8u32s;
3282     else if( normType == NORM_L1 && dtype == CV_32F )
3283         func = (BatchDistFunc)batchDistL1_8u32f;
3284     else if( normType == NORM_L2SQR && dtype == CV_32S )
3285         func = (BatchDistFunc)batchDistL2Sqr_8u32s;
3286     else if( normType == NORM_L2SQR && dtype == CV_32F )
3287         func = (BatchDistFunc)batchDistL2Sqr_8u32f;
3288     else if( normType == NORM_L2 && dtype == CV_32F )
3289         func = (BatchDistFunc)batchDistL2_8u32f;
3290     else if( normType == NORM_HAMMING && dtype == CV_32S )
3291         func = (BatchDistFunc)batchDistHamming;
3292     else if( normType == NORM_HAMMING2 && dtype == CV_32S )
3293         func = (BatchDistFunc)batchDistHamming2;
3295 else if( type == CV_32F && dtype == CV_32F )
3297     if( normType == NORM_L1 )
3298         func = (BatchDistFunc)batchDistL1_32f;
3299     else if( normType == NORM_L2SQR )
3300         func = (BatchDistFunc)batchDistL2Sqr_32f;
3301     else if( normType == NORM_L2 )
3302         func = (BatchDistFunc)batchDistL2_32f;
3306     CV_Error_(CV_StsUnsupportedFormat,
3307               ("The combination of type=%d, dtype=%d and normType=%d is not supported",
3308               type, dtype, normType));
// Parallelize over src1 rows.
3310 parallel_for_(Range(0, src1.rows),
3311               BatchDistInvoker(src1, src2, dist, nidx, K, mask, update, func));
// Collects the (x, y) coordinates of all non-zero pixels of a CV_8UC1 image
// into an n x 1 CV_32SC2 output (counted up front via countNonZero).
// NOTE(review): lossy extract — the per-pixel non-zero test (presumably on
// bin_ptr[j]) falls in a missing line; confirm against the full file.
3315 void cv::findNonZero( InputArray _src, OutputArray _idx )
3317 Mat src = _src.getMat();
3318 CV_Assert( src.type() == CV_8UC1 );
3319 int n = countNonZero(src);
// A pre-existing non-continuous Mat destination cannot hold the flat list.
3320 if( _idx.kind() == _InputArray::MAT && !_idx.getMatRef().isContinuous() )
3322 _idx.create(n, 1, CV_32SC2);
3323 Mat idx = _idx.getMat();
3324 CV_Assert(idx.isContinuous());
3325 Point* idx_ptr = (Point*)idx.data;
3327 for( int i = 0; i < src.rows; i++ )
3329 const uchar* bin_ptr = src.ptr(i);
3330 for( int j = 0; j < src.cols; j++ )
3332 *idx_ptr++ = Point(j, i);
// Peak signal-to-noise ratio between two 8-bit arrays, in dB:
// 20*log10(255 / RMSE), with DBL_EPSILON guarding against identical inputs
// (RMSE == 0) producing a division by zero.
3336 double cv::PSNR(InputArray _src1, InputArray _src2)
3338 CV_Assert( _src1.depth() == CV_8U );
// RMSE = sqrt(mean squared error) over all elements and channels.
3339 double diff = std::sqrt(norm(_src1, _src2, NORM_L2SQR)/(_src1.total()*_src1.channels()));
3340 return 20*log10(255./(diff+DBL_EPSILON));
// Legacy C-API wrapper over cv::sum. If the source is an IplImage with a
// channel-of-interest (COI) selected, only that channel's sum is returned
// (moved into the scalar's first component).
3344 CV_IMPL CvScalar cvSum( const CvArr* srcarr )
3346 cv::Scalar sum = cv::sum(cv::cvarrToMat(srcarr, false, true, 1));
3347 if( CV_IS_IMAGE(srcarr) )
3349 int coi = cvGetImageCOI((IplImage*)srcarr);
3352 CV_Assert( 0 < coi && coi <= 4 );
3353 sum = cv::Scalar(sum[coi-1]);
// Legacy C-API wrapper over cv::countNonZero. Multi-channel inputs are first
// reduced to the selected channel-of-interest via extractImageCOI.
3359 CV_IMPL int cvCountNonZero( const CvArr* imgarr )
3361 cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
3362 if( img.channels() > 1 )
3363 cv::extractImageCOI(imgarr, img);
3364 return countNonZero(img);
// Legacy C-API wrapper over cv::mean (optionally masked). For an IplImage
// with a COI selected, only that channel's mean is returned.
3369 cvAvg( const void* imgarr, const void* maskarr )
3371 cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
3372 cv::Scalar mean = !maskarr ? cv::mean(img) : cv::mean(img, cv::cvarrToMat(maskarr));
3373 if( CV_IS_IMAGE(imgarr) )
3375 int coi = cvGetImageCOI((IplImage*)imgarr);
3378 CV_Assert( 0 < coi && coi <= 4 );
3379 mean = cv::Scalar(mean[coi-1]);
// Legacy C-API wrapper over cv::meanStdDev (optionally masked). With a COI
// selected, both outputs are narrowed to the selected channel. Results are
// written through the optional _mean/_sdv output pointers.
3387 cvAvgSdv( const CvArr* imgarr, CvScalar* _mean, CvScalar* _sdv, const void* maskarr )
3389 cv::Scalar mean, sdv;
3393 mask = cv::cvarrToMat(maskarr);
3395 cv::meanStdDev(cv::cvarrToMat(imgarr, false, true, 1), mean, sdv, mask );
3397 if( CV_IS_IMAGE(imgarr) )
3399 int coi = cvGetImageCOI((IplImage*)imgarr);
3402 CV_Assert( 0 < coi && coi <= 4 );
3403 mean = cv::Scalar(mean[coi-1]);
3404 sdv = cv::Scalar(sdv[coi-1]);
// Copy results out through the optional output pointers.
3409 *(cv::Scalar*)_mean = mean;
3411 *(cv::Scalar*)_sdv = sdv;
// Legacy C-API wrapper over cv::minMaxLoc (optionally masked); multi-channel
// inputs are reduced to the selected channel-of-interest first.
3416 cvMinMaxLoc( const void* imgarr, double* _minVal, double* _maxVal,
3417              CvPoint* _minLoc, CvPoint* _maxLoc, const void* maskarr )
3419 cv::Mat mask, img = cv::cvarrToMat(imgarr, false, true, 1);
3421 mask = cv::cvarrToMat(maskarr);
3422 if( img.channels() > 1 )
3423 cv::extractImageCOI(imgarr, img);
// CvPoint and cv::Point are layout-compatible, hence the direct casts.
3425 cv::minMaxLoc( img, _minVal, _maxVal,
3426                (cv::Point*)_minLoc, (cv::Point*)_maxLoc, mask );
// Legacy C-API wrapper over cv::norm: single-array norm when imgB is NULL,
// two-array (difference/relative) norm otherwise; COI-aware for IplImages.
// NOTE(review): lossy extract — the imgB == NULL branching falls in missing
// lines between the visible statements.
3431 cvNorm( const void* imgA, const void* imgB, int normType, const void* maskarr )
3440 a = cv::cvarrToMat(imgA, false, true, 1);
3442 mask = cv::cvarrToMat(maskarr);
3444 if( a.channels() > 1 && CV_IS_IMAGE(imgA) && cvGetImageCOI((const IplImage*)imgA) > 0 )
3445 cv::extractImageCOI(imgA, a);
// Single-array norm.
3448 return !maskarr ? cv::norm(a, normType) : cv::norm(a, normType, mask);
// Two-array norm: reduce imgB to its COI as well before comparing.
3450 cv::Mat b = cv::cvarrToMat(imgB, false, true, 1);
3451 if( b.channels() > 1 && CV_IS_IMAGE(imgB) && cvGetImageCOI((const IplImage*)imgB) > 0 )
3452 cv::extractImageCOI(imgB, b);
3454 return !maskarr ? cv::norm(a, b, normType) : cv::norm(a, b, normType, mask);