1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "precomp.hpp"
47 #include "opencl_kernels.hpp"
// Copies the raw per-channel values of a pixel-sized value `v` into a cv::Scalar.
// NOTE(review): this dump skips lines here (the `Scalar s;` declaration and the
// return statement are not visible) — verify against the complete file.
52 template<typename T> static inline Scalar rawToScalar(const T& v)
55 typedef typename DataType<T>::channel_type T1;  // element type of T's channels
56 int i, n = DataType<T>::channels;               // number of channels packed in T
// Reinterpret v as an array of channel values and copy each one into the Scalar.
57 for( i = 0; i < n; i++ )
58 s.val[i] = ((T1*)&v)[i];
62 /****************************************************************************************\
64 \****************************************************************************************/
// Accumulates per-channel sums of `len` pixels of `src0` (cn channels each)
// into dst[0..cn-1], using accumulator type ST. Specialized unrolled paths for
// cn == 1, 2, 3 and multiples of 4 are visible below; the masked path adds only
// pixels whose mask byte is nonzero. Returns a count (presumably of pixels
// summed — the return statements are not visible in this dump; confirm).
// NOTE(review): dump is missing the branch headers/braces between the cases.
66 template<typename T, typename ST>
67 static int sum_(const T* src0, const uchar* mask, ST* dst, int len, int cn )
// --- single-channel path: 4x manual unroll when enabled ---
78 #if CV_ENABLE_UNROLLED
79 for(; i <= len - 4; i += 4, src += cn*4 )
80 s0 += src[0] + src[cn] + src[cn*2] + src[cn*3];
82 for( ; i < len; i++, src += cn )
// --- two-channel path: independent accumulators s0/s1 ---
88 ST s0 = dst[0], s1 = dst[1];
89 for( i = 0; i < len; i++, src += cn )
// --- three-channel path ---
99 ST s0 = dst[0], s1 = dst[1], s2 = dst[2];
100 for( i = 0; i < len; i++, src += cn )
// --- generic path: process channels four at a time ---
111 for( ; k < cn; k += 4 )
114 ST s0 = dst[k], s1 = dst[k+1], s2 = dst[k+2], s3 = dst[k+3];
115 for( i = 0; i < len; i++, src += cn )
117 s0 += src[0]; s1 += src[1];
118 s2 += src[2]; s3 += src[3];
// --- masked paths below: only pixels with nonzero mask byte contribute ---
132 for( i = 0; i < len; i++ )
142 ST s0 = dst[0], s1 = dst[1], s2 = dst[2];
143 for( i = 0; i < len; i++, src += 3 )
// masked generic-cn path; per-pixel inner loop over channels, 4x unrolled
157 for( i = 0; i < len; i++, src += cn )
161 #if CV_ENABLE_UNROLLED
162 for( ; k <= cn - 4; k += 4 )
165 s0 = dst[k] + src[k];
166 s1 = dst[k+1] + src[k+1];
167 dst[k] = s0; dst[k+1] = s1;
168 s0 = dst[k+2] + src[k+2];
169 s1 = dst[k+3] + src[k+3];
170 dst[k+2] = s0; dst[k+3] = s1;
// Depth-specific thin wrappers over sum_<T,ST>; 8/16-bit inputs accumulate
// into int, 32-bit and double inputs accumulate into double. These give the
// template instantiations stable addresses for the function-pointer table below.
182 static int sum8u( const uchar* src, const uchar* mask, int* dst, int len, int cn )
183 { return sum_(src, mask, dst, len, cn); }
185 static int sum8s( const schar* src, const uchar* mask, int* dst, int len, int cn )
186 { return sum_(src, mask, dst, len, cn); }
188 static int sum16u( const ushort* src, const uchar* mask, int* dst, int len, int cn )
189 { return sum_(src, mask, dst, len, cn); }
191 static int sum16s( const short* src, const uchar* mask, int* dst, int len, int cn )
192 { return sum_(src, mask, dst, len, cn); }
194 static int sum32s( const int* src, const uchar* mask, double* dst, int len, int cn )
195 { return sum_(src, mask, dst, len, cn); }
197 static int sum32f( const float* src, const uchar* mask, double* dst, int len, int cn )
198 { return sum_(src, mask, dst, len, cn); }
200 static int sum64f( const double* src, const uchar* mask, double* dst, int len, int cn )
201 { return sum_(src, mask, dst, len, cn); }
// Type-erased signature shared by all sum kernels (src/dst passed as uchar*).
203 typedef int (*SumFunc)(const uchar*, const uchar* mask, uchar*, int, int);
// Returns the sum kernel for a given CV depth; the table is indexed by depth
// (CV_8U..CV_64F). NOTE(review): some table rows are missing from this dump.
205 static SumFunc getSumFunc(int depth)
207 static SumFunc sumTab[] =
209 (SumFunc)GET_OPTIMIZED(sum8u), (SumFunc)sum8s,
210 (SumFunc)sum16u, (SumFunc)sum16s,
212 (SumFunc)GET_OPTIMIZED(sum32f), (SumFunc)sum64f,
216 return sumTab[depth];
// Generic scalar kernel: counts elements of src[0..len) that are != 0.
// (The `template<typename T>` header line is not visible in this dump.)
220 static int countNonZero_(const T* src, int len )
// 4x unrolled main loop — each comparison yields 0/1 and the four are summed.
223 #if CV_ENABLE_UNROLLED
224 for(; i <= len - 4; i += 4 )
225 nz += (src[i] != 0) + (src[i+1] != 0) + (src[i+2] != 0) + (src[i+3] != 0);
// scalar tail for the remaining < 4 elements
227 for( ; i < len; i++ )
// 8-bit specialization with an SSE2 fast path: compares 16 bytes at a time
// against zero, extracts the comparison mask with _mm_movemask_epi8, and uses
// a 256-entry lookup table that maps each mask byte to its count of ZERO bits
// in the mask (i.e. nonzero pixels), since the mask has 1-bits where src == 0.
232 static int countNonZero8u( const uchar* src, int len )
238 __m128i pattern = _mm_setzero_si128 ();
239 static uchar tab[256];
// lazily built once; `volatile` flag guards the one-time init (not a full
// memory barrier — pre-C++11 style double-checked init, left as-is)
240 static volatile bool initialized = false;
243 // we compute inverse popcount table,
244 // since we pass (img[x] == 0) mask as index in the table.
245 for( int j = 0; j < 256; j++ )
248 for( int mask = 1; mask < 256; mask += mask )
249 val += (j & mask) == 0;
// main SIMD loop: 16 pixels per iteration
255 for (; i<=len-16; i+=16)
257 __m128i r0 = _mm_loadu_si128((const __m128i*)(src+i));
258 int val = _mm_movemask_epi8(_mm_cmpeq_epi8(r0, pattern));
259 nz += tab[val & 255] + tab[val >> 8];
// scalar tail
263 for( ; i < len; i++ )
// Depth-specific wrappers over the generic countNonZero_ template, giving the
// instantiations addresses for the function-pointer table below.
268 static int countNonZero16u( const ushort* src, int len )
269 { return countNonZero_(src, len); }
271 static int countNonZero32s( const int* src, int len )
272 { return countNonZero_(src, len); }
274 static int countNonZero32f( const float* src, int len )
275 { return countNonZero_(src, len); }
277 static int countNonZero64f( const double* src, int len )
278 { return countNonZero_(src, len); }
// Type-erased kernel signature (src passed as uchar*).
280 typedef int (*CountNonZeroFunc)(const uchar*, int);
// Depth -> kernel table. Note CV_8U and CV_8S share the 8u kernel, and CV_16U /
// CV_16S share the 16u kernel: counting nonzero bytes/shorts is sign-agnostic.
282 static CountNonZeroFunc getCountNonZeroTab(int depth)
284 static CountNonZeroFunc countNonZeroTab[] =
286 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u),
287 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u),
288 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32s), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32f),
289 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero64f), 0  // index 7 (CV_USRTYPE1) unused
292 return countNonZeroTab[depth];
// Accumulates per-channel sums into sum[0..cn-1] and sums of squares into
// sqsum[0..cn-1] for `len` pixels. ST is the sum accumulator type, SQT the
// square-sum accumulator type. Mirrors sum_ above: unmasked fast paths for
// cn == 1, 2, 3 and multiples of 4, then masked variants. Returns a count
// (return statements not visible in this dump; presumably pixels processed).
// NOTE(review): branch headers/braces between cases are missing from the dump.
295 template<typename T, typename ST, typename SQT>
296 static int sumsqr_(const T* src0, const uchar* mask, ST* sum, SQT* sqsum, int len, int cn )
// --- unmasked, single channel ---
309 for( i = 0; i < len; i++, src += cn )
312 s0 += v; sq0 += (SQT)v*v;
// --- unmasked, two channels ---
319 ST s0 = sum[0], s1 = sum[1];
320 SQT sq0 = sqsum[0], sq1 = sqsum[1];
321 for( i = 0; i < len; i++, src += cn )
323 T v0 = src[0], v1 = src[1];
324 s0 += v0; sq0 += (SQT)v0*v0;
325 s1 += v1; sq1 += (SQT)v1*v1;
327 sum[0] = s0; sum[1] = s1;
328 sqsum[0] = sq0; sqsum[1] = sq1;
// --- unmasked, three channels ---
332 ST s0 = sum[0], s1 = sum[1], s2 = sum[2];
333 SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
334 for( i = 0; i < len; i++, src += cn )
336 T v0 = src[0], v1 = src[1], v2 = src[2];
337 s0 += v0; sq0 += (SQT)v0*v0;
338 s1 += v1; sq1 += (SQT)v1*v1;
339 s2 += v2; sq2 += (SQT)v2*v2;
341 sum[0] = s0; sum[1] = s1; sum[2] = s2;
342 sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
// --- unmasked, generic cn: four channels per outer step ---
345 for( ; k < cn; k += 4 )
348 ST s0 = sum[k], s1 = sum[k+1], s2 = sum[k+2], s3 = sum[k+3];
349 SQT sq0 = sqsum[k], sq1 = sqsum[k+1], sq2 = sqsum[k+2], sq3 = sqsum[k+3];
350 for( i = 0; i < len; i++, src += cn )
353 v0 = src[0], v1 = src[1];
354 s0 += v0; sq0 += (SQT)v0*v0;
355 s1 += v1; sq1 += (SQT)v1*v1;
356 v0 = src[2], v1 = src[3];
357 s2 += v0; sq2 += (SQT)v0*v0;
358 s3 += v1; sq3 += (SQT)v1*v1;
360 sum[k] = s0; sum[k+1] = s1;
361 sum[k+2] = s2; sum[k+3] = s3;
362 sqsum[k] = sq0; sqsum[k+1] = sq1;
363 sqsum[k+2] = sq2; sqsum[k+3] = sq3;
// --- masked paths: only pixels whose mask byte is nonzero contribute ---
374 for( i = 0; i < len; i++ )
378 s0 += v; sq0 += (SQT)v*v;
// masked, three channels
386 ST s0 = sum[0], s1 = sum[1], s2 = sum[2];
387 SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
388 for( i = 0; i < len; i++, src += 3 )
391 T v0 = src[0], v1 = src[1], v2 = src[2];
392 s0 += v0; sq0 += (SQT)v0*v0;
393 s1 += v1; sq1 += (SQT)v1*v1;
394 s2 += v2; sq2 += (SQT)v2*v2;
397 sum[0] = s0; sum[1] = s1; sum[2] = s2;
398 sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
// masked, generic cn: per-pixel loop over all channels
402 for( i = 0; i < len; i++, src += cn )
405 for( int k = 0; k < cn; k++ )
409 SQT sq = sqsum[k] + (SQT)v*v;
410 sum[k] = s; sqsum[k] = sq;
// Depth-specific wrappers over sumsqr_<T,ST,SQT>. 8-bit inputs can accumulate
// both sum and square-sum in int; 16-bit inputs need double for the square
// sum; wider inputs use double for both.
419 static int sqsum8u( const uchar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
420 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
422 static int sqsum8s( const schar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
423 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
425 static int sqsum16u( const ushort* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
426 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
428 static int sqsum16s( const short* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
429 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
431 static int sqsum32s( const int* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
432 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
434 static int sqsum32f( const float* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
435 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
437 static int sqsum64f( const double* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
438 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
// Type-erased signature for the combined sum + square-sum kernels.
440 typedef int (*SumSqrFunc)(const uchar*, const uchar* mask, uchar*, uchar*, int, int);
// Depth-indexed kernel table (CV_8U..CV_64F, trailing 0 for the unused slot).
442 static SumSqrFunc getSumSqrTab(int depth)
444 static SumSqrFunc sumSqrTab[] =
446 (SumSqrFunc)GET_OPTIMIZED(sqsum8u), (SumSqrFunc)sqsum8s, (SumSqrFunc)sqsum16u, (SumSqrFunc)sqsum16s,
447 (SumSqrFunc)sqsum32s, (SumSqrFunc)GET_OPTIMIZED(sqsum32f), (SumSqrFunc)sqsum64f, 0
450 return sumSqrTab[depth];
// Reduces a 1-row matrix of per-workgroup partial results (as produced by the
// OpenCL reduce kernel) into a single per-channel Scalar by summing columns.
// NOTE(review): the accumulation statement and `return s;` are not visible in
// this dump — verify against the complete file.
455 template <typename T> Scalar ocl_part_sum(Mat m)
457 CV_Assert(m.rows == 1);
459 Scalar s = Scalar::all(0);
460 int cn = m.channels();
461 const T * const ptr = m.ptr<T>(0);
// walk the flattened row, folding element x into channel accumulator s[c]
463 for (int x = 0, w = m.cols * cn; x < w; )
464 for (int c = 0; c < cn; ++c, ++x)
// Reduction operation selectors for the shared OpenCL "reduce" kernel.
470 enum { OCL_OP_SUM = 0, OCL_OP_SUM_ABS = 1, OCL_OP_SUM_SQR = 2 };
// OpenCL implementation of sum / sum-of-absolute / sum-of-squares over _src
// (optionally masked, optionally with a second operand _src2 and a second
// result res2 when calc2 is set). Returns false when the device/type combo is
// unsupported so the caller can fall back to the CPU path.
472 static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask = noArray(),
473 InputArray _src2 = noArray(), bool calc2 = false, const Scalar & res2 = Scalar() )
475 CV_Assert(sum_op == OCL_OP_SUM || sum_op == OCL_OP_SUM_ABS || sum_op == OCL_OP_SUM_SQR);
477 const ocl::Device & dev = ocl::Device::getDefault();
478 bool doubleSupport = dev.doubleFPConfig() > 0,
479 haveMask = _mask.kind() != _InputArray::NONE,
480 haveSrc2 = _src2.kind() != _InputArray::NONE;
// kercn: vector width the kernel processes per work item; only widened for
// unmasked single-channel input.
481 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
482 kercn = cn == 1 && !haveMask ? ocl::predictOptimalVectorWidth(_src) : 1,
483 mcn = std::max(cn, kercn);
484 CV_Assert(!haveSrc2 || _src2.type() == type);
// bail out: no FP64 on device for CV_64F input, or more than 4 channels
486 if ( (!doubleSupport && depth == CV_64F) || cn > 4 )
// one partial result per compute unit; doubled when a second sum is computed
489 int ngroups = dev.maxComputeUnits(), dbsize = ngroups * (calc2 ? 2 : 1);
490 size_t wgs = dev.maxWorkGroupSize();
// accumulator depth: at least CV_32S (CV_32F for square sums) to avoid overflow
492 int ddepth = std::max(sum_op == OCL_OP_SUM_SQR ? CV_32F : CV_32S, depth),
493 dtype = CV_MAKE_TYPE(ddepth, cn);
494 CV_Assert(!haveMask || _mask.type() == CV_8UC1);
// round the work-group size down to a power of two for the tree reduction
496 int wgs2_aligned = 1;
497 while (wgs2_aligned < (int)wgs)
501 static const char * const opMap[3] = { "OP_SUM", "OP_SUM_ABS", "OP_SUM_SQR" };
// compile-time configuration of the generic reduce kernel via -D defines
503 String opts = format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D dstT1=%s -D ddepth=%d -D cn=%d"
504 " -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s%s%s -D kercn=%d%s%s%s",
505 ocl::typeToStr(CV_MAKE_TYPE(depth, mcn)), ocl::typeToStr(depth),
506 ocl::typeToStr(dtype), ocl::typeToStr(CV_MAKE_TYPE(ddepth, mcn)),
507 ocl::typeToStr(ddepth), ddepth, cn,
508 ocl::convertTypeStr(depth, ddepth, mcn, cvt),
509 opMap[sum_op], (int)wgs, wgs2_aligned,
510 doubleSupport ? " -D DOUBLE_SUPPORT" : "",
511 haveMask ? " -D HAVE_MASK" : "",
512 _src.isContinuous() ? " -D HAVE_SRC_CONT" : "",
513 haveMask && _mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn,
514 haveSrc2 ? " -D HAVE_SRC2" : "", calc2 ? " -D OP_CALC2" : "",
515 haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "");
517 ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, opts);
// db holds one partial per group (x2 when calc2); results gathered on host below
521 UMat src = _src.getUMat(), src2 = _src2.getUMat(),
522 db(1, dbsize, dtype), mask = _mask.getUMat();
524 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
525 dbarg = ocl::KernelArg::PtrWriteOnly(db),
526 maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
527 src2arg = ocl::KernelArg::ReadOnlyNoSize(src2);
// four arg-binding variants: {mask, src2} x {present, absent}
// (the surrounding if/else lines are not visible in this dump)
532 k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg, maskarg, src2arg);
534 k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg, maskarg);
539 k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg, src2arg);
541 k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg);
544 size_t globalsize = ngroups * wgs;
545 if (k.run(1, &globalsize, &wgs, false))
// fold the per-group partials on the host with the matching ocl_part_sum<T>
547 typedef Scalar (*part_sum)(Mat m);
548 part_sum funcs[3] = { ocl_part_sum<int>, ocl_part_sum<float>, ocl_part_sum<double> },
549 func = funcs[ddepth - CV_32S];
551 Mat mres = db.getMat(ACCESS_READ);
// res2 is formally const-ref (has a default) — const_cast writes the 2nd result
553 const_cast<Scalar &>(res2) = func(mres.colRange(ngroups, dbsize));
555 res = func(mres.colRange(0, ngroups));
// Public cv::sum: per-channel sum of all elements of _src.
// Dispatch order: OpenCL (UMat, <=2 dims) -> IPP (2D/continuous) -> generic
// CPU loop over planes using the depth-specific SumFunc kernel.
565 cv::Scalar cv::sum( InputArray _src )
569 CV_OCL_RUN_(_src.isUMat() && _src.dims() <= 2,
570 ocl_sum(_src, _res, OCL_OP_SUM),
574 Mat src = _src.getMat();
575 int k, cn = src.channels(), depth = src.depth();
577 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
// treat a continuous N-D array as a single rows x cols 2D image for IPP
578 size_t total_size = src.total();
579 int rows = src.size[0], cols = (int)(total_size/rows);
580 if( src.dims == 2 || (src.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
582 IppiSize sz = { cols, rows };
583 int type = src.type();
584 typedef IppStatus (CV_STDCALL* ippiSumFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
585 typedef IppStatus (CV_STDCALL* ippiSumFuncNoHint)(const void*, int, IppiSize, double *);
// 32f variants take an accuracy-hint parameter; integer variants do not
586 ippiSumFuncHint ippFuncHint =
587 type == CV_32FC1 ? (ippiSumFuncHint)ippiSum_32f_C1R :
588 type == CV_32FC3 ? (ippiSumFuncHint)ippiSum_32f_C3R :
589 type == CV_32FC4 ? (ippiSumFuncHint)ippiSum_32f_C4R :
591 ippiSumFuncNoHint ippFuncNoHint =
592 type == CV_8UC1 ? (ippiSumFuncNoHint)ippiSum_8u_C1R :
593 type == CV_8UC3 ? (ippiSumFuncNoHint)ippiSum_8u_C3R :
594 type == CV_8UC4 ? (ippiSumFuncNoHint)ippiSum_8u_C4R :
595 type == CV_16UC1 ? (ippiSumFuncNoHint)ippiSum_16u_C1R :
596 type == CV_16UC3 ? (ippiSumFuncNoHint)ippiSum_16u_C3R :
597 type == CV_16UC4 ? (ippiSumFuncNoHint)ippiSum_16u_C4R :
598 type == CV_16SC1 ? (ippiSumFuncNoHint)ippiSum_16s_C1R :
599 type == CV_16SC3 ? (ippiSumFuncNoHint)ippiSum_16s_C3R :
600 type == CV_16SC4 ? (ippiSumFuncNoHint)ippiSum_16s_C4R :
// at most one of the two pointers may be set for a given type
602 CV_Assert(!ippFuncHint || !ippFuncNoHint);
603 if( ippFuncHint || ippFuncNoHint )
606 IppStatus ret = ippFuncHint ? ippFuncHint(src.data, (int)src.step[0], sz, res, ippAlgHintAccurate) :
607 ippFuncNoHint(src.data, (int)src.step[0], sz, res);
611 for( int i = 0; i < cn; i++ )
// ---- generic CPU fallback ----
620 SumFunc func = getSumFunc(depth);
622 CV_Assert( cn <= 4 && func != 0 );
624 const Mat* arrays[] = {&src, 0};
626 NAryMatIterator it(arrays, ptrs);
628 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
630 AutoBuffer<int> _buf;
631 int* buf = (int*)&s[0];
// small depths accumulate in int and are flushed to the double Scalar in
// blocks, before the int accumulators could overflow
633 bool blockSum = depth < CV_32S;
637 intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
638 blockSize = std::min(blockSize, intSumBlockSize);
642 for( k = 0; k < cn; k++ )
644 esz = src.elemSize();
// iterate over continuous planes, then over blocks within each plane
647 for( size_t i = 0; i < it.nplanes; i++, ++it )
649 for( j = 0; j < total; j += blockSize )
651 int bsz = std::min(total - j, blockSize);
652 func( ptrs[0], 0, (uchar*)buf, bsz, cn );
// flush int block accumulators into the final sum when the block is full
// or this is the very last block
654 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
656 for( k = 0; k < cn; k++ )
// OpenCL countNonZero: runs the shared "reduce" kernel in OP_COUNT_NON_ZERO
// mode (one int partial per compute unit), then sums the partials on the host
// with cv::sum. Returns false when the device cannot handle the type, so the
// caller falls back to the CPU path.
673 static bool ocl_countNonZero( InputArray _src, int & res )
675 int type = _src.type(), depth = CV_MAT_DEPTH(type), kercn = ocl::predictOptimalVectorWidth(_src);
676 bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
678 if (depth == CV_64F && !doubleSupport)
681 int dbsize = ocl::Device::getDefault().maxComputeUnits();
682 size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
// power-of-two work-group bound for the in-kernel tree reduction
684 int wgs2_aligned = 1;
685 while (wgs2_aligned < (int)wgs)
689 ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
690 format("-D srcT=%s -D srcT1=%s -D cn=1 -D OP_COUNT_NON_ZERO"
691 " -D WGS=%d -D kercn=%d -D WGS2_ALIGNED=%d%s%s",
692 ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)),
693 ocl::typeToStr(depth), (int)wgs, kercn,
694 wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
695 _src.isContinuous() ? " -D HAVE_SRC_CONT" : ""));
699 UMat src = _src.getUMat(), db(1, dbsize, CV_32SC1);
700 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
701 dbsize, ocl::KernelArg::PtrWriteOnly(db));
703 size_t globalsize = dbsize * wgs;
// blocking run (last arg true), then host-side reduction of the partials
704 if (k.run(1, &globalsize, &wgs, true))
705 return res = saturate_cast<int>(cv::sum(db.getMat(ACCESS_READ))[0]), true;
// Public cv::countNonZero: number of nonzero elements of a single-channel array.
// Dispatch: OpenCL (UMat, <=2 dims) -> IPP (currently disabled by the `&& 0`
// in the preprocessor condition) -> generic CPU kernel over planes.
713 int cv::countNonZero( InputArray _src )
715 int type = _src.type(), cn = CV_MAT_CN(type);
716 CV_Assert( cn == 1 );
720 CV_OCL_RUN_(_src.isUMat() && _src.dims() <= 2,
721 ocl_countNonZero(_src, res),
725 Mat src = _src.getMat();
// NOTE: the trailing `&& 0` deliberately disables this IPP branch
727 #if defined HAVE_IPP && !defined HAVE_IPP_ICV_ONLY && 0
728 if (src.dims <= 2 || src.isContinuous())
730 IppiSize roiSize = { src.cols, src.rows };
731 Ipp32s count = 0, srcstep = (Ipp32s)src.step;
732 IppStatus status = (IppStatus)-1;
// continuous data is treated as one long row
734 if (src.isContinuous())
736 roiSize.width = (Ipp32s)src.total();
738 srcstep = (Ipp32s)src.total() * CV_ELEM_SIZE(type);
741 int depth = CV_MAT_DEPTH(type);
// IPP counts elements IN range [0,0] (i.e. zeros); nonzeros = total - zeros
743 status = ippiCountInRange_8u_C1R((const Ipp8u *)src.data, srcstep, roiSize, &count, 0, 0);
744 else if (depth == CV_32F)
745 status = ippiCountInRange_32f_C1R((const Ipp32f *)src.data, srcstep, roiSize, &count, 0, 0);
748 return (Ipp32s)src.total() - count;
// ---- generic CPU fallback ----
753 CountNonZeroFunc func = getCountNonZeroTab(src.depth());
754 CV_Assert( func != 0 );
756 const Mat* arrays[] = {&src, 0};
758 NAryMatIterator it(arrays, ptrs);
759 int total = (int)it.size, nz = 0;
// accumulate over each continuous plane
761 for( size_t i = 0; i < it.nplanes; i++, ++it )
762 nz += func( ptrs[0], total );
// Public cv::mean: per-channel average of _src, optionally restricted to
// pixels where the 8-bit mask is nonzero.
// Dispatch: IPP masked variants -> IPP unmasked variants -> generic CPU path
// (masked SumFunc + nonzero count, then divide by the count).
767 cv::Scalar cv::mean( InputArray _src, InputArray _mask )
769 Mat src = _src.getMat(), mask = _mask.getMat();
770 CV_Assert( mask.empty() || mask.type() == CV_8U );
772 int k, cn = src.channels(), depth = src.depth();
774 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
// flatten a continuous N-D array into one rows x cols image for IPP
775 size_t total_size = src.total();
776 int rows = src.size[0], cols = (int)(total_size/rows);
777 if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
779 IppiSize sz = { cols, rows };
780 int type = src.type();
// ---- masked, single channel ----
783 typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC1)(const void *, int, void *, int, IppiSize, Ipp64f *);
784 ippiMaskMeanFuncC1 ippFuncC1 =
785 type == CV_8UC1 ? (ippiMaskMeanFuncC1)ippiMean_8u_C1MR :
786 type == CV_16UC1 ? (ippiMaskMeanFuncC1)ippiMean_16u_C1MR :
787 type == CV_32FC1 ? (ippiMaskMeanFuncC1)ippiMean_32f_C1MR :
792 if( ippFuncC1(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, &res) >= 0 )
// ---- masked, three channels: one IPP call per channel (channel index 1..3) ----
796 typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC3)(const void *, int, void *, int, IppiSize, int, Ipp64f *);
797 ippiMaskMeanFuncC3 ippFuncC3 =
798 type == CV_8UC3 ? (ippiMaskMeanFuncC3)ippiMean_8u_C3CMR :
799 type == CV_16UC3 ? (ippiMaskMeanFuncC3)ippiMean_16u_C3CMR :
800 type == CV_32FC3 ? (ippiMaskMeanFuncC3)ippiMean_32f_C3CMR :
804 Ipp64f res1, res2, res3;
805 if( ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &res1) >= 0 &&
806 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &res2) >= 0 &&
807 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &res3) >= 0 )
809 return Scalar(res1, res2, res3);
// ---- unmasked variants; 32f versions take an accuracy hint ----
816 typedef IppStatus (CV_STDCALL* ippiMeanFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
817 typedef IppStatus (CV_STDCALL* ippiMeanFuncNoHint)(const void*, int, IppiSize, double *);
818 ippiMeanFuncHint ippFuncHint =
819 type == CV_32FC1 ? (ippiMeanFuncHint)ippiMean_32f_C1R :
820 type == CV_32FC3 ? (ippiMeanFuncHint)ippiMean_32f_C3R :
821 type == CV_32FC4 ? (ippiMeanFuncHint)ippiMean_32f_C4R :
823 ippiMeanFuncNoHint ippFuncNoHint =
824 type == CV_8UC1 ? (ippiMeanFuncNoHint)ippiMean_8u_C1R :
825 type == CV_8UC3 ? (ippiMeanFuncNoHint)ippiMean_8u_C3R :
826 type == CV_8UC4 ? (ippiMeanFuncNoHint)ippiMean_8u_C4R :
827 type == CV_16UC1 ? (ippiMeanFuncNoHint)ippiMean_16u_C1R :
828 type == CV_16UC3 ? (ippiMeanFuncNoHint)ippiMean_16u_C3R :
829 type == CV_16UC4 ? (ippiMeanFuncNoHint)ippiMean_16u_C4R :
830 type == CV_16SC1 ? (ippiMeanFuncNoHint)ippiMean_16s_C1R :
831 type == CV_16SC3 ? (ippiMeanFuncNoHint)ippiMean_16s_C3R :
832 type == CV_16SC4 ? (ippiMeanFuncNoHint)ippiMean_16s_C4R :
834 // Make sure only zero or one version of the function pointer is valid
835 CV_Assert(!ippFuncHint || !ippFuncNoHint);
836 if( ippFuncHint || ippFuncNoHint )
839 IppStatus ret = ippFuncHint ? ippFuncHint(src.data, (int)src.step[0], sz, res, ippAlgHintAccurate) :
840 ippFuncNoHint(src.data, (int)src.step[0], sz, res);
844 for( int i = 0; i < cn; i++ )
// ---- generic CPU fallback: masked block-sum + count, then divide ----
854 SumFunc func = getSumFunc(depth);
856 CV_Assert( cn <= 4 && func != 0 );
858 const Mat* arrays[] = {&src, &mask, 0};
860 NAryMatIterator it(arrays, ptrs);
862 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
864 AutoBuffer<int> _buf;
865 int* buf = (int*)&s[0];
// small depths sum in int and are flushed in blocks before overflow
866 bool blockSum = depth <= CV_16S;
867 size_t esz = 0, nz0 = 0;
871 intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
872 blockSize = std::min(blockSize, intSumBlockSize);
876 for( k = 0; k < cn; k++ )
878 esz = src.elemSize();
881 for( size_t i = 0; i < it.nplanes; i++, ++it )
883 for( j = 0; j < total; j += blockSize )
885 int bsz = std::min(total - j, blockSize);
// kernel returns the number of unmasked pixels in this block
886 int nz = func( ptrs[0], ptrs[1], (uchar*)buf, bsz, cn );
889 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
891 for( k = 0; k < cn; k++ )
// divide by the pixel count; all-masked input yields Scalar of zeros
903 return s*(nz0 ? 1./nz0 : 0);
// OpenCL mean + standard deviation. Runs the meanStdDev kernel, which emits
// per-group partial sums, partial square-sums, and (when masked) partial pixel
// counts into one packed buffer; the host folds the partials, converts to
// mean/stddev, and writes them into the caller-provided output arrays.
910 static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
912 bool haveMask = _mask.kind() != _InputArray::NONE;
// without a mask the pixel count is known up front; with a mask it is reduced
// on the device (-1 marks "to be computed")
913 int nz = haveMask ? -1 : (int)_src.total();
917 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
918 bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0,
919 isContinuous = _src.isContinuous();
920 int groups = ocl::Device::getDefault().maxComputeUnits();
921 size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
// sums accumulate at >= CV_32S; square sums at >= CV_32F
923 int ddepth = std::max(CV_32S, depth), sqddepth = std::max(CV_32F, depth),
924 dtype = CV_MAKE_TYPE(ddepth, cn),
925 sqdtype = CV_MAKETYPE(sqddepth, cn);
926 CV_Assert(!haveMask || _mask.type() == CV_8UC1);
// power-of-two bound for the in-kernel tree reduction
928 int wgs2_aligned = 1;
929 while (wgs2_aligned < (int)wgs)
933 if ( (!doubleSupport && depth == CV_64F) || cn > 4 )
937 String opts = format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D sqddepth=%d"
938 " -D sqdstT=%s -D sqdstT1=%s -D convertToSDT=%s -D cn=%d%s"
939 " -D convertToDT=%s -D WGS=%d -D WGS2_ALIGNED=%d%s%s",
940 ocl::typeToStr(type), ocl::typeToStr(depth),
941 ocl::typeToStr(dtype), ocl::typeToStr(ddepth), sqddepth,
942 ocl::typeToStr(sqdtype), ocl::typeToStr(sqddepth),
943 ocl::convertTypeStr(depth, sqddepth, cn, cvt[0]),
944 cn, isContinuous ? " -D HAVE_SRC_CONT" : "",
945 ocl::convertTypeStr(depth, ddepth, cn, cvt[1]),
946 (int)wgs, wgs2_aligned, haveMask ? " -D HAVE_MASK" : "",
947 doubleSupport ? " -D DOUBLE_SUPPORT" : "");
949 ocl::Kernel k("meanStdDev", ocl::core::meanstddev_oclsrc, opts);
// packed layout per group: [sum (dtype)][sqsum (sqdtype)][count (int, masked only)]
953 int dbsize = groups * ((haveMask ? CV_ELEM_SIZE1(CV_32S) : 0) +
954 CV_ELEM_SIZE(sqdtype) + CV_ELEM_SIZE(dtype));
955 UMat src = _src.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat();
957 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
958 dbarg = ocl::KernelArg::PtrWriteOnly(db),
959 maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);
962 k.args(srcarg, src.cols, (int)src.total(), groups, dbarg, maskarg);
964 k.args(srcarg, src.cols, (int)src.total(), groups, dbarg);
966 size_t globalsize = groups * wgs;
967 if (!k.run(1, &globalsize, &wgs, false))
// host-side fold of the per-group partials, dispatched on accumulator depth
970 typedef Scalar (* part_sum)(Mat m);
971 part_sum funcs[3] = { ocl_part_sum<int>, ocl_part_sum<float>, ocl_part_sum<double> };
972 Mat dbm = db.getMat(ACCESS_READ);
974 mean = funcs[ddepth - CV_32S](Mat(1, groups, dtype, dbm.data));
975 stddev = funcs[sqddepth - CV_32S](Mat(1, groups, sqdtype, dbm.data + groups * CV_ELEM_SIZE(dtype)));
// masked case: pixel count comes from the trailing int section of the buffer
978 nz = saturate_cast<int>(funcs[0](Mat(1, groups, CV_32SC1, dbm.data +
979 groups * (CV_ELEM_SIZE(dtype) +
980 CV_ELEM_SIZE(sqdtype))))[0]);
// convert sums into mean and stddev: sd = sqrt(E[x^2] - E[x]^2), clamped at 0
983 double total = nz != 0 ? 1.0 / nz : 0;
984 int k, j, cn = _src.channels();
985 for (int i = 0; i < cn; ++i)
988 stddev[i] = std::sqrt(std::max(stddev[i] * total - mean[i] * mean[i] , 0.));
// write mean (j==0) then stddev (j==1) into the requested output arrays,
// zero-padding any extra destination channels
991 for( j = 0; j < 2; j++ )
993 const double * const sptr = j == 0 ? &mean[0] : &stddev[0];
994 _OutputArray _dst = j == 0 ? _mean : _sdv;
998 if( !_dst.fixedSize() )
999 _dst.create(cn, 1, CV_64F, -1, true);
1000 Mat dst = _dst.getMat();
1001 int dcn = (int)dst.total();
1002 CV_Assert( dst.type() == CV_64F && dst.isContinuous() &&
1003 (dst.cols == 1 || dst.rows == 1) && dcn >= cn );
1004 double* dptr = dst.ptr<double>();
1005 for( k = 0; k < cn; k++ )
1007 for( ; k < dcn; k++ )
// Public cv::meanStdDev: per-channel mean and standard deviation of _src,
// optionally restricted by an 8-bit mask.
// Dispatch: OpenCL (UMat, <=2 dims) -> IPP masked/unmasked variants ->
// generic CPU path (SumSqrFunc block accumulation, then sd = sqrt(E[x^2]-E[x]^2)).
1018 void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
1020 CV_OCL_RUN(_src.isUMat() && _src.dims() <= 2,
1021 ocl_meanStdDev(_src, _mean, _sdv, _mask))
1023 Mat src = _src.getMat(), mask = _mask.getMat();
1024 CV_Assert( mask.empty() || mask.type() == CV_8UC1 );
1026 int k, cn = src.channels(), depth = src.depth();
1028 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
1029 size_t total_size = src.total();
1030 int rows = src.size[0], cols = (int)(total_size/rows);
1031 if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
// IPP writes straight into the caller's output buffers when they are
// requested; otherwise into these temporaries
1033 Ipp64f mean_temp[3];
1034 Ipp64f stddev_temp[3];
1035 Ipp64f *pmean = &mean_temp[0];
1036 Ipp64f *pstddev = &stddev_temp[0];
1039 if( _mean.needed() )
1041 if( !_mean.fixedSize() )
1042 _mean.create(cn, 1, CV_64F, -1, true);
1043 mean = _mean.getMat();
1044 dcn_mean = (int)mean.total();
1045 pmean = (Ipp64f *)mean.data;
1047 int dcn_stddev = -1;
1050 if( !_sdv.fixedSize() )
1051 _sdv.create(cn, 1, CV_64F, -1, true);
1052 stddev = _sdv.getMat();
1053 dcn_stddev = (int)stddev.total();
1054 pstddev = (Ipp64f *)stddev.data;
// zero-pad destination channels beyond cn
1056 for( int c = cn; c < dcn_mean; c++ )
1058 for( int c = cn; c < dcn_stddev; c++ )
1060 IppiSize sz = { cols, rows };
1061 int type = src.type();
// ---- masked, single channel ----
1064 typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC1)(const void *, int, void *, int, IppiSize, Ipp64f *, Ipp64f *);
1065 ippiMaskMeanStdDevFuncC1 ippFuncC1 =
1066 type == CV_8UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_8u_C1MR :
1067 type == CV_16UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_16u_C1MR :
1068 type == CV_32FC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_32f_C1MR :
1072 if( ippFuncC1(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, pmean, pstddev) >= 0 )
// on IPP failure, record it and fall through to the generic path
1074 setIppErrorStatus();
// ---- masked, three channels: one call per channel index (1..3) ----
1076 typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC3)(const void *, int, void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
1077 ippiMaskMeanStdDevFuncC3 ippFuncC3 =
1078 type == CV_8UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CMR :
1079 type == CV_16UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CMR :
1080 type == CV_32FC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CMR :
1084 if( ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
1085 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
1086 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
1088 setIppErrorStatus();
// ---- unmasked variants ----
1093 typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC1)(const void *, int, IppiSize, Ipp64f *, Ipp64f *);
1094 ippiMeanStdDevFuncC1 ippFuncC1 =
1095 type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R :
1096 type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R :
// 32f C1 only used on IPP >= 8.0.1 because of a known bug in 7.1/8.0
1097 #if (IPP_VERSION_X100 >= 801)
1098 type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0
1103 if( ippFuncC1(src.data, (int)src.step[0], sz, pmean, pstddev) >= 0 )
1105 setIppErrorStatus();
1107 typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC3)(const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
1108 ippiMeanStdDevFuncC3 ippFuncC3 =
1109 type == CV_8UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CR :
1110 type == CV_16UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CR :
1111 type == CV_32FC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CR :
1115 if( ippFuncC3(src.data, (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
1116 ippFuncC3(src.data, (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
1117 ippFuncC3(src.data, (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
1119 setIppErrorStatus();
// ---- generic CPU fallback ----
1126 SumSqrFunc func = getSumSqrTab(depth);
1128 CV_Assert( func != 0 );
1130 const Mat* arrays[] = {&src, &mask, 0};
1132 NAryMatIterator it(arrays, ptrs);
1133 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
1134 int j, count = 0, nz0 = 0;
// _buf holds: double sums s[cn], double sqsums sq[cn], plus (for small depths)
// int block accumulators sbuf/sqbuf that alias the remaining space
1135 AutoBuffer<double> _buf(cn*4);
1136 double *s = (double*)_buf, *sq = s + cn;
1137 int *sbuf = (int*)s, *sqbuf = (int*)sq;
// int accumulation is safe for sums up to 16-bit depths, for square sums only
// up to 8-bit depths
1138 bool blockSum = depth <= CV_16S, blockSqSum = depth <= CV_8S;
1141 for( k = 0; k < cn; k++ )
1146 intSumBlockSize = 1 << 15;
1147 blockSize = std::min(blockSize, intSumBlockSize);
1148 sbuf = (int*)(sq + cn);
1151 for( k = 0; k < cn; k++ )
1152 sbuf[k] = sqbuf[k] = 0;
1153 esz = src.elemSize();
1156 for( size_t i = 0; i < it.nplanes; i++, ++it )
1158 for( j = 0; j < total; j += blockSize )
1160 int bsz = std::min(total - j, blockSize);
// kernel returns the number of unmasked pixels in this block
1161 int nz = func( ptrs[0], ptrs[1], (uchar*)sbuf, (uchar*)sqbuf, bsz, cn );
// flush int block accumulators into the double totals before overflow
1164 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
1166 for( k = 0; k < cn; k++ )
1173 for( k = 0; k < cn; k++ )
// finalize: mean = s/nz, sd = sqrt(E[x^2] - mean^2) clamped at 0
1187 double scale = nz0 ? 1./nz0 : 0.;
1188 for( k = 0; k < cn; k++ )
1191 sq[k] = std::sqrt(std::max(sq[k]*scale - s[k]*s[k], 0.));
// write mean (j==0) then stddev (j==1) into the requested outputs,
// zero-padding destination channels beyond cn
1194 for( j = 0; j < 2; j++ )
1196 const double* sptr = j == 0 ? s : sq;
1197 _OutputArray _dst = j == 0 ? _mean : _sdv;
1198 if( !_dst.needed() )
1201 if( !_dst.fixedSize() )
1202 _dst.create(cn, 1, CV_64F, -1, true);
1203 Mat dst = _dst.getMat();
1204 int dcn = (int)dst.total();
1205 CV_Assert( dst.type() == CV_64F && dst.isContinuous() &&
1206 (dst.cols == 1 || dst.rows == 1) && dcn >= cn );
1207 double* dptr = dst.ptr<double>();
1208 for( k = 0; k < cn; k++ )
1210 for( ; k < dcn; k++ )
1215 /****************************************************************************************\
1217 \****************************************************************************************/
// Generic min/max scan over src[0..len): updates the running min/max values
// (*_minVal/*_maxVal, working type WT) and the flattened indices of where they
// occur (startIdx + i). Two passes visible below: unmasked and masked (only
// elements with a nonzero mask byte considered). The write-back of the locals
// to the output pointers is not visible in this dump.
1222 template<typename T, typename WT> static void
1223 minMaxIdx_( const T* src, const uchar* mask, WT* _minVal, WT* _maxVal,
1224 size_t* _minIdx, size_t* _maxIdx, int len, size_t startIdx )
1226 WT minVal = *_minVal, maxVal = *_maxVal;
1227 size_t minIdx = *_minIdx, maxIdx = *_maxIdx;
// --- unmasked scan ---
1231 for( int i = 0; i < len; i++ )
1237 minIdx = startIdx + i;
1242 maxIdx = startIdx + i;
// --- masked scan ---
1248 for( int i = 0; i < len; i++ )
1251 if( mask[i] && val < minVal )
1254 minIdx = startIdx + i;
1256 if( mask[i] && val > maxVal )
1259 maxIdx = startIdx + i;
1270 static void minMaxIdx_8u(const uchar* src, const uchar* mask, int* minval, int* maxval,
1271 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1272 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1274 static void minMaxIdx_8s(const schar* src, const uchar* mask, int* minval, int* maxval,
1275 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1276 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1278 static void minMaxIdx_16u(const ushort* src, const uchar* mask, int* minval, int* maxval,
1279 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1280 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1282 static void minMaxIdx_16s(const short* src, const uchar* mask, int* minval, int* maxval,
1283 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1284 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1286 static void minMaxIdx_32s(const int* src, const uchar* mask, int* minval, int* maxval,
1287 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1288 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1290 static void minMaxIdx_32f(const float* src, const uchar* mask, float* minval, float* maxval,
1291 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1292 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1294 static void minMaxIdx_64f(const double* src, const uchar* mask, double* minval, double* maxval,
1295 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1296 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1298 typedef void (*MinMaxIdxFunc)(const uchar*, const uchar*, int*, int*, size_t*, size_t*, int, size_t);
1300 static MinMaxIdxFunc getMinmaxTab(int depth)
1302 static MinMaxIdxFunc minmaxTab[] =
1304 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8s),
1305 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16s),
1306 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32s),
1307 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32f), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_64f),
1311 return minmaxTab[depth];
1314 static void ofs2idx(const Mat& a, size_t ofs, int* idx)
1320 for( i = d-1; i >= 0; i-- )
1323 idx[i] = (int)(ofs % sz);
1329 for( i = d-1; i >= 0; i-- )
1336 template <typename T>
1337 void getMinMaxRes(const Mat & db, double * minVal, double * maxVal,
1338 int* minLoc, int* maxLoc,
1339 int groupnum, int cols, double * maxVal2)
1341 uint index_max = std::numeric_limits<uint>::max();
1342 T minval = std::numeric_limits<T>::max();
1343 T maxval = std::numeric_limits<T>::min() > 0 ? -std::numeric_limits<T>::max() : std::numeric_limits<T>::min(), maxval2 = maxval;
1344 uint minloc = index_max, maxloc = index_max;
1347 const T * minptr = NULL, * maxptr = NULL, * maxptr2 = NULL;
1348 const uint * minlocptr = NULL, * maxlocptr = NULL;
1349 if (minVal || minLoc)
1351 minptr = (const T *)db.data;
1352 index += sizeof(T) * groupnum;
1354 if (maxVal || maxLoc)
1356 maxptr = (const T *)(db.data + index);
1357 index += sizeof(T) * groupnum;
1361 minlocptr = (uint *)(db.data + index);
1362 index += sizeof(uint) * groupnum;
1366 maxlocptr = (uint *)(db.data + index);
1367 index += sizeof(uint) * groupnum;
1370 maxptr2 = (const T *)(db.data + index);
1372 for (int i = 0; i < groupnum; i++)
1374 if (minptr && minptr[i] <= minval)
1376 if (minptr[i] == minval)
1379 minloc = std::min(minlocptr[i], minloc);
1384 minloc = minlocptr[i];
1388 if (maxptr && maxptr[i] >= maxval)
1390 if (maxptr[i] == maxval)
1393 maxloc = std::min(maxlocptr[i], maxloc);
1398 maxloc = maxlocptr[i];
1402 if (maxptr2 && maxptr2[i] > maxval2)
1403 maxval2 = maxptr2[i];
1405 bool zero_mask = (minLoc && minloc == index_max) ||
1406 (maxLoc && maxloc == index_max);
1409 *minVal = zero_mask ? 0 : (double)minval;
1411 *maxVal = zero_mask ? 0 : (double)maxval;
1413 *maxVal2 = zero_mask ? 0 : (double)maxval2;
1417 minLoc[0] = zero_mask ? -1 : minloc / cols;
1418 minLoc[1] = zero_mask ? -1 : minloc % cols;
1422 maxLoc[0] = zero_mask ? -1 : maxloc / cols;
1423 maxLoc[1] = zero_mask ? -1 : maxloc % cols;
1427 typedef void (*getMinMaxResFunc)(const Mat & db, double * minVal, double * maxVal,
1428 int * minLoc, int *maxLoc, int gropunum, int cols, double * maxVal2);
1430 static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc, int* maxLoc, InputArray _mask,
1431 int ddepth = -1, bool absValues = false, InputArray _src2 = noArray(), double * maxVal2 = NULL)
1433 const ocl::Device & dev = ocl::Device::getDefault();
1434 bool doubleSupport = dev.doubleFPConfig() > 0, haveMask = !_mask.empty(),
1435 haveSrc2 = _src2.kind() != _InputArray::NONE;
1436 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
1437 kercn = haveMask ? cn : std::min(4, ocl::predictOptimalVectorWidth(_src));
1439 CV_Assert( (cn == 1 && (!haveMask || _mask.type() == CV_8U)) ||
1440 (cn >= 1 && !minLoc && !maxLoc) );
1445 CV_Assert(!haveSrc2 || _src2.type() == type);
1447 if ((depth == CV_64F || ddepth == CV_64F) && !doubleSupport)
1450 int groupnum = dev.maxComputeUnits();
1451 size_t wgs = dev.maxWorkGroupSize();
1453 int wgs2_aligned = 1;
1454 while (wgs2_aligned < (int)wgs)
1458 bool needMinVal = minVal || minLoc, needMinLoc = minLoc != NULL,
1459 needMaxVal = maxVal || maxLoc, needMaxLoc = maxLoc != NULL;
1461 // in case of mask we must know whether mask is filled with zeros or not
1462 // so let's calculate min or max location, if it's undefined, so mask is zeros
1463 if (!(needMaxLoc || needMinLoc) && haveMask)
1472 String opts = format("-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s"
1473 " -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s"
1474 " -D dstT1=%s -D dstT=%s -D convertToDT=%s%s%s%s%s",
1475 depth, ocl::typeToStr(depth), haveMask ? " -D HAVE_MASK" : "", (int)wgs,
1476 ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), wgs2_aligned,
1477 doubleSupport ? " -D DOUBLE_SUPPORT" : "",
1478 _src.isContinuous() ? " -D HAVE_SRC_CONT" : "",
1479 _mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn,
1480 needMinVal ? " -D NEED_MINVAL" : "", needMaxVal ? " -D NEED_MAXVAL" : "",
1481 needMinLoc ? " -D NEED_MINLOC" : "", needMaxLoc ? " -D NEED_MAXLOC" : "",
1482 ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
1483 ocl::convertTypeStr(depth, ddepth, kercn, cvt), absValues ? " -D OP_ABS" : "",
1484 haveSrc2 ? " -D HAVE_SRC2" : "", maxVal2 ? " -D OP_CALC2" : "",
1485 haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "");
1487 ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts);
1491 int esz = CV_ELEM_SIZE(ddepth), esz32s = CV_ELEM_SIZE1(CV_32S),
1492 dbsize = groupnum * ((needMinVal ? esz : 0) + (needMaxVal ? esz : 0) +
1493 (needMinLoc ? esz32s : 0) + (needMaxLoc ? esz32s : 0) +
1494 (maxVal2 ? esz : 0));
1495 UMat src = _src.getUMat(), src2 = _src2.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat();
1497 if (cn > 1 && !haveMask)
1499 src = src.reshape(1);
1500 src2 = src2.reshape(1);
1506 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
1507 groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(src2));
1509 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
1510 groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(mask),
1511 ocl::KernelArg::ReadOnlyNoSize(src2));
1516 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
1517 groupnum, ocl::KernelArg::PtrWriteOnly(db));
1519 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
1520 groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(mask));
1523 size_t globalsize = groupnum * wgs;
1524 if (!k.run(1, &globalsize, &wgs, false))
1527 static const getMinMaxResFunc functab[7] =
1529 getMinMaxRes<uchar>,
1531 getMinMaxRes<ushort>,
1532 getMinMaxRes<short>,
1534 getMinMaxRes<float>,
1535 getMinMaxRes<double>
1538 getMinMaxResFunc func = functab[ddepth];
1541 func(db.getMat(ACCESS_READ), minVal, maxVal,
1542 needMinLoc ? minLoc ? minLoc : locTemp : minLoc,
1543 needMaxLoc ? maxLoc ? maxLoc : locTemp : maxLoc,
1544 groupnum, src.cols, maxVal2);
1553 void cv::minMaxIdx(InputArray _src, double* minVal,
1554 double* maxVal, int* minIdx, int* maxIdx,
1557 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
1558 CV_Assert( (cn == 1 && (_mask.empty() || _mask.type() == CV_8U)) ||
1559 (cn > 1 && _mask.empty() && !minIdx && !maxIdx) );
1561 CV_OCL_RUN(_src.isUMat() && _src.dims() <= 2 && (_mask.empty() || _src.size() == _mask.size()),
1562 ocl_minMaxIdx(_src, minVal, maxVal, minIdx, maxIdx, _mask))
1564 Mat src = _src.getMat(), mask = _mask.getMat();
1566 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
1567 size_t total_size = src.total();
1568 int rows = src.size[0], cols = (int)(total_size/rows);
1569 if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
1571 IppiSize sz = { cols * cn, rows };
1575 typedef IppStatus (CV_STDCALL* ippiMaskMinMaxIndxFuncC1)(const void *, int, const void *, int,
1576 IppiSize, Ipp32f *, Ipp32f *, IppiPoint *, IppiPoint *);
1578 CV_SUPPRESS_DEPRECATED_START
1579 ippiMaskMinMaxIndxFuncC1 ippFuncC1 =
1580 type == CV_8UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1MR :
1581 type == CV_8SC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_8s_C1MR :
1582 type == CV_16UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1MR :
1583 type == CV_32FC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1MR : 0;
1584 CV_SUPPRESS_DEPRECATED_END
1589 IppiPoint minp, maxp;
1590 if( ippFuncC1(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, &min, &max, &minp, &maxp) >= 0 )
1593 *minVal = (double)min;
1595 *maxVal = (double)max;
1596 if( !minp.x && !minp.y && !maxp.x && !maxp.y && !mask.data[0] )
1597 minp.x = maxp.x = -1;
1600 size_t minidx = minp.y * cols + minp.x + 1;
1601 ofs2idx(src, minidx, minIdx);
1605 size_t maxidx = maxp.y * cols + maxp.x + 1;
1606 ofs2idx(src, maxidx, maxIdx);
1610 setIppErrorStatus();
1615 typedef IppStatus (CV_STDCALL* ippiMinMaxIndxFuncC1)(const void *, int, IppiSize, Ipp32f *, Ipp32f *, IppiPoint *, IppiPoint *);
1617 CV_SUPPRESS_DEPRECATED_START
1618 ippiMinMaxIndxFuncC1 ippFuncC1 =
1619 depth == CV_8U ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1R :
1620 depth == CV_8S ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_8s_C1R :
1621 depth == CV_16U ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1R :
1622 depth == CV_32F ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1R : 0;
1623 CV_SUPPRESS_DEPRECATED_END
1628 IppiPoint minp, maxp;
1629 if( ippFuncC1(src.data, (int)src.step[0], sz, &min, &max, &minp, &maxp) >= 0 )
1632 *minVal = (double)min;
1634 *maxVal = (double)max;
1637 size_t minidx = minp.y * cols + minp.x + 1;
1638 ofs2idx(src, minidx, minIdx);
1642 size_t maxidx = maxp.y * cols + maxp.x + 1;
1643 ofs2idx(src, maxidx, maxIdx);
1647 setIppErrorStatus();
1653 MinMaxIdxFunc func = getMinmaxTab(depth);
1654 CV_Assert( func != 0 );
1656 const Mat* arrays[] = {&src, &mask, 0};
1658 NAryMatIterator it(arrays, ptrs);
1660 size_t minidx = 0, maxidx = 0;
1661 int iminval = INT_MAX, imaxval = INT_MIN;
1662 float fminval = FLT_MAX, fmaxval = -FLT_MAX;
1663 double dminval = DBL_MAX, dmaxval = -DBL_MAX;
1664 size_t startidx = 1;
1665 int *minval = &iminval, *maxval = &imaxval;
1666 int planeSize = (int)it.size*cn;
1668 if( depth == CV_32F )
1669 minval = (int*)&fminval, maxval = (int*)&fmaxval;
1670 else if( depth == CV_64F )
1671 minval = (int*)&dminval, maxval = (int*)&dmaxval;
1673 for( size_t i = 0; i < it.nplanes; i++, ++it, startidx += planeSize )
1674 func( ptrs[0], ptrs[1], minval, maxval, &minidx, &maxidx, planeSize, startidx );
1677 dminval = dmaxval = 0;
1678 else if( depth == CV_32F )
1679 dminval = fminval, dmaxval = fmaxval;
1680 else if( depth <= CV_32S )
1681 dminval = iminval, dmaxval = imaxval;
1689 ofs2idx(src, minidx, minIdx);
1691 ofs2idx(src, maxidx, maxIdx);
1694 void cv::minMaxLoc( InputArray _img, double* minVal, double* maxVal,
1695 Point* minLoc, Point* maxLoc, InputArray mask )
1697 CV_Assert(_img.dims() <= 2);
1699 minMaxIdx(_img, minVal, maxVal, (int*)minLoc, (int*)maxLoc, mask);
1701 std::swap(minLoc->x, minLoc->y);
1703 std::swap(maxLoc->x, maxLoc->y);
1706 /****************************************************************************************\
1708 \****************************************************************************************/
1713 float normL2Sqr_(const float* a, const float* b, int n)
1715 int j = 0; float d = 0.f;
1719 float CV_DECL_ALIGNED(16) buf[4];
1720 __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
1722 for( ; j <= n - 8; j += 8 )
1724 __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
1725 __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
1726 d0 = _mm_add_ps(d0, _mm_mul_ps(t0, t0));
1727 d1 = _mm_add_ps(d1, _mm_mul_ps(t1, t1));
1729 _mm_store_ps(buf, _mm_add_ps(d0, d1));
1730 d = buf[0] + buf[1] + buf[2] + buf[3];
1735 for( ; j <= n - 4; j += 4 )
1737 float t0 = a[j] - b[j], t1 = a[j+1] - b[j+1], t2 = a[j+2] - b[j+2], t3 = a[j+3] - b[j+3];
1738 d += t0*t0 + t1*t1 + t2*t2 + t3*t3;
1744 float t = a[j] - b[j];
1751 float normL1_(const float* a, const float* b, int n)
1753 int j = 0; float d = 0.f;
1757 float CV_DECL_ALIGNED(16) buf[4];
1758 static const int CV_DECL_ALIGNED(16) absbuf[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
1759 __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
1760 __m128 absmask = _mm_load_ps((const float*)absbuf);
1762 for( ; j <= n - 8; j += 8 )
1764 __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
1765 __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
1766 d0 = _mm_add_ps(d0, _mm_and_ps(t0, absmask));
1767 d1 = _mm_add_ps(d1, _mm_and_ps(t1, absmask));
1769 _mm_store_ps(buf, _mm_add_ps(d0, d1));
1770 d = buf[0] + buf[1] + buf[2] + buf[3];
1775 for( ; j <= n - 4; j += 4 )
1777 d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
1778 std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
1783 d += std::abs(a[j] - b[j]);
1787 int normL1_(const uchar* a, const uchar* b, int n)
1793 __m128i d0 = _mm_setzero_si128();
1795 for( ; j <= n - 16; j += 16 )
1797 __m128i t0 = _mm_loadu_si128((const __m128i*)(a + j));
1798 __m128i t1 = _mm_loadu_si128((const __m128i*)(b + j));
1800 d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
1803 for( ; j <= n - 4; j += 4 )
1805 __m128i t0 = _mm_cvtsi32_si128(*(const int*)(a + j));
1806 __m128i t1 = _mm_cvtsi32_si128(*(const int*)(b + j));
1808 d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
1810 d = _mm_cvtsi128_si32(_mm_add_epi32(d0, _mm_unpackhi_epi64(d0, d0)));
1815 for( ; j <= n - 4; j += 4 )
1817 d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
1818 std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
1822 d += std::abs(a[j] - b[j]);
1826 static const uchar popCountTable[] =
1828 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1829 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1830 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1831 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1832 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1833 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1834 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1835 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
1838 static const uchar popCountTable2[] =
1840 0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
1841 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
1842 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1843 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1844 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1845 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1846 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1847 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
1850 static const uchar popCountTable4[] =
1852 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1853 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1854 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1855 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1856 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1857 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1858 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1859 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
1862 static int normHamming(const uchar* a, int n)
1864 int i = 0, result = 0;
1867 uint32x4_t bits = vmovq_n_u32(0);
1868 for (; i <= n - 16; i += 16) {
1869 uint8x16_t A_vec = vld1q_u8 (a + i);
1870 uint8x16_t bitsSet = vcntq_u8 (A_vec);
1871 uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
1872 uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
1873 bits = vaddq_u32(bits, bitSet4);
1875 uint64x2_t bitSet2 = vpaddlq_u32 (bits);
1876 result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
1877 result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
1880 for( ; i <= n - 4; i += 4 )
1881 result += popCountTable[a[i]] + popCountTable[a[i+1]] +
1882 popCountTable[a[i+2]] + popCountTable[a[i+3]];
1884 result += popCountTable[a[i]];
1888 int normHamming(const uchar* a, const uchar* b, int n)
1890 int i = 0, result = 0;
1893 uint32x4_t bits = vmovq_n_u32(0);
1894 for (; i <= n - 16; i += 16) {
1895 uint8x16_t A_vec = vld1q_u8 (a + i);
1896 uint8x16_t B_vec = vld1q_u8 (b + i);
1897 uint8x16_t AxorB = veorq_u8 (A_vec, B_vec);
1898 uint8x16_t bitsSet = vcntq_u8 (AxorB);
1899 uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
1900 uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
1901 bits = vaddq_u32(bits, bitSet4);
1903 uint64x2_t bitSet2 = vpaddlq_u32 (bits);
1904 result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
1905 result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
1908 for( ; i <= n - 4; i += 4 )
1909 result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] +
1910 popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]];
1912 result += popCountTable[a[i] ^ b[i]];
1916 static int normHamming(const uchar* a, int n, int cellSize)
1919 return normHamming(a, n);
1920 const uchar* tab = 0;
1922 tab = popCountTable2;
1923 else if( cellSize == 4 )
1924 tab = popCountTable4;
1926 CV_Error( CV_StsBadSize, "bad cell size (not 1, 2 or 4) in normHamming" );
1927 int i = 0, result = 0;
1928 #if CV_ENABLE_UNROLLED
1929 for( ; i <= n - 4; i += 4 )
1930 result += tab[a[i]] + tab[a[i+1]] + tab[a[i+2]] + tab[a[i+3]];
1933 result += tab[a[i]];
1937 int normHamming(const uchar* a, const uchar* b, int n, int cellSize)
1940 return normHamming(a, b, n);
1941 const uchar* tab = 0;
1943 tab = popCountTable2;
1944 else if( cellSize == 4 )
1945 tab = popCountTable4;
1947 CV_Error( CV_StsBadSize, "bad cell size (not 1, 2 or 4) in normHamming" );
1948 int i = 0, result = 0;
1949 #if CV_ENABLE_UNROLLED
1950 for( ; i <= n - 4; i += 4 )
1951 result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] +
1952 tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]];
1955 result += tab[a[i] ^ b[i]];
1960 template<typename T, typename ST> int
1961 normInf_(const T* src, const uchar* mask, ST* _result, int len, int cn)
1963 ST result = *_result;
1966 result = std::max(result, normInf<T, ST>(src, len*cn));
1970 for( int i = 0; i < len; i++, src += cn )
1973 for( int k = 0; k < cn; k++ )
1974 result = std::max(result, ST(std::abs(src[k])));
1981 template<typename T, typename ST> int
1982 normL1_(const T* src, const uchar* mask, ST* _result, int len, int cn)
1984 ST result = *_result;
1987 result += normL1<T, ST>(src, len*cn);
1991 for( int i = 0; i < len; i++, src += cn )
1994 for( int k = 0; k < cn; k++ )
1995 result += std::abs(src[k]);
2002 template<typename T, typename ST> int
2003 normL2_(const T* src, const uchar* mask, ST* _result, int len, int cn)
2005 ST result = *_result;
2008 result += normL2Sqr<T, ST>(src, len*cn);
2012 for( int i = 0; i < len; i++, src += cn )
2015 for( int k = 0; k < cn; k++ )
2026 template<typename T, typename ST> int
2027 normDiffInf_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
2029 ST result = *_result;
2032 result = std::max(result, normInf<T, ST>(src1, src2, len*cn));
2036 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
2039 for( int k = 0; k < cn; k++ )
2040 result = std::max(result, (ST)std::abs(src1[k] - src2[k]));
2047 template<typename T, typename ST> int
2048 normDiffL1_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
2050 ST result = *_result;
2053 result += normL1<T, ST>(src1, src2, len*cn);
2057 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
2060 for( int k = 0; k < cn; k++ )
2061 result += std::abs(src1[k] - src2[k]);
2068 template<typename T, typename ST> int
2069 normDiffL2_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
2071 ST result = *_result;
2074 result += normL2Sqr<T, ST>(src1, src2, len*cn);
2078 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
2081 for( int k = 0; k < cn; k++ )
2083 ST v = src1[k] - src2[k];
/* Expands to two thin wrappers, norm<L>_<suffix> and normDiff<L>_<suffix>,
   that forward to the norm<L>_ / normDiff<L>_ templates above. These named
   non-template functions are what the normTab / normDiffTab function-pointer
   tables (see getNormFunc / getNormDiffFunc below) can reference after a
   cast. Comments must stay outside the macro body: a // comment on a
   continuation line would swallow the trailing backslash. */
2093 #define CV_DEF_NORM_FUNC(L, suffix, type, ntype) \
2094 static int norm##L##_##suffix(const type* src, const uchar* mask, ntype* r, int len, int cn) \
2095 { return norm##L##_(src, mask, r, len, cn); } \
2096 static int normDiff##L##_##suffix(const type* src1, const type* src2, \
2097 const uchar* mask, ntype* r, int len, int cn) \
2098 { return normDiff##L##_(src1, src2, mask, r, (int)len, cn); }
/* Convenience macro: for one element type, generate all three norm wrappers
   (Inf / L1 / L2), each with its own accumulator type so that narrow inputs
   can accumulate in int while wider ones use double. */
2100 #define CV_DEF_NORM_ALL(suffix, type, inftype, l1type, l2type) \
2101 CV_DEF_NORM_FUNC(Inf, suffix, type, inftype) \
2102 CV_DEF_NORM_FUNC(L1, suffix, type, l1type) \
2103 CV_DEF_NORM_FUNC(L2, suffix, type, l2type)
/* Instantiate the norm wrappers for every supported depth. The last three
   arguments choose the accumulator type per norm (Inf, L1, L2): 8-bit data
   can use int throughout, 16-bit promotes L2 to double, 32s/32f/64f promote
   progressively to double to avoid overflow / precision loss. */
2105 CV_DEF_NORM_ALL(8u, uchar, int, int, int)
2106 CV_DEF_NORM_ALL(8s, schar, int, int, int)
2107 CV_DEF_NORM_ALL(16u, ushort, int, int, double)
2108 CV_DEF_NORM_ALL(16s, short, int, int, double)
2109 CV_DEF_NORM_ALL(32s, int, int, double, double)
2110 CV_DEF_NORM_ALL(32f, float, float, double, double)
2111 CV_DEF_NORM_ALL(64f, double, double, double, double)
// Type-erased signatures for the normTab / normDiffTab dispatch tables:
// source, (optional) second source, mask, result-accumulator, length, channels.
// Entries are produced by casting the depth-specific wrappers generated via
// CV_DEF_NORM_ALL above (see getNormFunc / getNormDiffFunc).
2114 typedef int (*NormFunc)(const uchar*, const uchar*, uchar*, int, int);
2115 typedef int (*NormDiffFunc)(const uchar*, const uchar*, const uchar*, uchar*, int, int);
2117 static NormFunc getNormFunc(int normType, int depth)
2119 static NormFunc normTab[3][8] =
2122 (NormFunc)GET_OPTIMIZED(normInf_8u), (NormFunc)GET_OPTIMIZED(normInf_8s), (NormFunc)GET_OPTIMIZED(normInf_16u), (NormFunc)GET_OPTIMIZED(normInf_16s),
2123 (NormFunc)GET_OPTIMIZED(normInf_32s), (NormFunc)GET_OPTIMIZED(normInf_32f), (NormFunc)normInf_64f, 0
2126 (NormFunc)GET_OPTIMIZED(normL1_8u), (NormFunc)GET_OPTIMIZED(normL1_8s), (NormFunc)GET_OPTIMIZED(normL1_16u), (NormFunc)GET_OPTIMIZED(normL1_16s),
2127 (NormFunc)GET_OPTIMIZED(normL1_32s), (NormFunc)GET_OPTIMIZED(normL1_32f), (NormFunc)normL1_64f, 0
2130 (NormFunc)GET_OPTIMIZED(normL2_8u), (NormFunc)GET_OPTIMIZED(normL2_8s), (NormFunc)GET_OPTIMIZED(normL2_16u), (NormFunc)GET_OPTIMIZED(normL2_16s),
2131 (NormFunc)GET_OPTIMIZED(normL2_32s), (NormFunc)GET_OPTIMIZED(normL2_32f), (NormFunc)normL2_64f, 0
2135 return normTab[normType][depth];
2138 static NormDiffFunc getNormDiffFunc(int normType, int depth)
2140 static NormDiffFunc normDiffTab[3][8] =
2143 (NormDiffFunc)GET_OPTIMIZED(normDiffInf_8u), (NormDiffFunc)normDiffInf_8s,
2144 (NormDiffFunc)normDiffInf_16u, (NormDiffFunc)normDiffInf_16s,
2145 (NormDiffFunc)normDiffInf_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffInf_32f),
2146 (NormDiffFunc)normDiffInf_64f, 0
2149 (NormDiffFunc)GET_OPTIMIZED(normDiffL1_8u), (NormDiffFunc)normDiffL1_8s,
2150 (NormDiffFunc)normDiffL1_16u, (NormDiffFunc)normDiffL1_16s,
2151 (NormDiffFunc)normDiffL1_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL1_32f),
2152 (NormDiffFunc)normDiffL1_64f, 0
2155 (NormDiffFunc)GET_OPTIMIZED(normDiffL2_8u), (NormDiffFunc)normDiffL2_8s,
2156 (NormDiffFunc)normDiffL2_16u, (NormDiffFunc)normDiffL2_16s,
2157 (NormDiffFunc)normDiffL2_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL2_32f),
2158 (NormDiffFunc)normDiffL2_64f, 0
2162 return normDiffTab[normType][depth];
2167 static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & result )
2169 const ocl::Device & d = ocl::Device::getDefault();
2170 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
2171 bool doubleSupport = d.doubleFPConfig() > 0,
2172 haveMask = _mask.kind() != _InputArray::NONE;
2174 if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) ||
2175 (!doubleSupport && depth == CV_64F))
2178 UMat src = _src.getUMat();
2180 if (normType == NORM_INF)
2182 if (!ocl_minMaxIdx(_src, NULL, &result, NULL, NULL, _mask,
2183 std::max(depth, CV_32S), depth != CV_8U && depth != CV_16U))
2186 else if (normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR)
2189 bool unstype = depth == CV_8U || depth == CV_16U;
2191 if ( !ocl_sum(haveMask ? src : src.reshape(1), sc, normType == NORM_L2 || normType == NORM_L2SQR ?
2192 OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS), _mask) )
2199 for (int i = 0; i < cn; ++i)
2202 result = normType == NORM_L1 || normType == NORM_L2SQR ? s : std::sqrt(s);
2212 double cv::norm( InputArray _src, int normType, InputArray _mask )
2214 normType &= NORM_TYPE_MASK;
2215 CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
2216 normType == NORM_L2 || normType == NORM_L2SQR ||
2217 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && _src.type() == CV_8U) );
2221 CV_OCL_RUN_(_src.isUMat() && _src.dims() <= 2,
2222 ocl_norm(_src, normType, _mask, _result),
2226 Mat src = _src.getMat(), mask = _mask.getMat();
2227 int depth = src.depth(), cn = src.channels();
2229 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
2230 size_t total_size = src.total();
2231 int rows = src.size[0], cols = (int)(total_size/rows);
2233 if( (src.dims == 2 || (src.isContinuous() && mask.isContinuous()))
2234 && cols > 0 && (size_t)rows*cols == total_size
2235 && (normType == NORM_INF || normType == NORM_L1 ||
2236 normType == NORM_L2 || normType == NORM_L2SQR) )
2238 IppiSize sz = { cols, rows };
2239 int type = src.type();
2242 typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
2243 ippiMaskNormFuncC1 ippFuncC1 =
2244 normType == NORM_INF ?
2245 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8u_C1MR :
2246 type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8s_C1MR :
2247 // type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_16u_C1MR :
2248 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_32f_C1MR :
2250 normType == NORM_L1 ?
2251 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8u_C1MR :
2252 type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8s_C1MR :
2253 type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_16u_C1MR :
2254 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_32f_C1MR :
2256 normType == NORM_L2 || normType == NORM_L2SQR ?
2257 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8u_C1MR :
2258 type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8s_C1MR :
2259 type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_16u_C1MR :
2260 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_32f_C1MR :
2265 if( ippFuncC1(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, &norm) >= 0 )
2266 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
2268 setIppErrorStatus();
2270 typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
2271 ippiMaskNormFuncC3 ippFuncC3 =
2272 normType == NORM_INF ?
2273 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8u_C3CMR :
2274 type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8s_C3CMR :
2275 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_16u_C3CMR :
2276 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_32f_C3CMR :
2278 normType == NORM_L1 ?
2279 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8u_C3CMR :
2280 type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8s_C3CMR :
2281 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_16u_C3CMR :
2282 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_32f_C3CMR :
2284 normType == NORM_L2 || normType == NORM_L2SQR ?
2285 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8u_C3CMR :
2286 type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8s_C3CMR :
2287 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_16u_C3CMR :
2288 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_32f_C3CMR :
2292 Ipp64f norm1, norm2, norm3;
2293 if( ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 &&
2294 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 &&
2295 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0)
2298 normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) :
2299 normType == NORM_L1 ? norm1 + norm2 + norm3 :
2300 normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) :
2302 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
2304 setIppErrorStatus();
2309 typedef IppStatus (CV_STDCALL* ippiNormFuncHint)(const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
2310 typedef IppStatus (CV_STDCALL* ippiNormFuncNoHint)(const void *, int, IppiSize, Ipp64f *);
2311 ippiNormFuncHint ippFuncHint =
2312 normType == NORM_L1 ?
2313 (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L1_32f_C1R :
2314 type == CV_32FC3 ? (ippiNormFuncHint)ippiNorm_L1_32f_C3R :
2315 type == CV_32FC4 ? (ippiNormFuncHint)ippiNorm_L1_32f_C4R :
2317 normType == NORM_L2 || normType == NORM_L2SQR ?
2318 (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L2_32f_C1R :
2319 type == CV_32FC3 ? (ippiNormFuncHint)ippiNorm_L2_32f_C3R :
2320 type == CV_32FC4 ? (ippiNormFuncHint)ippiNorm_L2_32f_C4R :
2322 ippiNormFuncNoHint ippFuncNoHint =
2323 normType == NORM_INF ?
2324 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C1R :
2325 type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C3R :
2326 type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C4R :
2327 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C1R :
2328 type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C3R :
2329 type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C4R :
2330 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C1R :
2331 #if (IPP_VERSION_X100 >= 801)
2332 type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
2333 type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
2335 type == CV_32FC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C1R :
2336 type == CV_32FC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C3R :
2337 type == CV_32FC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C4R :
2339 normType == NORM_L1 ?
2340 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C1R :
2341 type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C3R :
2342 type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C4R :
2343 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C1R :
2344 type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C3R :
2345 type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C4R :
2346 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C1R :
2347 type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C3R :
2348 type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C4R :
2350 normType == NORM_L2 || normType == NORM_L2SQR ?
2351 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C1R :
2352 type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C3R :
2353 type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C4R :
2354 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C1R :
2355 type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C3R :
2356 type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C4R :
2357 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C1R :
2358 type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C3R :
2359 type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C4R :
2361 // Make sure only zero or one version of the function pointer is valid
2362 CV_Assert(!ippFuncHint || !ippFuncNoHint);
2363 if( ippFuncHint || ippFuncNoHint )
2365 Ipp64f norm_array[4];
2366 IppStatus ret = ippFuncHint ? ippFuncHint(src.data, (int)src.step[0], sz, norm_array, ippAlgHintAccurate) :
2367 ippFuncNoHint(src.data, (int)src.step[0], sz, norm_array);
2370 Ipp64f norm = (normType == NORM_L2 || normType == NORM_L2SQR) ? norm_array[0] * norm_array[0] : norm_array[0];
2371 for( int i = 1; i < cn; i++ )
2374 normType == NORM_INF ? std::max(norm, norm_array[i]) :
2375 normType == NORM_L1 ? norm + norm_array[i] :
2376 normType == NORM_L2 || normType == NORM_L2SQR ? norm + norm_array[i] * norm_array[i] :
2379 return normType == NORM_L2 ? (double)std::sqrt(norm) : (double)norm;
2381 setIppErrorStatus();
2387 if( src.isContinuous() && mask.empty() )
2389 size_t len = src.total()*cn;
2390 if( len == (size_t)(int)len )
2392 if( depth == CV_32F )
2394 const float* data = src.ptr<float>();
2396 if( normType == NORM_L2 )
2399 GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
2400 return std::sqrt(result);
2402 if( normType == NORM_L2SQR )
2405 GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
2408 if( normType == NORM_L1 )
2411 GET_OPTIMIZED(normL1_32f)(data, 0, &result, (int)len, 1);
2414 if( normType == NORM_INF )
2417 GET_OPTIMIZED(normInf_32f)(data, 0, &result, (int)len, 1);
2421 if( depth == CV_8U )
2423 const uchar* data = src.ptr<uchar>();
2425 if( normType == NORM_HAMMING )
2426 return normHamming(data, (int)len);
2428 if( normType == NORM_HAMMING2 )
2429 return normHamming(data, (int)len, 2);
2434 CV_Assert( mask.empty() || mask.type() == CV_8U );
2436 if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
2441 bitwise_and(src, mask, temp);
2442 return norm(temp, normType);
2444 int cellSize = normType == NORM_HAMMING ? 1 : 2;
2446 const Mat* arrays[] = {&src, 0};
2448 NAryMatIterator it(arrays, ptrs);
2449 int total = (int)it.size;
2452 for( size_t i = 0; i < it.nplanes; i++, ++it )
2453 result += normHamming(ptrs[0], total, cellSize);
2458 NormFunc func = getNormFunc(normType >> 1, depth);
2459 CV_Assert( func != 0 );
2461 const Mat* arrays[] = {&src, &mask, 0};
2471 NAryMatIterator it(arrays, ptrs);
2472 int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
2473 bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
2474 ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
2476 int *ibuf = &result.i;
2481 intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
2482 blockSize = std::min(blockSize, intSumBlockSize);
2484 esz = src.elemSize();
2487 for( size_t i = 0; i < it.nplanes; i++, ++it )
2489 for( j = 0; j < total; j += blockSize )
2491 int bsz = std::min(total - j, blockSize);
2492 func( ptrs[0], ptrs[1], (uchar*)ibuf, bsz, cn );
2494 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
2506 if( normType == NORM_INF )
2508 if( depth == CV_64F )
2510 else if( depth == CV_32F )
2511 result.d = result.f;
2513 result.d = result.i;
2515 else if( normType == NORM_L2 )
2516 result.d = std::sqrt(result.d);
// OpenCL implementation of cv::norm between two arrays (optionally masked,
// optionally relative). Supports NORM_INF / NORM_L1 / NORM_L2 / NORM_L2SQR;
// returns false so the caller falls back to the CPU path otherwise.
// NOTE(review): this listing has elided lines (braces/early returns); the
// comments below describe only the statements visible here.
2525 static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask, double & result )
2528 int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
// Strip the NORM_RELATIVE flag but remember it: a relative norm divides by
// the norm of _src2 at the end.
2529 bool relative = (normType & NORM_RELATIVE) != 0;
2530 normType &= ~NORM_RELATIVE;
2531 bool normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR;
2533 if ( !(normType == NORM_INF || normsum) )
// Sum-based norms: accumulate per-channel sums (squared sums for L2/L2SQR);
// when 'relative', sc2 receives the corresponding sums for _src2.
2538 if (!ocl_sum(_src1, sc1, normType == NORM_L2 || normType == NORM_L2SQR ?
2539 OCL_OP_SUM_SQR : OCL_OP_SUM, _mask, _src2, relative, sc2))
// NORM_INF: reduce to the per-channel maximum instead of a sum.
2544 if (!ocl_minMaxIdx(_src1, NULL, &sc1[0], NULL, NULL, _mask, std::max(CV_32S, depth),
2545 false, _src2, relative ? &sc2[0] : NULL))
// Combine the cn per-channel partial results into the scalar 'result'.
2551 for (int i = 0; i < cn; ++i)
// L2 is reported as the square root of the accumulated sum of squares.
2558 if (normType == NORM_L2)
2560 result = std::sqrt(result);
// Relative norm: divide by ||src2||; DBL_EPSILON guards division by zero.
2566 result /= (s2 + DBL_EPSILON);
// cv::norm overload computing the norm of the difference of two arrays
// (optionally masked; a relative norm when CV_RELATIVE is set in normType).
// Dispatch order: OpenCL (UMat) -> IPP fast paths -> continuous 32f fast
// path -> Hamming handling -> generic per-plane accumulation.
// NOTE(review): this listing has elided lines (braces, some branches); the
// comments describe only the statements visible here.
2575 double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
2577 CV_Assert( _src1.sameSize(_src2) && _src1.type() == _src2.type() );
2581 CV_OCL_RUN_(_src1.isUMat(),
2582 ocl_norm(_src1, _src2, normType, _mask, _result),
// Relative norm branch: ||src1 - src2|| / ||src2||.
2586 if( normType & CV_RELATIVE )
// IPP fast path for the relative norm.
2588 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
2589 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
2591 normType &= NORM_TYPE_MASK;
2592 CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR ||
2593 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
// IPP works on 2D images; collapse an nD continuous array to rows x cols.
2594 size_t total_size = src1.total();
2595 int rows = src1.size[0], cols = (int)(total_size/rows);
2596 if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
2597 && cols > 0 && (size_t)rows*cols == total_size
2598 && (normType == NORM_INF || normType == NORM_L1 ||
2599 normType == NORM_L2 || normType == NORM_L2SQR) )
2601 IppiSize sz = { cols, rows };
2602 int type = src1.type();
// Masked single-channel relative-norm primitive, chosen by norm and type.
2605 typedef IppStatus (CV_STDCALL* ippiMaskNormRelFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
2606 ippiMaskNormRelFuncC1 ippFuncC1 =
2607 normType == NORM_INF ?
2608 (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8u_C1MR :
2609 type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8s_C1MR :
2610 type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_16u_C1MR :
2611 type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_32f_C1MR :
2613 normType == NORM_L1 ?
2614 (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8u_C1MR :
2615 type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8s_C1MR :
2616 type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_16u_C1MR :
2617 type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_32f_C1MR :
2619 normType == NORM_L2 || normType == NORM_L2SQR ?
2620 (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8u_C1MR :
2621 type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8s_C1MR :
2622 type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_16u_C1MR :
2623 type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_32f_C1MR :
// IPP computes L2, so L2SQR is obtained by squaring the returned value.
2628 if( ippFuncC1(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, &norm) >= 0 )
2629 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
// IPP failed: record the error and fall through to slower code.
2630 setIppErrorStatus();
// Unmasked relative-norm primitives: hinted versions exist only for 32f.
2635 typedef IppStatus (CV_STDCALL* ippiNormRelFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *);
2636 typedef IppStatus (CV_STDCALL* ippiNormRelFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
2637 ippiNormRelFuncNoHint ippFuncNoHint =
2638 normType == NORM_INF ?
2639 (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_8u_C1R :
2640 type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16u_C1R :
2641 type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16s_C1R :
2642 type == CV_32FC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_32f_C1R :
2644 normType == NORM_L1 ?
2645 (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_8u_C1R :
2646 type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16u_C1R :
2647 type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16s_C1R :
2649 normType == NORM_L2 || normType == NORM_L2SQR ?
2650 (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_8u_C1R :
2651 type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16u_C1R :
2652 type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16s_C1R :
2654 ippiNormRelFuncHint ippFuncHint =
2655 normType == NORM_L1 ?
2656 (type == CV_32FC1 ? (ippiNormRelFuncHint)ippiNormRel_L1_32f_C1R :
2658 normType == NORM_L2 || normType == NORM_L2SQR ?
2659 (type == CV_32FC1 ? (ippiNormRelFuncHint)ippiNormRel_L2_32f_C1R :
2664 if( ippFuncNoHint(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], sz, &norm) >= 0 )
2665 return (double)norm;
2666 setIppErrorStatus();
// ippAlgHintAccurate favors precision over speed for the 32f kernels.
2671 if( ippFuncHint(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], sz, &norm, ippAlgHintAccurate) >= 0 )
2672 return (double)norm;
2673 setIppErrorStatus();
// Generic relative norm: two absolute norms; epsilon avoids divide-by-zero.
2678 return norm(_src1, _src2, normType & ~CV_RELATIVE, _mask)/(norm(_src2, normType, _mask) + DBL_EPSILON);
// --- Absolute difference norm ||src1 - src2|| from here on. ---
2681 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
2682 int depth = src1.depth(), cn = src1.channels();
2685 CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
2686 normType == NORM_L2 || normType == NORM_L2SQR ||
2687 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
// IPP fast paths for the difference norm, same 2D-collapsing trick as above.
2689 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
2690 size_t total_size = src1.total();
2691 int rows = src1.size[0], cols = (int)(total_size/rows);
2692 if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
2693 && cols > 0 && (size_t)rows*cols == total_size
2694 && (normType == NORM_INF || normType == NORM_L1 ||
2695 normType == NORM_L2 || normType == NORM_L2SQR) )
2697 IppiSize sz = { cols, rows };
2698 int type = src1.type();
// Masked single-channel difference-norm primitives.
2701 typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
2702 ippiMaskNormDiffFuncC1 ippFuncC1 =
2703 normType == NORM_INF ?
2704 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8u_C1MR :
2705 type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8s_C1MR :
2706 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_16u_C1MR :
2707 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_32f_C1MR :
2709 normType == NORM_L1 ?
2710 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8u_C1MR :
2711 type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8s_C1MR :
2712 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_16u_C1MR :
2713 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_32f_C1MR :
2715 normType == NORM_L2 || normType == NORM_L2SQR ?
2716 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8u_C1MR :
2717 type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8s_C1MR :
2718 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_16u_C1MR :
2719 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_32f_C1MR :
2724 if( ippFuncC1(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, &norm) >= 0 )
2725 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
2726 setIppErrorStatus();
// Masked 3-channel variants: one call per channel (COI index 1..3),
// channel results combined below according to the norm type.
2728 typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC3)(const void *, int, const void *, int, const void *, int, IppiSize, int, Ipp64f *);
2729 ippiMaskNormDiffFuncC3 ippFuncC3 =
2730 normType == NORM_INF ?
2731 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8u_C3CMR :
2732 type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8s_C3CMR :
2733 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_16u_C3CMR :
2734 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_32f_C3CMR :
2736 normType == NORM_L1 ?
2737 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8u_C3CMR :
2738 type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8s_C3CMR :
2739 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_16u_C3CMR :
2740 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_32f_C3CMR :
2742 normType == NORM_L2 || normType == NORM_L2SQR ?
2743 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8u_C3CMR :
2744 type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8s_C3CMR :
2745 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_16u_C3CMR :
2746 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_32f_C3CMR :
2750 Ipp64f norm1, norm2, norm3;
2751 if( ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 &&
2752 ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 &&
2753 ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0)
// INF -> max over channels, L1 -> sum, L2/L2SQR -> Euclidean combine.
2756 normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) :
2757 normType == NORM_L1 ? norm1 + norm2 + norm3 :
2758 normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) :
2760 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
2762 setIppErrorStatus();
// Unmasked difference-norm primitives: hinted (32f only) vs. no-hint.
2767 typedef IppStatus (CV_STDCALL* ippiNormDiffFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
2768 typedef IppStatus (CV_STDCALL* ippiNormDiffFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *);
2769 ippiNormDiffFuncHint ippFuncHint =
2770 normType == NORM_L1 ?
2771 (type == CV_32FC1 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C1R :
2772 type == CV_32FC3 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C3R :
2773 type == CV_32FC4 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C4R :
2775 normType == NORM_L2 || normType == NORM_L2SQR ?
2776 (type == CV_32FC1 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C1R :
2777 type == CV_32FC3 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C3R :
2778 type == CV_32FC4 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C4R :
2780 ippiNormDiffFuncNoHint ippFuncNoHint =
2781 normType == NORM_INF ?
2782 (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C1R :
2783 type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C3R :
2784 type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C4R :
2785 type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C1R :
2786 type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C3R :
2787 type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C4R :
2788 type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C1R :
// 16s C3/C4 kernels gated on IPP >= 8.0.1: older versions return -32768.
2789 #if (IPP_VERSION_X100 >= 801)
2790 type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
2791 type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
2793 type == CV_32FC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C1R :
2794 type == CV_32FC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C3R :
2795 type == CV_32FC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C4R :
2797 normType == NORM_L1 ?
2798 (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C1R :
2799 type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C3R :
2800 type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C4R :
2801 type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C1R :
2802 type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C3R :
2803 type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C4R :
2804 type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C1R :
2805 type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C3R :
2806 type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C4R :
2808 normType == NORM_L2 || normType == NORM_L2SQR ?
2809 (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C1R :
2810 type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C3R :
2811 type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C4R :
2812 type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C1R :
2813 type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C3R :
2814 type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C4R :
2815 type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C1R :
2816 type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C3R :
2817 type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C4R :
2819 // Make sure only zero or one version of the function pointer is valid
2820 CV_Assert(!ippFuncHint || !ippFuncNoHint);
2821 if( ippFuncHint || ippFuncNoHint )
// IPP returns per-channel norms; combine them as the norm type requires.
2823 Ipp64f norm_array[4];
2824 IppStatus ret = ippFuncHint ? ippFuncHint(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], sz, norm_array, ippAlgHintAccurate) :
2825 ippFuncNoHint(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], sz, norm_array);
// Accumulate squares for L2/L2SQR so the combine below stays linear.
2828 Ipp64f norm = (normType == NORM_L2 || normType == NORM_L2SQR) ? norm_array[0] * norm_array[0] : norm_array[0];
2829 for( int i = 1; i < src1.channels(); i++ )
2832 normType == NORM_INF ? std::max(norm, norm_array[i]) :
2833 normType == NORM_L1 ? norm + norm_array[i] :
2834 normType == NORM_L2 || normType == NORM_L2SQR ? norm + norm_array[i] * norm_array[i] :
2837 return normType == NORM_L2 ? (double)std::sqrt(norm) : (double)norm;
2839 setIppErrorStatus();
// Fast path: continuous float data, no mask -> one flat vectorized pass.
2845 if( src1.isContinuous() && src2.isContinuous() && mask.empty() )
2847 size_t len = src1.total()*src1.channels();
// Guard: the optimized kernels take an int length.
2848 if( len == (size_t)(int)len )
2850 if( src1.depth() == CV_32F )
2852 const float* data1 = src1.ptr<float>();
2853 const float* data2 = src2.ptr<float>();
2855 if( normType == NORM_L2 )
2858 GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
2859 return std::sqrt(result);
2861 if( normType == NORM_L2SQR )
2864 GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
2867 if( normType == NORM_L1 )
2870 GET_OPTIMIZED(normDiffL1_32f)(data1, data2, 0, &result, (int)len, 1);
2873 if( normType == NORM_INF )
2876 GET_OPTIMIZED(normDiffInf_32f)(data1, data2, 0, &result, (int)len, 1);
2883 CV_Assert( mask.empty() || mask.type() == CV_8U );
// Hamming norms: a masked input is reduced to the unmasked case by
// XOR-ing the operands and AND-ing with the mask, then recursing.
2885 if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
2890 bitwise_xor(src1, src2, temp);
2891 bitwise_and(temp, mask, temp);
2892 return norm(temp, normType);
// HAMMING counts single bits, HAMMING2 counts 2-bit cells.
2894 int cellSize = normType == NORM_HAMMING ? 1 : 2;
2896 const Mat* arrays[] = {&src1, &src2, 0};
2898 NAryMatIterator it(arrays, ptrs);
2899 int total = (int)it.size;
2902 for( size_t i = 0; i < it.nplanes; i++, ++it )
2903 result += normHamming(ptrs[0], ptrs[1], total, cellSize);
// Generic path: per-plane reduction via a type-specialized kernel.
2908 NormDiffFunc func = getNormDiffFunc(normType >> 1, depth);
2909 CV_Assert( func != 0 );
2911 const Mat* arrays[] = {&src1, &src2, &mask, 0};
2922 NAryMatIterator it(arrays, ptrs);
2923 int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
// Small integer types accumulate in an int/unsigned buffer in blocks to
// avoid overflow before the partial sums are flushed to double.
2924 bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
2925 ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
2927 unsigned *ibuf = &result.u;
2932 intSumBlockSize = normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15);
2933 blockSize = std::min(blockSize, intSumBlockSize);
2935 esz = src1.elemSize();
2938 for( size_t i = 0; i < it.nplanes; i++, ++it )
2940 for( j = 0; j < total; j += blockSize )
2942 int bsz = std::min(total - j, blockSize);
2943 func( ptrs[0], ptrs[1], ptrs[2], (uchar*)ibuf, bsz, cn );
// Flush the integer partial sum when the block quota is reached or this
// was the final block of the final plane.
2945 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
// Widen the accumulated value to double according to depth / norm type.
2958 if( normType == NORM_INF )
2960 if( depth == CV_64F )
2962 else if( depth == CV_32F )
2963 result.d = result.f;
2965 result.d = result.u;
2967 else if( normType == NORM_L2 )
2968 result.d = std::sqrt(result.d);
2974 ///////////////////////////////////// batch distance ///////////////////////////////////////
// Computes the L1 distance from the single vector src1 to each of the
// nvecs rows of src2 (row stride step2, given in bytes). Masked-out rows
// receive the maximum representable value so they never win a
// nearest-neighbor comparison.
2979 template<typename _Tp, typename _Rt>
2980 void batchDistL1_(const _Tp* src1, const _Tp* src2, size_t step2,
2981 int nvecs, int len, _Rt* dist, const uchar* mask)
// Convert the byte stride to an element stride for pointer arithmetic.
2983 step2 /= sizeof(src2[0]);
// Unmasked case: one distance per row.
2986 for( int i = 0; i < nvecs; i++ )
2987 dist[i] = normL1<_Tp, _Rt>(src1, src2 + step2*i, len);
// Masked case: sentinel for rejected rows.
2991 _Rt val0 = std::numeric_limits<_Rt>::max();
2992 for( int i = 0; i < nvecs; i++ )
2993 dist[i] = mask[i] ? normL1<_Tp, _Rt>(src1, src2 + step2*i, len) : val0;
// Same layout as batchDistL1_, but computes squared L2 distances
// (no square root) from src1 to each row of src2.
2997 template<typename _Tp, typename _Rt>
2998 void batchDistL2Sqr_(const _Tp* src1, const _Tp* src2, size_t step2,
2999 int nvecs, int len, _Rt* dist, const uchar* mask)
// Byte stride -> element stride.
3001 step2 /= sizeof(src2[0]);
3004 for( int i = 0; i < nvecs; i++ )
3005 dist[i] = normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len);
// Masked rows get the max-value sentinel.
3009 _Rt val0 = std::numeric_limits<_Rt>::max();
3010 for( int i = 0; i < nvecs; i++ )
3011 dist[i] = mask[i] ? normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len) : val0;
// Same layout as batchDistL1_, but computes true (square-rooted) L2
// distances from src1 to each row of src2.
3015 template<typename _Tp, typename _Rt>
3016 void batchDistL2_(const _Tp* src1, const _Tp* src2, size_t step2,
3017 int nvecs, int len, _Rt* dist, const uchar* mask)
// Byte stride -> element stride.
3019 step2 /= sizeof(src2[0]);
3022 for( int i = 0; i < nvecs; i++ )
3023 dist[i] = std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len));
// Masked rows get the max-value sentinel.
3027 _Rt val0 = std::numeric_limits<_Rt>::max();
3028 for( int i = 0; i < nvecs; i++ )
3029 dist[i] = mask[i] ? std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len)) : val0;
// Hamming-distance (1-bit cells) counterpart of the batchDist* helpers:
// distance from src1 to each of the nvecs rows of src2, int results.
3033 static void batchDistHamming(const uchar* src1, const uchar* src2, size_t step2,
3034 int nvecs, int len, int* dist, const uchar* mask)
// Byte stride -> element stride (no-op for uchar, kept for symmetry).
3036 step2 /= sizeof(src2[0]);
3039 for( int i = 0; i < nvecs; i++ )
3040 dist[i] = normHamming(src1, src2 + step2*i, len);
// Masked case: val0 sentinel for rejected rows (declaration elided above).
3045 for( int i = 0; i < nvecs; i++ )
3046 dist[i] = mask[i] ? normHamming(src1, src2 + step2*i, len) : val0;
// NORM_HAMMING2 variant: same as batchDistHamming but distances are
// counted over 2-bit cells (the trailing '2' argument to normHamming).
3050 static void batchDistHamming2(const uchar* src1, const uchar* src2, size_t step2,
3051 int nvecs, int len, int* dist, const uchar* mask)
// Byte stride -> element stride (no-op for uchar, kept for symmetry).
3053 step2 /= sizeof(src2[0]);
3056 for( int i = 0; i < nvecs; i++ )
3057 dist[i] = normHamming(src1, src2 + step2*i, len, 2);
// Masked case: val0 sentinel for rejected rows (declaration elided above).
3062 for( int i = 0; i < nvecs; i++ )
3063 dist[i] = mask[i] ? normHamming(src1, src2 + step2*i, len, 2) : val0;
// Concrete, non-template entry points for each supported (input type,
// distance type, norm) combination. They exist so the dispatch table in
// cv::batchDistance can store them behind the uniform BatchDistFunc
// signature defined below; each simply forwards to the template helper.
3067 static void batchDistL1_8u32s(const uchar* src1, const uchar* src2, size_t step2,
3068 int nvecs, int len, int* dist, const uchar* mask)
3070 batchDistL1_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask);
3073 static void batchDistL1_8u32f(const uchar* src1, const uchar* src2, size_t step2,
3074 int nvecs, int len, float* dist, const uchar* mask)
3076 batchDistL1_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
3079 static void batchDistL2Sqr_8u32s(const uchar* src1, const uchar* src2, size_t step2,
3080 int nvecs, int len, int* dist, const uchar* mask)
3082 batchDistL2Sqr_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask);
3085 static void batchDistL2Sqr_8u32f(const uchar* src1, const uchar* src2, size_t step2,
3086 int nvecs, int len, float* dist, const uchar* mask)
3088 batchDistL2Sqr_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
3091 static void batchDistL2_8u32f(const uchar* src1, const uchar* src2, size_t step2,
3092 int nvecs, int len, float* dist, const uchar* mask)
3094 batchDistL2_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
3097 static void batchDistL1_32f(const float* src1, const float* src2, size_t step2,
3098 int nvecs, int len, float* dist, const uchar* mask)
3100 batchDistL1_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
3103 static void batchDistL2Sqr_32f(const float* src1, const float* src2, size_t step2,
3104 int nvecs, int len, float* dist, const uchar* mask)
3106 batchDistL2Sqr_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
3109 static void batchDistL2_32f(const float* src1, const float* src2, size_t step2,
3110 int nvecs, int len, float* dist, const uchar* mask)
3112 batchDistL2_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
// Type-erased signature the wrappers above are cast to; 'dist' is a raw
// byte pointer whose real element type (int/float) matches dtype.
3115 typedef void (*BatchDistFunc)(const uchar* src1, const uchar* src2, size_t step2,
3116 int nvecs, int len, uchar* dist, const uchar* mask);
// Parallel worker for cv::batchDistance: each operator() call processes a
// range of src1 rows, computing that row's distances to every row of src2
// via 'func'. When K > 0 it maintains, per row, a sorted list of the K
// best (smallest) distances and their indices by insertion.
// NOTE(review): member declarations and several statements are elided in
// this listing; comments describe only the visible lines.
3119 struct BatchDistInvoker : public ParallelLoopBody
3121 BatchDistInvoker( const Mat& _src1, const Mat& _src2,
3122 Mat& _dist, Mat& _nidx, int _K,
3123 const Mat& _mask, int _update,
3124 BatchDistFunc _func)
3136 void operator()(const Range& range) const
// Scratch distances for one src1 row against all src2 rows (K > 0 path).
3138 AutoBuffer<int> buf(src2->rows);
3141 for( int i = range.start; i < range.end; i++ )
// K > 0: write into the scratch buffer; otherwise straight into dist.
3143 func(src1->ptr(i), src2->ptr(), src2->step, src2->rows, src2->cols,
3144 K > 0 ? (uchar*)bufptr : dist->ptr(i), mask->data ? mask->ptr(i) : 0);
3148 int* nidxptr = nidx->ptr<int>(i);
3149 // since positive float's can be compared just like int's,
3150 // we handle both CV_32S and CV_32F cases with a single branch
3151 int* distptr = (int*)dist->ptr(i);
3155 for( j = 0; j < src2->rows; j++ )
// Insertion into the sorted top-K: shift worse entries right.
3158 if( d < distptr[K-1] )
3160 for( k = K-2; k >= 0 && distptr[k] > d; k-- )
3162 nidxptr[k+1] = nidxptr[k];
3163 distptr[k+1] = distptr[k];
// 'update' offsets the stored index (used by the crosscheck pass).
3165 nidxptr[k+1] = j + update;
// Computes distances from every row of src1 to every row of src2 (or only
// the K nearest when K > 0), optionally with mutual cross-checking.
// NOTE(review): this listing has elided lines (braces, some statements);
// comments describe only the visible flow.
3185 void cv::batchDistance( InputArray _src1, InputArray _src2,
3186 OutputArray _dist, int dtype, OutputArray _nidx,
3187 int normType, int K, InputArray _mask,
3188 int update, bool crosscheck )
3190 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
3191 int type = src1.type();
3192 CV_Assert( type == src2.type() && src1.cols == src2.cols &&
3193 (type == CV_32F || type == CV_8U));
// An index output is produced exactly when K nearest neighbors are asked for.
3194 CV_Assert( _nidx.needed() == (K > 0) );
// Default dtype: integer for Hamming norms, float otherwise.
3198 dtype = normType == NORM_HAMMING || normType == NORM_HAMMING2 ? CV_32S : CV_32F;
3200 CV_Assert( (type == CV_8U && dtype == CV_32S) || dtype == CV_32F);
// Cannot return more neighbors than src2 has rows.
3202 K = std::min(K, src2.rows);
3204 _dist.create(src1.rows, (K > 0 ? K : src2.rows), dtype);
3205 Mat dist = _dist.getMat(), nidx;
3206 if( _nidx.needed() )
3208 _nidx.create(dist.size(), CV_32S);
3209 nidx = _nidx.getMat();
// Fresh K-NN search: seed distances with "infinity" and indices with -1.
3212 if( update == 0 && K > 0 )
3214 dist = Scalar::all(dtype == CV_32S ? (double)INT_MAX : (double)FLT_MAX);
3215 nidx = Scalar::all(-1);
// Crosscheck mode: only supported for single nearest neighbor, no mask.
// Run the search in the reverse direction (src2 -> src1) first.
3220 CV_Assert( K == 1 && update == 0 && mask.empty() );
3222 batchDistance(src2, src1, tdist, dtype, tidx, normType, K, mask, 0, false);
3224 // if an idx-th element from src1 appeared to be the nearest to i-th element of src2,
3225 // we update the minimum mutual distance between idx-th element of src1 and the whole src2 set.
3226 // As a result, if nidx[idx] = i*, it means that idx-th element of src1 is the nearest
3227 // to i*-th element of src2 and i*-th element of src2 is the closest to idx-th element of src1.
3228 // If nidx[idx] = -1, it means that there is no such ideal couple for it in src2.
3229 // This O(N) procedure is called cross-check and it helps to eliminate some false matches.
3230 if( dtype == CV_32S )
3232 for( int i = 0; i < tdist.rows; i++ )
3234 int idx = tidx.at<int>(i);
3235 int d = tdist.at<int>(i), d0 = dist.at<int>(idx);
3238 dist.at<int>(idx) = d;
3239 nidx.at<int>(idx) = i + update;
// Same cross-check merge for float distances.
3245 for( int i = 0; i < tdist.rows; i++ )
3247 int idx = tidx.at<int>(i);
3248 float d = tdist.at<float>(i), d0 = dist.at<float>(idx);
3251 dist.at<float>(idx) = d;
3252 nidx.at<int>(idx) = i + update;
// Pick the kernel matching (input type, dtype, norm); the casts erase the
// concrete dist element type to fit the uniform BatchDistFunc signature.
3259 BatchDistFunc func = 0;
3262 if( normType == NORM_L1 && dtype == CV_32S )
3263 func = (BatchDistFunc)batchDistL1_8u32s;
3264 else if( normType == NORM_L1 && dtype == CV_32F )
3265 func = (BatchDistFunc)batchDistL1_8u32f;
3266 else if( normType == NORM_L2SQR && dtype == CV_32S )
3267 func = (BatchDistFunc)batchDistL2Sqr_8u32s;
3268 else if( normType == NORM_L2SQR && dtype == CV_32F )
3269 func = (BatchDistFunc)batchDistL2Sqr_8u32f;
3270 else if( normType == NORM_L2 && dtype == CV_32F )
3271 func = (BatchDistFunc)batchDistL2_8u32f;
3272 else if( normType == NORM_HAMMING && dtype == CV_32S )
3273 func = (BatchDistFunc)batchDistHamming;
3274 else if( normType == NORM_HAMMING2 && dtype == CV_32S )
3275 func = (BatchDistFunc)batchDistHamming2;
3277 else if( type == CV_32F && dtype == CV_32F )
3279 if( normType == NORM_L1 )
3280 func = (BatchDistFunc)batchDistL1_32f;
3281 else if( normType == NORM_L2SQR )
3282 func = (BatchDistFunc)batchDistL2Sqr_32f;
3283 else if( normType == NORM_L2 )
3284 func = (BatchDistFunc)batchDistL2_32f;
// No kernel matched the requested combination.
3288 CV_Error_(CV_StsUnsupportedFormat,
3289 ("The combination of type=%d, dtype=%d and normType=%d is not supported",
3290 type, dtype, normType));
// Parallelize over src1 rows.
3292 parallel_for_(Range(0, src1.rows),
3293 BatchDistInvoker(src1, src2, dist, nidx, K, mask, update, func));
// Collects the (x, y) coordinates of all non-zero pixels of a CV_8UC1
// image into an n x 1 CV_32SC2 output, sized by countNonZero up front.
// NOTE(review): the per-pixel non-zero test inside the inner loop is
// elided in this listing; only the surrounding scan is visible.
3297 void cv::findNonZero( InputArray _src, OutputArray _idx )
3299 Mat src = _src.getMat();
3300 CV_Assert( src.type() == CV_8UC1 );
3301 int n = countNonZero(src);
// A non-continuous user Mat cannot be filled through a flat pointer.
3302 if( _idx.kind() == _InputArray::MAT && !_idx.getMatRef().isContinuous() )
3304 _idx.create(n, 1, CV_32SC2);
3305 Mat idx = _idx.getMat();
3306 CV_Assert(idx.isContinuous());
3307 Point* idx_ptr = (Point*)idx.data;
// Row-major scan; Point stores (x=j, y=i).
3309 for( int i = 0; i < src.rows; i++ )
3311 const uchar* bin_ptr = src.ptr(i);
3312 for( int j = 0; j < src.cols; j++ )
3314 *idx_ptr++ = Point(j, i);
// Peak signal-to-noise ratio between two 8-bit arrays, in dB.
// diff is the RMS error; 255 is the peak value for CV_8U data and
// DBL_EPSILON prevents log10 of infinity when the arrays are identical.
3318 double cv::PSNR(InputArray _src1, InputArray _src2)
3320 CV_Assert( _src1.depth() == CV_8U );
3321 double diff = std::sqrt(norm(_src1, _src2, NORM_L2SQR)/(_src1.total()*_src1.channels()));
3322 return 20*log10(255./(diff+DBL_EPSILON));
// Legacy C API wrapper over cv::sum. For IplImages with a channel of
// interest (COI) selected, the result is narrowed to that single channel.
3326 CV_IMPL CvScalar cvSum( const CvArr* srcarr )
3328 cv::Scalar sum = cv::sum(cv::cvarrToMat(srcarr, false, true, 1));
3329 if( CV_IS_IMAGE(srcarr) )
3331 int coi = cvGetImageCOI((IplImage*)srcarr);
// COI is 1-based; 0 means "no COI" (that branch is elided here).
3334 CV_Assert( 0 < coi && coi <= 4 );
3335 sum = cv::Scalar(sum[coi-1]);
// Legacy C API wrapper over cv::countNonZero. Multi-channel inputs are
// reduced to the selected channel of interest before counting.
3341 CV_IMPL int cvCountNonZero( const CvArr* imgarr )
3343 cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
3344 if( img.channels() > 1 )
3345 cv::extractImageCOI(imgarr, img);
3346 return countNonZero(img);
// Legacy C API wrapper over cv::mean (optionally masked). For IplImages
// with a COI selected, the mean is narrowed to that channel.
3351 cvAvg( const void* imgarr, const void* maskarr )
3353 cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
3354 cv::Scalar mean = !maskarr ? cv::mean(img) : cv::mean(img, cv::cvarrToMat(maskarr));
3355 if( CV_IS_IMAGE(imgarr) )
3357 int coi = cvGetImageCOI((IplImage*)imgarr);
// COI is 1-based; 0 means "no COI" (that branch is elided here).
3360 CV_Assert( 0 < coi && coi <= 4 );
3361 mean = cv::Scalar(mean[coi-1]);
// Legacy C API wrapper over cv::meanStdDev: writes mean and standard
// deviation through the output pointers, narrowing both to the selected
// channel of interest for IplImages with a COI.
3369 cvAvgSdv( const CvArr* imgarr, CvScalar* _mean, CvScalar* _sdv, const void* maskarr )
3371 cv::Scalar mean, sdv;
3375 mask = cv::cvarrToMat(maskarr);
3377 cv::meanStdDev(cv::cvarrToMat(imgarr, false, true, 1), mean, sdv, mask );
3379 if( CV_IS_IMAGE(imgarr) )
3381 int coi = cvGetImageCOI((IplImage*)imgarr);
3384 CV_Assert( 0 < coi && coi <= 4 );
3385 mean = cv::Scalar(mean[coi-1]);
3386 sdv = cv::Scalar(sdv[coi-1]);
// Outputs are optional; only the requested ones are written (guards elided).
3391 *(cv::Scalar*)_mean = mean;
3393 *(cv::Scalar*)_sdv = sdv;
// Legacy C API wrapper over cv::minMaxLoc (optionally masked). For
// multi-channel inputs the channel of interest is extracted first, since
// minMaxLoc operates on single-channel data.
3398 cvMinMaxLoc( const void* imgarr, double* _minVal, double* _maxVal,
3399 CvPoint* _minLoc, CvPoint* _maxLoc, const void* maskarr )
3401 cv::Mat mask, img = cv::cvarrToMat(imgarr, false, true, 1);
3403 mask = cv::cvarrToMat(maskarr);
3404 if( img.channels() > 1 )
3405 cv::extractImageCOI(imgarr, img);
3407 cv::minMaxLoc( img, _minVal, _maxVal,
3408 (cv::Point*)_minLoc, (cv::Point*)_maxLoc, mask );
3413 cvNorm( const void* imgA, const void* imgB, int normType, const void* maskarr )
3422 a = cv::cvarrToMat(imgA, false, true, 1);
3424 mask = cv::cvarrToMat(maskarr);
3426 if( a.channels() > 1 && CV_IS_IMAGE(imgA) && cvGetImageCOI((const IplImage*)imgA) > 0 )
3427 cv::extractImageCOI(imgA, a);
3430 return !maskarr ? cv::norm(a, normType) : cv::norm(a, normType, mask);
3432 cv::Mat b = cv::cvarrToMat(imgB, false, true, 1);
3433 if( b.channels() > 1 && CV_IS_IMAGE(imgB) && cvGetImageCOI((const IplImage*)imgB) > 0 )
3434 cv::extractImageCOI(imgB, b);
3436 return !maskarr ? cv::norm(a, b, normType) : cv::norm(a, b, normType, mask);