1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "precomp.hpp"
47 #include "opencl_kernels.hpp"
// Copy a raw multi-channel pixel value of type T into a cv::Scalar,
// one channel element (DataType<T>::channel_type) per s.val[i] slot.
// NOTE(review): this listing is an elided excerpt — the opening brace,
// the declaration of `s` (Scalar) and the `return s;` statement are not
// visible here; do not restyle without the full file.
52 template<typename T> static inline Scalar rawToScalar(const T& v)
55 typedef typename DataType<T>::channel_type T1;
56 int i, n = DataType<T>::channels;
57 for( i = 0; i < n; i++ )
58 s.val[i] = ((T1*)&v)[i];
62 /****************************************************************************************\
64 \****************************************************************************************/
// Generic accumulation kernel behind cv::sum / cv::mean.
// Accumulates `len` pixels with `cn` interleaved channels from src0 into
// dst[0..cn-1] using accumulator type ST; the `mask` pointer selects the
// masked code path (used by cv::mean — see the `nz = func(...)` call site).
// NOTE(review): elided excerpt — braces, the mask dispatch and several
// statements are missing from this listing.
66 template<typename T, typename ST>
67 static int sum_(const T* src0, const uchar* mask, ST* dst, int len, int cn )
// Single-channel path: 4x manual unrolling when enabled at build time.
78 #if CV_ENABLE_UNROLLED
79 for(; i <= len - 4; i += 4, src += cn*4 )
80 s0 += src[0] + src[cn] + src[cn*2] + src[cn*3];
82 for( ; i < len; i++, src += cn )
// Two-channel path: independent per-channel accumulators.
88 ST s0 = dst[0], s1 = dst[1];
89 for( i = 0; i < len; i++, src += cn )
// Three-channel path.
99 ST s0 = dst[0], s1 = dst[1], s2 = dst[2];
100 for( i = 0; i < len; i++, src += cn )
// Generic path: process channels in groups of four.
111 for( ; k < cn; k += 4 )
114 ST s0 = dst[k], s1 = dst[k+1], s2 = dst[k+2], s3 = dst[k+3];
115 for( i = 0; i < len; i++, src += cn )
117 s0 += src[0]; s1 += src[1];
118 s2 += src[2]; s3 += src[3];
// Masked paths below: only pixels with a non-zero mask byte contribute.
132 for( i = 0; i < len; i++ )
142 ST s0 = dst[0], s1 = dst[1], s2 = dst[2];
143 for( i = 0; i < len; i++, src += 3 )
157 for( i = 0; i < len; i++, src += cn )
161 #if CV_ENABLE_UNROLLED
162 for( ; k <= cn - 4; k += 4 )
165 s0 = dst[k] + src[k];
166 s1 = dst[k+1] + src[k+1];
167 dst[k] = s0; dst[k+1] = s1;
168 s0 = dst[k+2] + src[k+2];
169 s1 = dst[k+3] + src[k+3];
170 dst[k+2] = s0; dst[k+3] = s1;
182 static int sum8u( const uchar* src, const uchar* mask, int* dst, int len, int cn )
183 { return sum_(src, mask, dst, len, cn); }
185 static int sum8s( const schar* src, const uchar* mask, int* dst, int len, int cn )
186 { return sum_(src, mask, dst, len, cn); }
188 static int sum16u( const ushort* src, const uchar* mask, int* dst, int len, int cn )
189 { return sum_(src, mask, dst, len, cn); }
191 static int sum16s( const short* src, const uchar* mask, int* dst, int len, int cn )
192 { return sum_(src, mask, dst, len, cn); }
194 static int sum32s( const int* src, const uchar* mask, double* dst, int len, int cn )
195 { return sum_(src, mask, dst, len, cn); }
197 static int sum32f( const float* src, const uchar* mask, double* dst, int len, int cn )
198 { return sum_(src, mask, dst, len, cn); }
200 static int sum64f( const double* src, const uchar* mask, double* dst, int len, int cn )
201 { return sum_(src, mask, dst, len, cn); }
// Uniform signature for the depth-specific sum kernels; pointers are
// type-erased to uchar* so one table can hold all depths.
203 typedef int (*SumFunc)(const uchar*, const uchar* mask, uchar*, int, int);
// Return the sum kernel for a given CV depth (indexed CV_8U..CV_64F).
// NOTE(review): elided excerpt — the braces and the sum32s table entry
// (between sum16s and sum32f) are missing from this listing.
205 static SumFunc getSumFunc(int depth)
207 static SumFunc sumTab[] =
209 (SumFunc)GET_OPTIMIZED(sum8u), (SumFunc)sum8s,
210 (SumFunc)sum16u, (SumFunc)sum16s,
212 (SumFunc)GET_OPTIMIZED(sum32f), (SumFunc)sum64f,
216 return sumTab[depth];
// Generic scalar kernel: count the elements of src[0..len-1] that compare
// unequal to zero.  The unrolled variant sums four boolean comparisons per
// iteration.  NOTE(review): elided excerpt — the template<> line, braces,
// loop-variable declarations and the return are not visible here.
220 static int countNonZero_(const T* src, int len )
223 #if CV_ENABLE_UNROLLED
224 for(; i <= len - 4; i += 4 )
225 nz += (src[i] != 0) + (src[i+1] != 0) + (src[i+2] != 0) + (src[i+3] != 0);
227 for( ; i < len; i++ )
// 8-bit specialization of countNonZero with an SSE2 fast path:
// compare 16 bytes at a time against zero, then use a precomputed
// 256-entry table of "number of zero bits" to turn each 8-bit movemask
// half into a zero count (the table is inverted popcount, so the result
// directly yields the non-zero count).
// NOTE(review): elided excerpt — the SSE2 feature guard, braces, the
// `initialized = true` publication and the scalar tail body are missing.
232 static int countNonZero8u( const uchar* src, int len )
238 __m128i pattern = _mm_setzero_si128 ();
239 static uchar tab[256];
// NOTE(review): lazily-built table guarded by a volatile flag — this is
// not a race-free initialization pattern; confirm acceptable here.
240 static volatile bool initialized = false;
243 // we compute inverse popcount table,
244 // since we pass (img[x] == 0) mask as index in the table.
245 for( int j = 0; j < 256; j++ )
248 for( int mask = 1; mask < 256; mask += mask )
249 val += (j & mask) == 0;
// Main SIMD loop: 16 pixels per iteration.
255 for (; i<=len-16; i+=16)
257 __m128i r0 = _mm_loadu_si128((const __m128i*)(src+i));
258 int val = _mm_movemask_epi8(_mm_cmpeq_epi8(r0, pattern));
259 nz += tab[val & 255] + tab[val >> 8];
// Scalar tail for the remaining < 16 pixels.
263 for( ; i < len; i++ )
268 static int countNonZero16u( const ushort* src, int len )
269 { return countNonZero_(src, len); }
271 static int countNonZero32s( const int* src, int len )
272 { return countNonZero_(src, len); }
274 static int countNonZero32f( const float* src, int len )
275 { return countNonZero_(src, len); }
277 static int countNonZero64f( const double* src, int len )
278 { return countNonZero_(src, len); }
// Uniform signature for the depth-specific countNonZero kernels.
280 typedef int (*CountNonZeroFunc)(const uchar*, int);
// Return the counting kernel for a CV depth.  Note CV_8S reuses the 8u
// kernel and CV_16S the 16u kernel: zero-testing is sign-agnostic at the
// bit level.  The trailing 0 covers CV_USRTYPE1.
// NOTE(review): elided excerpt — braces are missing from this listing.
282 static CountNonZeroFunc getCountNonZeroTab(int depth)
284 static CountNonZeroFunc countNonZeroTab[] =
286 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u),
287 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u),
288 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32s), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32f),
289 (CountNonZeroFunc)GET_OPTIMIZED(countNonZero64f), 0
292 return countNonZeroTab[depth];
// Generic kernel behind cv::meanStdDev: simultaneously accumulates the
// per-channel sum (type ST) and sum of squares (type SQT) of `len` pixels
// with `cn` interleaved channels, optionally restricted by an 8-bit mask.
// NOTE(review): elided excerpt — braces, the mask dispatch, loop-variable
// declarations and the return are missing from this listing.
295 template<typename T, typename ST, typename SQT>
296 static int sumsqr_(const T* src0, const uchar* mask, ST* sum, SQT* sqsum, int len, int cn )
// Single-channel unmasked path.
309 for( i = 0; i < len; i++, src += cn )
312 s0 += v; sq0 += (SQT)v*v;
// Two-channel path: independent accumulator pairs per channel.
319 ST s0 = sum[0], s1 = sum[1];
320 SQT sq0 = sqsum[0], sq1 = sqsum[1];
321 for( i = 0; i < len; i++, src += cn )
323 T v0 = src[0], v1 = src[1];
324 s0 += v0; sq0 += (SQT)v0*v0;
325 s1 += v1; sq1 += (SQT)v1*v1;
327 sum[0] = s0; sum[1] = s1;
328 sqsum[0] = sq0; sqsum[1] = sq1;
// Three-channel path.
332 ST s0 = sum[0], s1 = sum[1], s2 = sum[2];
333 SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
334 for( i = 0; i < len; i++, src += cn )
336 T v0 = src[0], v1 = src[1], v2 = src[2];
337 s0 += v0; sq0 += (SQT)v0*v0;
338 s1 += v1; sq1 += (SQT)v1*v1;
339 s2 += v2; sq2 += (SQT)v2*v2;
341 sum[0] = s0; sum[1] = s1; sum[2] = s2;
342 sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
// Generic path: channels processed in groups of four.
345 for( ; k < cn; k += 4 )
348 ST s0 = sum[k], s1 = sum[k+1], s2 = sum[k+2], s3 = sum[k+3];
349 SQT sq0 = sqsum[k], sq1 = sqsum[k+1], sq2 = sqsum[k+2], sq3 = sqsum[k+3];
350 for( i = 0; i < len; i++, src += cn )
353 v0 = src[0], v1 = src[1];
354 s0 += v0; sq0 += (SQT)v0*v0;
355 s1 += v1; sq1 += (SQT)v1*v1;
356 v0 = src[2], v1 = src[3];
357 s2 += v0; sq2 += (SQT)v0*v0;
358 s3 += v1; sq3 += (SQT)v1*v1;
360 sum[k] = s0; sum[k+1] = s1;
361 sum[k+2] = s2; sum[k+3] = s3;
362 sqsum[k] = sq0; sqsum[k+1] = sq1;
363 sqsum[k+2] = sq2; sqsum[k+3] = sq3;
// Masked paths below: only pixels with a non-zero mask byte contribute.
374 for( i = 0; i < len; i++ )
378 s0 += v; sq0 += (SQT)v*v;
386 ST s0 = sum[0], s1 = sum[1], s2 = sum[2];
387 SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
388 for( i = 0; i < len; i++, src += 3 )
391 T v0 = src[0], v1 = src[1], v2 = src[2];
392 s0 += v0; sq0 += (SQT)v0*v0;
393 s1 += v1; sq1 += (SQT)v1*v1;
394 s2 += v2; sq2 += (SQT)v2*v2;
397 sum[0] = s0; sum[1] = s1; sum[2] = s2;
398 sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
// Masked generic path: per-channel scalar accumulation.
402 for( i = 0; i < len; i++, src += cn )
405 for( int k = 0; k < cn; k++ )
409 SQT sq = sqsum[k] + (SQT)v*v;
410 sum[k] = s; sqsum[k] = sq;
419 static int sqsum8u( const uchar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
420 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
422 static int sqsum8s( const schar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
423 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
425 static int sqsum16u( const ushort* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
426 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
428 static int sqsum16s( const short* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
429 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
431 static int sqsum32s( const int* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
432 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
434 static int sqsum32f( const float* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
435 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
437 static int sqsum64f( const double* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
438 { return sumsqr_(src, mask, sum, sqsum, len, cn); }
// Uniform signature for the depth-specific sum+sum-of-squares kernels;
// pointer types are erased to uchar* so one table can hold all depths.
440 typedef int (*SumSqrFunc)(const uchar*, const uchar* mask, uchar*, uchar*, int, int);
// Return the sumsqr kernel for a CV depth (trailing 0 covers CV_USRTYPE1).
// NOTE(review): elided excerpt — braces are missing from this listing.
442 static SumSqrFunc getSumSqrTab(int depth)
444 static SumSqrFunc sumSqrTab[] =
446 (SumSqrFunc)GET_OPTIMIZED(sqsum8u), (SumSqrFunc)sqsum8s, (SumSqrFunc)sqsum16u, (SumSqrFunc)sqsum16s,
447 (SumSqrFunc)sqsum32s, (SumSqrFunc)GET_OPTIMIZED(sqsum32f), (SumSqrFunc)sqsum64f, 0
450 return sumSqrTab[depth];
// Host-side reduction of the per-workgroup partial sums produced by the
// OpenCL "reduce" kernel: m is a 1-row matrix of partial results; channels
// are folded round-robin into the returned Scalar.
// NOTE(review): elided excerpt — braces, the accumulation statement inside
// the inner loop and the return are missing from this listing.
455 template <typename T> Scalar ocl_part_sum(Mat m)
457 CV_Assert(m.rows == 1);
459 Scalar s = Scalar::all(0);
460 int cn = m.channels();
461 const T * const ptr = m.ptr<T>(0);
463 for (int x = 0, w = m.cols * cn; x < w; )
464 for (int c = 0; c < cn; ++c, ++x)
// Reduction operations supported by the OpenCL sum kernel: plain sum,
// sum of absolute values, sum of squares.
470 enum { OCL_OP_SUM = 0, OCL_OP_SUM_ABS = 1, OCL_OP_SUM_SQR = 2 };
// OpenCL implementation of sum/sum-abs/sum-sqr over _src (optionally
// masked).  Builds the "reduce" kernel with compile-time options, runs one
// workgroup per compute unit, then folds the per-group partial results on
// the host via ocl_part_sum.  Returns false when the device/type cannot be
// handled so the caller falls back to the CPU path.
// NOTE(review): elided excerpt — braces, early `return false` bodies and
// the `char cvt[40]` declaration are missing from this listing.
472 static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask = noArray() )
474 CV_Assert(sum_op == OCL_OP_SUM || sum_op == OCL_OP_SUM_ABS || sum_op == OCL_OP_SUM_SQR);
476 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
477 bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
// Bail out: doubles without device support, or more than 4 channels.
479 if ( (!doubleSupport && depth == CV_64F) || cn > 4 )
482 int dbsize = ocl::Device::getDefault().maxComputeUnits();
483 size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
// Accumulator depth: at least CV_32S (CV_32F for the squared sum).
485 int ddepth = std::max(sum_op == OCL_OP_SUM_SQR ? CV_32F : CV_32S, depth),
486 dtype = CV_MAKE_TYPE(ddepth, cn);
487 bool haveMask = _mask.kind() != _InputArray::NONE;
488 CV_Assert(!haveMask || _mask.type() == CV_8UC1);
// Round the workgroup size down to a power of two for the kernel.
490 int wgs2_aligned = 1;
491 while (wgs2_aligned < (int)wgs)
495 static const char * const opMap[3] = { "OP_SUM", "OP_SUM_ABS", "OP_SUM_SQR" };
497 ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
498 format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D ddepth=%d -D cn=%d"
499 " -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s",
500 ocl::typeToStr(type), ocl::typeToStr(depth),
501 ocl::typeToStr(dtype), ocl::typeToStr(ddepth), ddepth, cn,
502 ocl::convertTypeStr(depth, ddepth, cn, cvt),
503 opMap[sum_op], (int)wgs, wgs2_aligned,
504 doubleSupport ? " -D DOUBLE_SUPPORT" : "",
505 haveMask ? " -D HAVE_MASK" : ""));
// db holds one partial result per compute unit.
509 UMat src = _src.getUMat(), db(1, dbsize, dtype), mask = _mask.getUMat();
511 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
512 dbarg = ocl::KernelArg::PtrWriteOnly(db),
513 maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);
516 k.args(srcarg, src.cols, (int)src.total(), dbsize, dbarg, maskarg);
518 k.args(srcarg, src.cols, (int)src.total(), dbsize, dbarg);
520 size_t globalsize = dbsize * wgs;
521 if (k.run(1, &globalsize, &wgs, false))
// Final host-side reduction, dispatched on the accumulator depth.
523 typedef Scalar (*part_sum)(Mat m);
524 part_sum funcs[3] = { ocl_part_sum<int>, ocl_part_sum<float>, ocl_part_sum<double> },
525 func = funcs[ddepth - CV_32S];
526 res = func(db.getMat(ACCESS_READ));
// Public cv::sum: per-channel sum of all elements of _src.
// Dispatch order: OpenCL (UMat, <=2 dims) -> IPP (2D/continuous) -> generic
// CPU loop via the SumFunc table, with block-wise int accumulation for
// small depths to avoid overflow.
// NOTE(review): elided excerpt — braces, several declarations (_res, s,
// ptrs, j, count) and parts of the accumulation loop are missing.
536 cv::Scalar cv::sum( InputArray _src )
540 CV_OCL_RUN_(_src.isUMat() && _src.dims() <= 2,
541 ocl_sum(_src, _res, OCL_OP_SUM),
545 Mat src = _src.getMat();
546 int k, cn = src.channels(), depth = src.depth();
548 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
// IPP fast path works on 2D images or continuous data viewed as one row.
549 size_t total_size = src.total();
550 int rows = src.size[0], cols = (int)(total_size/rows);
551 if( src.dims == 2 || (src.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
553 IppiSize sz = { cols, rows };
554 int type = src.type();
555 typedef IppStatus (CV_STDCALL* ippiSumFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
556 typedef IppStatus (CV_STDCALL* ippiSumFuncNoHint)(const void*, int, IppiSize, double *);
// Float types use the "hint" variant (accuracy hint parameter).
557 ippiSumFuncHint ippFuncHint =
558 type == CV_32FC1 ? (ippiSumFuncHint)ippiSum_32f_C1R :
559 type == CV_32FC3 ? (ippiSumFuncHint)ippiSum_32f_C3R :
560 type == CV_32FC4 ? (ippiSumFuncHint)ippiSum_32f_C4R :
562 ippiSumFuncNoHint ippFuncNoHint =
563 type == CV_8UC1 ? (ippiSumFuncNoHint)ippiSum_8u_C1R :
564 type == CV_8UC3 ? (ippiSumFuncNoHint)ippiSum_8u_C3R :
565 type == CV_8UC4 ? (ippiSumFuncNoHint)ippiSum_8u_C4R :
566 type == CV_16UC1 ? (ippiSumFuncNoHint)ippiSum_16u_C1R :
567 type == CV_16UC3 ? (ippiSumFuncNoHint)ippiSum_16u_C3R :
568 type == CV_16UC4 ? (ippiSumFuncNoHint)ippiSum_16u_C4R :
569 type == CV_16SC1 ? (ippiSumFuncNoHint)ippiSum_16s_C1R :
570 type == CV_16SC3 ? (ippiSumFuncNoHint)ippiSum_16s_C3R :
571 type == CV_16SC4 ? (ippiSumFuncNoHint)ippiSum_16s_C4R :
// At most one of the two pointers may be set for a given type.
573 CV_Assert(!ippFuncHint || !ippFuncNoHint);
574 if( ippFuncHint || ippFuncNoHint )
577 IppStatus ret = ippFuncHint ? ippFuncHint(src.data, (int)src.step[0], sz, res, ippAlgHintAccurate) :
578 ippFuncNoHint(src.data, (int)src.step[0], sz, res);
582 for( int i = 0; i < cn; i++ )
// Generic CPU fallback.
591 SumFunc func = getSumFunc(depth);
593 CV_Assert( cn <= 4 && func != 0 );
595 const Mat* arrays[] = {&src, 0};
597 NAryMatIterator it(arrays, ptrs);
599 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
601 AutoBuffer<int> _buf;
602 int* buf = (int*)&s[0];
// For depths below CV_32S accumulate in int blocks, flushing to the
// double Scalar before the int accumulators could overflow.
604 bool blockSum = depth < CV_32S;
608 intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
609 blockSize = std::min(blockSize, intSumBlockSize);
613 for( k = 0; k < cn; k++ )
615 esz = src.elemSize();
618 for( size_t i = 0; i < it.nplanes; i++, ++it )
620 for( j = 0; j < total; j += blockSize )
622 int bsz = std::min(total - j, blockSize);
623 func( ptrs[0], 0, (uchar*)buf, bsz, cn );
// Flush the int block accumulators when near overflow or at the end.
625 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
627 for( k = 0; k < cn; k++ )
// OpenCL implementation of countNonZero: per-workgroup partial counts are
// written into a CV_32SC1 buffer (one entry per compute unit) and summed
// on the host.  Returns false when the kernel cannot be built/run so the
// caller falls back to the CPU path.
// NOTE(review): elided excerpt — braces and early-return bodies are
// missing from this listing.
644 static bool ocl_countNonZero( InputArray _src, int & res )
646 int type = _src.type(), depth = CV_MAT_DEPTH(type);
647 bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
649 if (depth == CV_64F && !doubleSupport)
652 int dbsize = ocl::Device::getDefault().maxComputeUnits();
653 size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
// Round workgroup size down to a power of two for the kernel.
655 int wgs2_aligned = 1;
656 while (wgs2_aligned < (int)wgs)
660 ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
661 format("-D srcT=%s -D OP_COUNT_NON_ZERO -D WGS=%d -D WGS2_ALIGNED=%d%s",
662 ocl::typeToStr(type), (int)wgs,
663 wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
667 UMat src = _src.getUMat(), db(1, dbsize, CV_32SC1);
668 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
669 dbsize, ocl::KernelArg::PtrWriteOnly(db));
670 size_t globalsize = dbsize * wgs;
672 if (k.run(1, &globalsize, &wgs, true))
// Host-side reduction of the partial counts via cv::sum.
673 return res = saturate_cast<int>(cv::sum(db.getMat(ACCESS_READ))[0]), true;
// Public cv::countNonZero: number of non-zero elements of a single-channel
// array.  Dispatch: OpenCL (UMat, <=2 dims) -> IPP (note the `&& 0`: the
// IPP branch is deliberately compiled out) -> generic CPU loop over planes.
// NOTE(review): elided excerpt — braces, the `int res = -1` declaration,
// the depth==CV_8U check and the final return are missing.
681 int cv::countNonZero( InputArray _src )
683 int type = _src.type(), cn = CV_MAT_CN(type);
684 CV_Assert( cn == 1 );
688 CV_OCL_RUN_(_src.isUMat() && _src.dims() <= 2,
689 ocl_countNonZero(_src, res),
693 Mat src = _src.getMat();
// NOTE: trailing `&& 0` disables this whole IPP path.
695 #if defined HAVE_IPP && !defined HAVE_IPP_ICV_ONLY && 0
696 if (src.dims <= 2 || src.isContinuous())
698 IppiSize roiSize = { src.cols, src.rows };
699 Ipp32s count = 0, srcstep = (Ipp32s)src.step;
700 IppStatus status = (IppStatus)-1;
702 if (src.isContinuous())
704 roiSize.width = (Ipp32s)src.total();
706 srcstep = (Ipp32s)src.total() * CV_ELEM_SIZE(type);
709 int depth = CV_MAT_DEPTH(type);
// ippiCountInRange counts elements in [0,0]; non-zeros = total - zeros.
711 status = ippiCountInRange_8u_C1R((const Ipp8u *)src.data, srcstep, roiSize, &count, 0, 0);
712 else if (depth == CV_32F)
713 status = ippiCountInRange_32f_C1R((const Ipp32f *)src.data, srcstep, roiSize, &count, 0, 0);
716 return (Ipp32s)src.total() - count;
// Generic CPU fallback: count plane by plane.
721 CountNonZeroFunc func = getCountNonZeroTab(src.depth());
722 CV_Assert( func != 0 );
724 const Mat* arrays[] = {&src, 0};
726 NAryMatIterator it(arrays, ptrs);
727 int total = (int)it.size, nz = 0;
729 for( size_t i = 0; i < it.nplanes; i++, ++it )
730 nz += func( ptrs[0], total );
735 cv::Scalar cv::mean( InputArray _src, InputArray _mask )
737 Mat src = _src.getMat(), mask = _mask.getMat();
738 CV_Assert( mask.empty() || mask.type() == CV_8U );
740 int k, cn = src.channels(), depth = src.depth();
742 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
743 size_t total_size = src.total();
744 int rows = src.size[0], cols = (int)(total_size/rows);
745 if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
747 IppiSize sz = { cols, rows };
748 int type = src.type();
751 typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC1)(const void *, int, void *, int, IppiSize, Ipp64f *);
752 ippiMaskMeanFuncC1 ippFuncC1 =
753 type == CV_8UC1 ? (ippiMaskMeanFuncC1)ippiMean_8u_C1MR :
754 type == CV_16UC1 ? (ippiMaskMeanFuncC1)ippiMean_16u_C1MR :
755 type == CV_32FC1 ? (ippiMaskMeanFuncC1)ippiMean_32f_C1MR :
760 if( ippFuncC1(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, &res) >= 0 )
764 typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC3)(const void *, int, void *, int, IppiSize, int, Ipp64f *);
765 ippiMaskMeanFuncC3 ippFuncC3 =
766 type == CV_8UC3 ? (ippiMaskMeanFuncC3)ippiMean_8u_C3CMR :
767 type == CV_16UC3 ? (ippiMaskMeanFuncC3)ippiMean_16u_C3CMR :
768 type == CV_32FC3 ? (ippiMaskMeanFuncC3)ippiMean_32f_C3CMR :
772 Ipp64f res1, res2, res3;
773 if( ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &res1) >= 0 &&
774 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &res2) >= 0 &&
775 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &res3) >= 0 )
777 return Scalar(res1, res2, res3);
784 typedef IppStatus (CV_STDCALL* ippiMeanFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
785 typedef IppStatus (CV_STDCALL* ippiMeanFuncNoHint)(const void*, int, IppiSize, double *);
786 ippiMeanFuncHint ippFuncHint =
787 type == CV_32FC1 ? (ippiMeanFuncHint)ippiMean_32f_C1R :
788 type == CV_32FC3 ? (ippiMeanFuncHint)ippiMean_32f_C3R :
789 type == CV_32FC4 ? (ippiMeanFuncHint)ippiMean_32f_C4R :
791 ippiMeanFuncNoHint ippFuncNoHint =
792 type == CV_8UC1 ? (ippiMeanFuncNoHint)ippiMean_8u_C1R :
793 type == CV_8UC3 ? (ippiMeanFuncNoHint)ippiMean_8u_C3R :
794 type == CV_8UC4 ? (ippiMeanFuncNoHint)ippiMean_8u_C4R :
795 type == CV_16UC1 ? (ippiMeanFuncNoHint)ippiMean_16u_C1R :
796 type == CV_16UC3 ? (ippiMeanFuncNoHint)ippiMean_16u_C3R :
797 type == CV_16UC4 ? (ippiMeanFuncNoHint)ippiMean_16u_C4R :
798 type == CV_16SC1 ? (ippiMeanFuncNoHint)ippiMean_16s_C1R :
799 type == CV_16SC3 ? (ippiMeanFuncNoHint)ippiMean_16s_C3R :
800 type == CV_16SC4 ? (ippiMeanFuncNoHint)ippiMean_16s_C4R :
802 // Make sure only zero or one version of the function pointer is valid
803 CV_Assert(!ippFuncHint || !ippFuncNoHint);
804 if( ippFuncHint || ippFuncNoHint )
807 IppStatus ret = ippFuncHint ? ippFuncHint(src.data, (int)src.step[0], sz, res, ippAlgHintAccurate) :
808 ippFuncNoHint(src.data, (int)src.step[0], sz, res);
812 for( int i = 0; i < cn; i++ )
822 SumFunc func = getSumFunc(depth);
824 CV_Assert( cn <= 4 && func != 0 );
826 const Mat* arrays[] = {&src, &mask, 0};
828 NAryMatIterator it(arrays, ptrs);
830 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
832 AutoBuffer<int> _buf;
833 int* buf = (int*)&s[0];
834 bool blockSum = depth <= CV_16S;
835 size_t esz = 0, nz0 = 0;
839 intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
840 blockSize = std::min(blockSize, intSumBlockSize);
844 for( k = 0; k < cn; k++ )
846 esz = src.elemSize();
849 for( size_t i = 0; i < it.nplanes; i++, ++it )
851 for( j = 0; j < total; j += blockSize )
853 int bsz = std::min(total - j, blockSize);
854 int nz = func( ptrs[0], ptrs[1], (uchar*)buf, bsz, cn );
857 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
859 for( k = 0; k < cn; k++ )
871 return s*(nz0 ? 1./nz0 : 0);
// OpenCL implementation of meanStdDev: runs the sum and sum-of-squares
// reductions via ocl_sum, normalizes by the pixel count (mask-aware via
// countNonZero), and writes the cn-element mean/stddev vectors into the
// optional output arrays, zero-padding any extra destination channels.
// NOTE(review): elided excerpt — braces, the mean/stddev Scalar
// declarations, mean normalization and the final return are missing.
878 static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
880 bool haveMask = _mask.kind() != _InputArray::NONE;
883 if (!ocl_sum(_src, mean, OCL_OP_SUM, _mask))
885 if (!ocl_sum(_src, stddev, OCL_OP_SUM_SQR, _mask))
888 int nz = haveMask ? countNonZero(_mask) : (int)_src.total();
889 double total = nz != 0 ? 1.0 / nz : 0;
890 int k, j, cn = _src.channels();
891 for (int i = 0; i < cn; ++i)
// stddev = sqrt(E[x^2] - E[x]^2), clamped at 0 against rounding error.
894 stddev[i] = std::sqrt(std::max(stddev[i] * total - mean[i] * mean[i] , 0.));
// Write mean (j==0) then stddev (j==1) into their output arrays.
897 for( j = 0; j < 2; j++ )
899 const double * const sptr = j == 0 ? &mean[0] : &stddev[0];
900 _OutputArray _dst = j == 0 ? _mean : _sdv;
904 if( !_dst.fixedSize() )
905 _dst.create(cn, 1, CV_64F, -1, true);
906 Mat dst = _dst.getMat();
907 int dcn = (int)dst.total();
908 CV_Assert( dst.type() == CV_64F && dst.isContinuous() &&
909 (dst.cols == 1 || dst.rows == 1) && dcn >= cn );
910 double* dptr = dst.ptr<double>();
911 for( k = 0; k < cn; k++ )
913 for( ; k < dcn; k++ )
// Public cv::meanStdDev: per-channel mean and standard deviation of _src,
// optionally restricted by an 8-bit mask.  Dispatch: OpenCL -> IPP
// (masked C1/C3, then unmasked C1/C3) -> generic CPU path using the
// SumSqrFunc table, then stddev = sqrt(E[x^2] - E[x]^2).
// NOTE(review): elided excerpt — braces, several declarations (mean_temp,
// Mat mean/stddev, dcn_mean/dcn_stddev, ptrs, esz) and parts of the flush
// and normalization logic are missing from this listing.
924 void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
926 CV_OCL_RUN(_src.isUMat() && _src.dims() <= 2,
927 ocl_meanStdDev(_src, _mean, _sdv, _mask))
929 Mat src = _src.getMat(), mask = _mask.getMat();
930 CV_Assert( mask.empty() || mask.type() == CV_8U );
932 int k, cn = src.channels(), depth = src.depth();
934 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
935 size_t total_size = src.total();
936 int rows = src.size[0], cols = (int)(total_size/rows);
937 if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
// IPP writes results directly into the caller's output Mats when
// possible; otherwise into local temporaries.
940 Ipp64f stddev_temp[3];
941 Ipp64f *pmean = &mean_temp[0];
942 Ipp64f *pstddev = &stddev_temp[0];
947 if( !_mean.fixedSize() )
948 _mean.create(cn, 1, CV_64F, -1, true);
949 mean = _mean.getMat();
950 dcn_mean = (int)mean.total();
951 pmean = (Ipp64f *)mean.data;
956 if( !_sdv.fixedSize() )
957 _sdv.create(cn, 1, CV_64F, -1, true);
958 stddev = _sdv.getMat();
959 dcn_stddev = (int)stddev.total();
960 pstddev = (Ipp64f *)stddev.data;
// Zero-pad destination channels beyond cn.
962 for( int c = cn; c < dcn_mean; c++ )
964 for( int c = cn; c < dcn_stddev; c++ )
966 IppiSize sz = { cols, rows };
967 int type = src.type();
// Masked single-channel IPP variants.
970 typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC1)(const void *, int, void *, int, IppiSize, Ipp64f *, Ipp64f *);
971 ippiMaskMeanStdDevFuncC1 ippFuncC1 =
972 type == CV_8UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_8u_C1MR :
973 type == CV_16UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_16u_C1MR :
974 type == CV_32FC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_32f_C1MR :
978 if( ippFuncC1(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, pmean, pstddev) >= 0 )
// Masked three-channel IPP variants: one call per channel-of-interest.
982 typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC3)(const void *, int, void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
983 ippiMaskMeanStdDevFuncC3 ippFuncC3 =
984 type == CV_8UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CMR :
985 type == CV_16UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CMR :
986 type == CV_32FC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CMR :
990 if( ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
991 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
992 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
// Unmasked single-channel IPP variants.
999 typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC1)(const void *, int, IppiSize, Ipp64f *, Ipp64f *);
1000 ippiMeanStdDevFuncC1 ippFuncC1 =
1001 type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R :
1002 type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R :
1003 #if (IPP_VERSION_X100 >= 801)
1004 type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0
1009 if( ippFuncC1(src.data, (int)src.step[0], sz, pmean, pstddev) >= 0 )
1011 setIppErrorStatus();
// Unmasked three-channel IPP variants.
1013 typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC3)(const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
1014 ippiMeanStdDevFuncC3 ippFuncC3 =
1015 type == CV_8UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CR :
1016 type == CV_16UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CR :
1017 type == CV_32FC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CR :
1021 if( ippFuncC3(src.data, (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
1022 ippFuncC3(src.data, (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
1023 ippFuncC3(src.data, (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
1025 setIppErrorStatus();
// Generic CPU fallback: accumulate sum and sum-of-squares per channel.
1032 SumSqrFunc func = getSumSqrTab(depth);
1034 CV_Assert( func != 0 );
1036 const Mat* arrays[] = {&src, &mask, 0};
1038 NAryMatIterator it(arrays, ptrs);
1039 int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
1040 int j, count = 0, nz0 = 0;
// _buf holds cn doubles for sums, cn for square sums, plus room for the
// int block accumulators used with small depths.
1041 AutoBuffer<double> _buf(cn*4);
1042 double *s = (double*)_buf, *sq = s + cn;
1043 int *sbuf = (int*)s, *sqbuf = (int*)sq;
1044 bool blockSum = depth <= CV_16S, blockSqSum = depth <= CV_8S;
1047 for( k = 0; k < cn; k++ )
1052 intSumBlockSize = 1 << 15;
1053 blockSize = std::min(blockSize, intSumBlockSize);
1054 sbuf = (int*)(sq + cn);
1057 for( k = 0; k < cn; k++ )
1058 sbuf[k] = sqbuf[k] = 0;
1059 esz = src.elemSize();
1062 for( size_t i = 0; i < it.nplanes; i++, ++it )
1064 for( j = 0; j < total; j += blockSize )
1066 int bsz = std::min(total - j, blockSize);
1067 int nz = func( ptrs[0], ptrs[1], (uchar*)sbuf, (uchar*)sqbuf, bsz, cn );
// Flush int block accumulators before they can overflow.
1070 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
1072 for( k = 0; k < cn; k++ )
1079 for( k = 0; k < cn; k++ )
// Normalize; empty mask (nz0 == 0) gives zero mean and stddev.
1093 double scale = nz0 ? 1./nz0 : 0.;
1094 for( k = 0; k < cn; k++ )
1097 sq[k] = std::sqrt(std::max(sq[k]*scale - s[k]*s[k], 0.));
// Write mean (j==0) then stddev (j==1) into the requested outputs,
// zero-padding destination channels beyond cn.
1100 for( j = 0; j < 2; j++ )
1102 const double* sptr = j == 0 ? s : sq;
1103 _OutputArray _dst = j == 0 ? _mean : _sdv;
1104 if( !_dst.needed() )
1107 if( !_dst.fixedSize() )
1108 _dst.create(cn, 1, CV_64F, -1, true);
1109 Mat dst = _dst.getMat();
1110 int dcn = (int)dst.total();
1111 CV_Assert( dst.type() == CV_64F && dst.isContinuous() &&
1112 (dst.cols == 1 || dst.rows == 1) && dcn >= cn );
1113 double* dptr = dst.ptr<double>();
1114 for( k = 0; k < cn; k++ )
1116 for( ; k < dcn; k++ )
1121 /****************************************************************************************\
1123 \****************************************************************************************/
// Generic min/max scan kernel: updates the running extrema *_minVal /
// *_maxVal and their flat element indices *_minIdx / *_maxIdx over
// src[0..len-1]; startIdx is the global offset of src within the whole
// array so indices stay absolute across plane-by-plane calls.  A non-NULL
// mask restricts the scan to pixels with a non-zero mask byte.
// NOTE(review): elided excerpt — braces, the mask==NULL dispatch, `val`
// declarations and the final write-back of the results are missing.
1128 template<typename T, typename WT> static void
1129 minMaxIdx_( const T* src, const uchar* mask, WT* _minVal, WT* _maxVal,
1130 size_t* _minIdx, size_t* _maxIdx, int len, size_t startIdx )
1132 WT minVal = *_minVal, maxVal = *_maxVal;
1133 size_t minIdx = *_minIdx, maxIdx = *_maxIdx;
// Unmasked scan.
1137 for( int i = 0; i < len; i++ )
1143 minIdx = startIdx + i;
1148 maxIdx = startIdx + i;
// Masked scan.
1154 for( int i = 0; i < len; i++ )
1157 if( mask[i] && val < minVal )
1160 minIdx = startIdx + i;
1162 if( mask[i] && val > maxVal )
1165 maxIdx = startIdx + i;
1176 static void minMaxIdx_8u(const uchar* src, const uchar* mask, int* minval, int* maxval,
1177 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1178 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1180 static void minMaxIdx_8s(const schar* src, const uchar* mask, int* minval, int* maxval,
1181 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1182 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1184 static void minMaxIdx_16u(const ushort* src, const uchar* mask, int* minval, int* maxval,
1185 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1186 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1188 static void minMaxIdx_16s(const short* src, const uchar* mask, int* minval, int* maxval,
1189 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1190 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1192 static void minMaxIdx_32s(const int* src, const uchar* mask, int* minval, int* maxval,
1193 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1194 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1196 static void minMaxIdx_32f(const float* src, const uchar* mask, float* minval, float* maxval,
1197 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1198 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
1200 static void minMaxIdx_64f(const double* src, const uchar* mask, double* minval, double* maxval,
1201 size_t* minidx, size_t* maxidx, int len, size_t startidx )
1202 { minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }
// Uniform signature for the depth-specific min/max kernels; value pointers
// are erased to int* (actual working type depends on depth).
1204 typedef void (*MinMaxIdxFunc)(const uchar*, const uchar*, int*, int*, size_t*, size_t*, int, size_t);
// Return the min/max scan kernel for a CV depth.
// NOTE(review): elided excerpt — braces and the table terminator are
// missing from this listing.
1206 static MinMaxIdxFunc getMinmaxTab(int depth)
1208 static MinMaxIdxFunc minmaxTab[] =
1210 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8s),
1211 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16s),
1212 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32s),
1213 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32f), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_64f),
1217 return minmaxTab[depth];
// Convert a flat element offset `ofs` within array `a` into a
// per-dimension index vector idx[], working from the innermost dimension
// outwards (ofs % size, then divide).  NOTE(review): elided excerpt —
// braces, the declarations of i/d/sz, the ofs==0 ("not found") branch and
// the divide step are missing from this listing.
1220 static void ofs2idx(const Mat& a, size_t ofs, int* idx)
1226 for( i = d-1; i >= 0; i-- )
1229 idx[i] = (int)(ofs % sz);
1235 for( i = d-1; i >= 0; i-- )
// Merge the per-workgroup partial results of the OpenCL minMaxLoc
// reduction (minv/maxv values, minl/maxl flat locations; `groupnum`
// entries each) into final scalar extrema and 2D row/col locations.
// INT_MAX marks "no location found" (fully masked-out input), in which
// case values are reported as 0 and locations as (-1,-1).
// NOTE(review): elided excerpt — braces and some null-pointer guards
// around the optional outputs are missing from this listing.
1242 template <typename T>
1243 void getMinMaxRes(const Mat &minv, const Mat &maxv, const Mat &minl, const Mat &maxl, double* minVal,
1244 double* maxVal, int* minLoc, int* maxLoc, const int groupnum, const int cn, const int cols)
1246 T min = std::numeric_limits<T>::max();
// Lowest representable value: -max() for floating point, min() for ints.
1247 T max = std::numeric_limits<T>::min() > 0 ? -std::numeric_limits<T>::max() : std::numeric_limits<T>::min();
1248 int minloc = INT_MAX, maxloc = INT_MAX;
1249 for (int i = 0; i < groupnum; i++)
1251 T current_min = minv.at<T>(0,i);
1252 T current_max = maxv.at<T>(0,i);
1253 T oldmin = min, oldmax = max;
1254 min = std::min(min, current_min);
1255 max = std::max(max, current_max);
1258 int current_minloc = minl.at<int>(0,i);
1259 int current_maxloc = maxl.at<int>(0,i);
// Negative location marks a group that saw no (unmasked) pixels.
1260 if(current_minloc < 0 || current_maxloc < 0) continue;
// On ties keep the smallest flat location; otherwise take the location
// belonging to the winning value.
1261 minloc = (oldmin == current_min) ? std::min(minloc, current_minloc) : (oldmin < current_min) ? minloc : current_minloc;
1262 maxloc = (oldmax == current_max) ? std::min(maxloc, current_maxloc) : (oldmax > current_max) ? maxloc : current_maxloc;
1265 bool zero_mask = (maxloc == INT_MAX) || (minloc == INT_MAX);
1267 *minVal = zero_mask ? 0 : (double)min;
1269 *maxVal = zero_mask ? 0 : (double)max;
// Convert flat locations to (row, col).
1272 minLoc[0] = zero_mask ? -1 : minloc/cols;
1273 minLoc[1] = zero_mask ? -1 : minloc%cols;
1277 maxLoc[0] = zero_mask ? -1 : maxloc/cols;
1278 maxLoc[1] = zero_mask ? -1 : maxloc%cols;
1282 typedef void (*getMinMaxResFunc)(const Mat &minv, const Mat &maxv, const Mat &minl, const Mat &maxl, double *minVal,
1283 double *maxVal, int *minLoc, int *maxLoc, const int gropunum, const int cn, const int cols);
// OpenCL implementation of minMaxIdx: runs the "reduce" kernel to get
// per-workgroup partial min/max values and flattened locations, then
// merges them on the host with getMinMaxRes<T>.  Returns false to make
// the caller fall back to the CPU path.
1285 static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc, int* maxLoc, InputArray _mask)
// Same contract as cv::minMaxIdx: multi-channel only without mask/locations.
1287 CV_Assert( (_src.channels() == 1 && (_mask.empty() || _mask.type() == CV_8U)) ||
1288 (_src.channels() >= 1 && _mask.empty() && !minLoc && !maxLoc) );
1290 int type = _src.type(), depth = CV_MAT_DEPTH(type);
1291 bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
// CV_64F requires device-side double support.
1293 if (depth == CV_64F && !doubleSupport)
1296 int groupnum = ocl::Device::getDefault().maxComputeUnits();
1297 size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
// Round the workgroup size up to a power of two for WGS2_ALIGNED
// (loop body elided in this listing).
1299 int wgs2_aligned = 1;
1300 while (wgs2_aligned < (int)wgs)
1304 String opts = format("-D DEPTH_%d -D srcT=%s -D OP_MIN_MAX_LOC%s -D WGS=%d -D WGS2_ALIGNED=%d%s",
1305 depth, ocl::typeToStr(depth), _mask.empty() ? "" : "_MASK", (int)wgs,
1306 wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "");
1308 ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, opts);
// One partial result slot per compute unit.
1312 UMat src = _src.getUMat(), minval(1, groupnum, src.type()),
1313 maxval(1, groupnum, src.type()), minloc( 1, groupnum, CV_32SC1),
1314 maxloc( 1, groupnum, CV_32SC1), mask;
1316 mask = _mask.getUMat();
// Multi-channel without mask/locations: flatten to single channel.
1318 if (src.channels() > 1)
1319 src = src.reshape(1);
// Two arg-binding variants: without mask ...
1322 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
1323 groupnum, ocl::KernelArg::PtrWriteOnly(minval), ocl::KernelArg::PtrWriteOnly(maxval),
1324 ocl::KernelArg::PtrWriteOnly(minloc), ocl::KernelArg::PtrWriteOnly(maxloc));
// ... and with the mask appended (if/else lines elided in this listing).
1326 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), groupnum,
1327 ocl::KernelArg::PtrWriteOnly(minval), ocl::KernelArg::PtrWriteOnly(maxval),
1328 ocl::KernelArg::PtrWriteOnly(minloc), ocl::KernelArg::PtrWriteOnly(maxloc), ocl::KernelArg::ReadOnlyNoSize(mask));
1330 size_t globalsize = groupnum * wgs;
1331 if (!k.run(1, &globalsize, &wgs, false))
// Map the partial-result buffers for host-side merging.
1334 Mat minv = minval.getMat(ACCESS_READ), maxv = maxval.getMat(ACCESS_READ),
1335 minl = minloc.getMat(ACCESS_READ), maxl = maxloc.getMat(ACCESS_READ);
// Per-depth host merge functions (indexed by CV_8U..CV_64F).
1337 static getMinMaxResFunc functab[7] =
1339 getMinMaxRes<uchar>,
1341 getMinMaxRes<ushort>,
1342 getMinMaxRes<short>,
1344 getMinMaxRes<float>,
1345 getMinMaxRes<double>
1348 getMinMaxResFunc func;
1350 func = functab[depth];
1351 func(minv, maxv, minl, maxl, minVal, maxVal, minLoc, maxLoc, groupnum, src.channels(), src.cols);
// Finds the global minimum and maximum of an array and, optionally,
// their positions.  minIdx/maxIdx receive src.dims indices each
// (row-major).  Multi-channel input is allowed only when no mask and no
// locations are requested.  Tries OpenCL, then IPP, then the scalar
// dispatch-table implementation.
1360 void cv::minMaxIdx(InputArray _src, double* minVal,
1361 double* maxVal, int* minIdx, int* maxIdx,
1364 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
1365 CV_Assert( (cn == 1 && (_mask.empty() || _mask.type() == CV_8U)) ||
1366 (cn > 1 && _mask.empty() && !minIdx && !maxIdx) );
// OpenCL path for 2-D UMat inputs (mask, if any, must match src size).
1368 CV_OCL_RUN(_src.isUMat() && _src.dims() <= 2 && (_mask.empty() || _src.size() == _mask.size()),
1369 ocl_minMaxIdx(_src, minVal, maxVal, minIdx, maxIdx, _mask))
1371 Mat src = _src.getMat(), mask = _mask.getMat();
// IPP fast path: 2-D, or continuous data viewed as rows x cols.
1373 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
1374 size_t total_size = src.total();
1375 int rows = src.size[0], cols = (int)(total_size/rows);
1376 if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
1378 IppiSize sz = { cols * cn, rows };
// Masked variant: IPP reports Ipp32f extrema and 2-D points.
1382 typedef IppStatus (CV_STDCALL* ippiMaskMinMaxIndxFuncC1)(const void *, int, const void *, int,
1383 IppiSize, Ipp32f *, Ipp32f *, IppiPoint *, IppiPoint *);
1385 CV_SUPPRESS_DEPRECATED_START
1386 ippiMaskMinMaxIndxFuncC1 ippFuncC1 =
1387 type == CV_8UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1MR :
1388 type == CV_8SC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_8s_C1MR :
1389 type == CV_16UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1MR :
1390 type == CV_32FC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1MR : 0;
1391 CV_SUPPRESS_DEPRECATED_END
1396 IppiPoint minp, maxp;
1397 if( ippFuncC1(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, &min, &max, &minp, &maxp) >= 0 )
1400 *minVal = (double)min;
1402 *maxVal = (double)max;
// (0,0)/(0,0) with a zero mask byte at the origin means the mask
// selected nothing: report "not found" (-1).
1403 if( !minp.x && !minp.y && !maxp.x && !maxp.y && !mask.data[0] )
1404 minp.x = maxp.x = -1;
// +1 converts to the 1-based offset convention ofs2idx expects.
1407 size_t minidx = minp.y * cols + minp.x + 1;
1408 ofs2idx(src, minidx, minIdx);
1412 size_t maxidx = maxp.y * cols + maxp.x + 1;
1413 ofs2idx(src, maxidx, maxIdx);
// IPP call failed: record it and fall through to the generic path.
1417 setIppErrorStatus();
// Unmasked variant.
1422 typedef IppStatus (CV_STDCALL* ippiMinMaxIndxFuncC1)(const void *, int, IppiSize, Ipp32f *, Ipp32f *, IppiPoint *, IppiPoint *);
1424 CV_SUPPRESS_DEPRECATED_START
1425 ippiMinMaxIndxFuncC1 ippFuncC1 =
1426 depth == CV_8U ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1R :
1427 depth == CV_8S ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_8s_C1R :
1428 depth == CV_16U ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1R :
1429 depth == CV_32F ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1R : 0;
1430 CV_SUPPRESS_DEPRECATED_END
1435 IppiPoint minp, maxp;
1436 if( ippFuncC1(src.data, (int)src.step[0], sz, &min, &max, &minp, &maxp) >= 0 )
1439 *minVal = (double)min;
1441 *maxVal = (double)max;
1444 size_t minidx = minp.y * cols + minp.x + 1;
1445 ofs2idx(src, minidx, minIdx);
1449 size_t maxidx = maxp.y * cols + maxp.x + 1;
1450 ofs2idx(src, maxidx, maxIdx);
1454 setIppErrorStatus();
// Generic scalar path: scan plane-by-plane via the depth dispatch table.
1460 MinMaxIdxFunc func = getMinmaxTab(depth);
1461 CV_Assert( func != 0 );
1463 const Mat* arrays[] = {&src, &mask, 0};
1465 NAryMatIterator it(arrays, ptrs);
// Offsets are 1-based so that 0 can mean "no element found".
1467 size_t minidx = 0, maxidx = 0;
1468 int iminval = INT_MAX, imaxval = INT_MIN;
1469 float fminval = FLT_MAX, fmaxval = -FLT_MAX;
1470 double dminval = DBL_MAX, dmaxval = -DBL_MAX;
1471 size_t startidx = 1;
// The kernel takes int* accumulators; repoint them at the float/double
// accumulators for the floating depths (reinterpreted inside the kernel).
1472 int *minval = &iminval, *maxval = &imaxval;
1473 int planeSize = (int)it.size*cn;
1475 if( depth == CV_32F )
1476 minval = (int*)&fminval, maxval = (int*)&fmaxval;
1477 else if( depth == CV_64F )
1478 minval = (int*)&dminval, maxval = (int*)&dmaxval;
1480 for( size_t i = 0; i < it.nplanes; i++, ++it, startidx += planeSize )
1481 func( ptrs[0], ptrs[1], minval, maxval, &minidx, &maxidx, planeSize, startidx );
// No element selected (condition line elided — presumably minidx == 0):
// zero results.  Otherwise widen whichever accumulator was used.
1484 dminval = dmaxval = 0;
1485 else if( depth == CV_32F )
1486 dminval = fminval, dmaxval = fmaxval;
1487 else if( depth <= CV_32S )
1488 dminval = iminval, dmaxval = imaxval;
1496 ofs2idx(src, minidx, minIdx);
1498 ofs2idx(src, maxidx, maxIdx);
// 2-D convenience wrapper over minMaxIdx.  minMaxIdx writes {row, col}
// pairs straight into the Point storage, so the fields must be swapped
// to get (x, y) = (col, row).
1501 void cv::minMaxLoc( InputArray _img, double* minVal, double* maxVal,
1502 Point* minLoc, Point* maxLoc, InputArray mask )
1504 CV_Assert(_img.dims() <= 2);
1506 minMaxIdx(_img, minVal, maxVal, (int*)minLoc, (int*)maxLoc, mask);
// NOTE(review): null-pointer guards for minLoc/maxLoc sit between these
// lines in the full source (elided in this listing) — confirm.
1508 std::swap(minLoc->x, minLoc->y);
1510 std::swap(maxLoc->x, maxLoc->y);
1513 /****************************************************************************************\
1515 \****************************************************************************************/
// Squared L2 distance between float vectors a and b of length n.
// SSE path processes 8 floats per iteration into two accumulators;
// remaining elements go through a 4x-unrolled scalar loop and a scalar
// tail (preprocessor guards elided in this listing).
1520 float normL2Sqr_(const float* a, const float* b, int n)
1522 int j = 0; float d = 0.f;
1526 float CV_DECL_ALIGNED(16) buf[4];
1527 __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
1529 for( ; j <= n - 8; j += 8 )
1531 __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
1532 __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
1533 d0 = _mm_add_ps(d0, _mm_mul_ps(t0, t0));
1534 d1 = _mm_add_ps(d1, _mm_mul_ps(t1, t1));
// Horizontal sum of the two vector accumulators.
1536 _mm_store_ps(buf, _mm_add_ps(d0, d1));
1537 d = buf[0] + buf[1] + buf[2] + buf[3];
1542 for( ; j <= n - 4; j += 4 )
1544 float t0 = a[j] - b[j], t1 = a[j+1] - b[j+1], t2 = a[j+2] - b[j+2], t3 = a[j+3] - b[j+3];
1545 d += t0*t0 + t1*t1 + t2*t2 + t3*t3;
1551 float t = a[j] - b[j];
// L1 distance (sum of absolute differences) between float vectors a and
// b of length n.  The SSE path computes |x| by clearing the IEEE sign
// bit with an AND against 0x7fffffff.
1558 float normL1_(const float* a, const float* b, int n)
1560 int j = 0; float d = 0.f;
1564 float CV_DECL_ALIGNED(16) buf[4];
// Per-lane abs mask: all bits except the sign bit.
1565 static const int CV_DECL_ALIGNED(16) absbuf[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
1566 __m128 d0 = _mm_setzero_ps(), d1 = _mm_setzero_ps();
1567 __m128 absmask = _mm_load_ps((const float*)absbuf);
1569 for( ; j <= n - 8; j += 8 )
1571 __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j));
1572 __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
1573 d0 = _mm_add_ps(d0, _mm_and_ps(t0, absmask));
1574 d1 = _mm_add_ps(d1, _mm_and_ps(t1, absmask));
// Horizontal sum of the two vector accumulators.
1576 _mm_store_ps(buf, _mm_add_ps(d0, d1));
1577 d = buf[0] + buf[1] + buf[2] + buf[3];
// Scalar 4x-unrolled loop plus tail.
1582 for( ; j <= n - 4; j += 4 )
1584 d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
1585 std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
1590 d += std::abs(a[j] - b[j]);
// L1 distance between uchar vectors a and b of length n.  The SSE2 path
// uses _mm_sad_epu8 (sum of absolute differences) 16 bytes at a time,
// then 4 bytes at a time for the remainder.
1594 int normL1_(const uchar* a, const uchar* b, int n)
1600 __m128i d0 = _mm_setzero_si128();
1602 for( ; j <= n - 16; j += 16 )
1604 __m128i t0 = _mm_loadu_si128((const __m128i*)(a + j));
1605 __m128i t1 = _mm_loadu_si128((const __m128i*)(b + j));
1607 d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
1610 for( ; j <= n - 4; j += 4 )
1612 __m128i t0 = _mm_cvtsi32_si128(*(const int*)(a + j));
1613 __m128i t1 = _mm_cvtsi32_si128(*(const int*)(b + j));
1615 d0 = _mm_add_epi32(d0, _mm_sad_epu8(t0, t1));
// _mm_sad_epu8 leaves partial sums in the low/high 64-bit halves; fold
// them together and extract.
1617 d = _mm_cvtsi128_si32(_mm_add_epi32(d0, _mm_unpackhi_epi64(d0, d0)));
// Scalar fallback: 4x-unrolled loop plus tail.
1622 for( ; j <= n - 4; j += 4 )
1624 d += std::abs(a[j] - b[j]) + std::abs(a[j+1] - b[j+1]) +
1625 std::abs(a[j+2] - b[j+2]) + std::abs(a[j+3] - b[j+3]);
1629 d += std::abs(a[j] - b[j]);
// Popcount LUT: number of set bits in each byte value 0..255
// (used by the normHamming scalar fallbacks).
1633 static const uchar popCountTable[] =
1635 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1636 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1637 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1638 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1639 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1640 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1641 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1642 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
// Same idea for 2-bit cells: each 2-bit group of the byte contributes 1
// if any of its bits is set (normHamming with cellSize == 2).
1645 static const uchar popCountTable2[] =
1647 0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
1648 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
1649 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1650 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1651 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1652 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1653 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
1654 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
// 4-bit cells: each nibble contributes 1 if any of its four bits is set
// (normHamming with cellSize == 4).
1657 static const uchar popCountTable4[] =
1659 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1660 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1661 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1662 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1663 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1664 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1665 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1666 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
// Total number of set bits in buffer a of length n bytes.
// NEON path: vcntq_u8 per-byte popcount, pairwise-widened into a
// uint32x4 accumulator; scalar fallback uses the byte LUT.
1669 static int normHamming(const uchar* a, int n)
1671 int i = 0, result = 0;
1674 uint32x4_t bits = vmovq_n_u32(0);
1675 for (; i <= n - 16; i += 16) {
1676 uint8x16_t A_vec = vld1q_u8 (a + i);
1677 uint8x16_t bitsSet = vcntq_u8 (A_vec);
1678 uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
1679 uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
1680 bits = vaddq_u32(bits, bitSet4);
// Fold the four 32-bit lanes into two 64-bit lanes and sum their
// low 32-bit halves.
1682 uint64x2_t bitSet2 = vpaddlq_u32 (bits);
1683 result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
1684 result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
// Scalar LUT path: 4x-unrolled plus tail.
1687 for( ; i <= n - 4; i += 4 )
1688 result += popCountTable[a[i]] + popCountTable[a[i+1]] +
1689 popCountTable[a[i+2]] + popCountTable[a[i+3]];
1691 result += popCountTable[a[i]];
// Hamming distance between buffers a and b of length n bytes: popcount
// of a XOR b.  Same NEON/LUT structure as the single-buffer overload.
1695 int normHamming(const uchar* a, const uchar* b, int n)
1697 int i = 0, result = 0;
1700 uint32x4_t bits = vmovq_n_u32(0);
1701 for (; i <= n - 16; i += 16) {
1702 uint8x16_t A_vec = vld1q_u8 (a + i);
1703 uint8x16_t B_vec = vld1q_u8 (b + i);
1704 uint8x16_t AxorB = veorq_u8 (A_vec, B_vec);
1705 uint8x16_t bitsSet = vcntq_u8 (AxorB);
1706 uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet);
1707 uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8);
1708 bits = vaddq_u32(bits, bitSet4);
// Fold lanes and extract the final count.
1710 uint64x2_t bitSet2 = vpaddlq_u32 (bits);
1711 result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0);
1712 result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2);
// Scalar LUT path: 4x-unrolled plus tail.
1715 for( ; i <= n - 4; i += 4 )
1716 result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] +
1717 popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]];
1719 result += popCountTable[a[i] ^ b[i]];
// Generalized Hamming norm of buffer a: with cellSize 2 or 4, each
// 2-/4-bit cell counts once if any of its bits is set; cellSize 1
// degenerates to the plain popcount overload.  Other sizes raise
// CV_StsBadSize.
1723 static int normHamming(const uchar* a, int n, int cellSize)
1726 return normHamming(a, n);
1727 const uchar* tab = 0;
1729 tab = popCountTable2;
1730 else if( cellSize == 4 )
1731 tab = popCountTable4;
1733 CV_Error( CV_StsBadSize, "bad cell size (not 1, 2 or 4) in normHamming" );
1734 int i = 0, result = 0;
1735 #if CV_ENABLE_UNROLLED
1736 for( ; i <= n - 4; i += 4 )
1737 result += tab[a[i]] + tab[a[i+1]] + tab[a[i+2]] + tab[a[i+3]];
1740 result += tab[a[i]];
// Generalized Hamming distance between a and b: cell-wise popcount of
// a XOR b with cellSize 1, 2 or 4 (see the single-buffer overload);
// other sizes raise CV_StsBadSize.
1744 int normHamming(const uchar* a, const uchar* b, int n, int cellSize)
1747 return normHamming(a, b, n);
1748 const uchar* tab = 0;
1750 tab = popCountTable2;
1751 else if( cellSize == 4 )
1752 tab = popCountTable4;
1754 CV_Error( CV_StsBadSize, "bad cell size (not 1, 2 or 4) in normHamming" );
1755 int i = 0, result = 0;
1756 #if CV_ENABLE_UNROLLED
1757 for( ; i <= n - 4; i += 4 )
1758 result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] +
1759 tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]];
1762 result += tab[a[i] ^ b[i]];
// Accumulates the Chebyshev (L-inf) norm of len pixels x cn channels
// into *_result.  Without a mask the whole buffer is handled by the
// normInf<T,ST> kernel; with a mask, only pixels whose mask byte is
// non-zero contribute (mask-check line elided in this listing).
1767 template<typename T, typename ST> int
1768 normInf_(const T* src, const uchar* mask, ST* _result, int len, int cn)
1770 ST result = *_result;
1773 result = std::max(result, normInf<T, ST>(src, len*cn));
1777 for( int i = 0; i < len; i++, src += cn )
1780 for( int k = 0; k < cn; k++ )
1781 result = std::max(result, ST(std::abs(src[k])));
// Accumulates the L1 norm (sum of absolute values) of len pixels x cn
// channels into *_result; with a mask, only pixels whose mask byte is
// non-zero contribute (mask-check line elided in this listing).
1788 template<typename T, typename ST> int
1789 normL1_(const T* src, const uchar* mask, ST* _result, int len, int cn)
1791 ST result = *_result;
1794 result += normL1<T, ST>(src, len*cn);
1798 for( int i = 0; i < len; i++, src += cn )
1801 for( int k = 0; k < cn; k++ )
1802 result += std::abs(src[k]);
// Accumulates the *squared* L2 norm of len pixels x cn channels into
// *_result (callers take the square root at the end for NORM_L2); with
// a mask, only unmasked pixels contribute (inner accumulation lines
// elided in this listing).
1809 template<typename T, typename ST> int
1810 normL2_(const T* src, const uchar* mask, ST* _result, int len, int cn)
1812 ST result = *_result;
1815 result += normL2Sqr<T, ST>(src, len*cn);
1819 for( int i = 0; i < len; i++, src += cn )
1822 for( int k = 0; k < cn; k++ )
// Accumulates the L-inf norm of (src1 - src2) over len pixels x cn
// channels into *_result; with a mask, only unmasked pixels contribute
// (mask-check line elided in this listing).
1833 template<typename T, typename ST> int
1834 normDiffInf_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
1836 ST result = *_result;
1839 result = std::max(result, normInf<T, ST>(src1, src2, len*cn));
1843 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
1846 for( int k = 0; k < cn; k++ )
1847 result = std::max(result, (ST)std::abs(src1[k] - src2[k]));
// Accumulates the L1 norm of (src1 - src2) over len pixels x cn
// channels into *_result; with a mask, only unmasked pixels contribute
// (mask-check line elided in this listing).
1854 template<typename T, typename ST> int
1855 normDiffL1_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
1857 ST result = *_result;
1860 result += normL1<T, ST>(src1, src2, len*cn);
1864 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
1867 for( int k = 0; k < cn; k++ )
1868 result += std::abs(src1[k] - src2[k]);
// Accumulates the *squared* L2 norm of (src1 - src2) over len pixels x
// cn channels into *_result; with a mask, only unmasked pixels
// contribute (accumulation of v*v elided in this listing).
1875 template<typename T, typename ST> int
1876 normDiffL2_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
1878 ST result = *_result;
1881 result += normL2Sqr<T, ST>(src1, src2, len*cn);
1885 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
1888 for( int k = 0; k < cn; k++ )
1890 ST v = src1[k] - src2[k];
// Stamps out a pair of non-template wrapper functions,
// norm<L>_<suffix> and normDiff<L>_<suffix>, whose addresses can be
// stored in the type-erased NormFunc/NormDiffFunc dispatch tables
// below.  `ntype` is the accumulator type for the given norm/depth.
// (Removed the redundant `(int)len` cast in the diff wrapper — `len`
// is already `int`, and the plain-norm wrapper passes it uncast.)
#define CV_DEF_NORM_FUNC(L, suffix, type, ntype) \
static int norm##L##_##suffix(const type* src, const uchar* mask, ntype* r, int len, int cn) \
{ return norm##L##_(src, mask, r, len, cn); } \
static int normDiff##L##_##suffix(const type* src1, const type* src2, \
    const uchar* mask, ntype* r, int len, int cn) \
{ return normDiff##L##_(src1, src2, mask, r, len, cn); }
// Instantiates the Inf/L1/L2 wrapper pairs for one element type; the
// three accumulator-type arguments allow wider sums where needed
// (e.g. double for 16u/16s L2 below).
1907 #define CV_DEF_NORM_ALL(suffix, type, inftype, l1type, l2type) \
1908 CV_DEF_NORM_FUNC(Inf, suffix, type, inftype) \
1909 CV_DEF_NORM_FUNC(L1, suffix, type, l1type) \
1910 CV_DEF_NORM_FUNC(L2, suffix, type, l2type)
// One set of wrappers per supported depth.
1912 CV_DEF_NORM_ALL(8u, uchar, int, int, int)
1913 CV_DEF_NORM_ALL(8s, schar, int, int, int)
1914 CV_DEF_NORM_ALL(16u, ushort, int, int, double)
1915 CV_DEF_NORM_ALL(16s, short, int, int, double)
1916 CV_DEF_NORM_ALL(32s, int, int, double, double)
1917 CV_DEF_NORM_ALL(32f, float, float, double, double)
1918 CV_DEF_NORM_ALL(64f, double, double, double, double)
// Type-erased signatures for the dispatch tables:
// (src, mask, result, len, cn) and (src1, src2, mask, result, len, cn).
// `result` points at an accumulator whose actual type depends on the
// depth (see the CV_DEF_NORM_ALL instantiations above).
1921 typedef int (*NormFunc)(const uchar*, const uchar*, uchar*, int, int);
1922 typedef int (*NormDiffFunc)(const uchar*, const uchar*, const uchar*, uchar*, int, int);
// Returns the norm kernel for (normType, depth).  Rows correspond to
// NORM_INF / NORM_L1 / NORM_L2 — callers pass `normType >> 1` to map
// the flag values 1/2/4 to row indices 0/1/2 (see cv::norm).  Columns
// are indexed by depth; the trailing 0 marks an unsupported depth.
1924 static NormFunc getNormFunc(int normType, int depth)
1926 static NormFunc normTab[3][8] =
1929 (NormFunc)GET_OPTIMIZED(normInf_8u), (NormFunc)GET_OPTIMIZED(normInf_8s), (NormFunc)GET_OPTIMIZED(normInf_16u), (NormFunc)GET_OPTIMIZED(normInf_16s),
1930 (NormFunc)GET_OPTIMIZED(normInf_32s), (NormFunc)GET_OPTIMIZED(normInf_32f), (NormFunc)normInf_64f, 0
1933 (NormFunc)GET_OPTIMIZED(normL1_8u), (NormFunc)GET_OPTIMIZED(normL1_8s), (NormFunc)GET_OPTIMIZED(normL1_16u), (NormFunc)GET_OPTIMIZED(normL1_16s),
1934 (NormFunc)GET_OPTIMIZED(normL1_32s), (NormFunc)GET_OPTIMIZED(normL1_32f), (NormFunc)normL1_64f, 0
1937 (NormFunc)GET_OPTIMIZED(normL2_8u), (NormFunc)GET_OPTIMIZED(normL2_8s), (NormFunc)GET_OPTIMIZED(normL2_16u), (NormFunc)GET_OPTIMIZED(normL2_16s),
1938 (NormFunc)GET_OPTIMIZED(normL2_32s), (NormFunc)GET_OPTIMIZED(normL2_32f), (NormFunc)normL2_64f, 0
1942 return normTab[normType][depth];
// Same as getNormFunc but for norms of a difference (src1 - src2):
// rows NORM_INF / NORM_L1 / NORM_L2 (callers pass `normType >> 1`),
// columns indexed by depth, trailing 0 for unsupported depths.
1945 static NormDiffFunc getNormDiffFunc(int normType, int depth)
1947 static NormDiffFunc normDiffTab[3][8] =
1950 (NormDiffFunc)GET_OPTIMIZED(normDiffInf_8u), (NormDiffFunc)normDiffInf_8s,
1951 (NormDiffFunc)normDiffInf_16u, (NormDiffFunc)normDiffInf_16s,
1952 (NormDiffFunc)normDiffInf_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffInf_32f),
1953 (NormDiffFunc)normDiffInf_64f, 0
1956 (NormDiffFunc)GET_OPTIMIZED(normDiffL1_8u), (NormDiffFunc)normDiffL1_8s,
1957 (NormDiffFunc)normDiffL1_16u, (NormDiffFunc)normDiffL1_16s,
1958 (NormDiffFunc)normDiffL1_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL1_32f),
1959 (NormDiffFunc)normDiffL1_64f, 0
1962 (NormDiffFunc)GET_OPTIMIZED(normDiffL2_8u), (NormDiffFunc)normDiffL2_8s,
1963 (NormDiffFunc)normDiffL2_16u, (NormDiffFunc)normDiffL2_16s,
1964 (NormDiffFunc)normDiffL2_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL2_32f),
1965 (NormDiffFunc)normDiffL2_64f, 0
1969 return normDiffTab[normType][depth];
// OpenCL implementation of cv::norm(src, normType, mask).  Returns
// false to make the caller fall back to the CPU path.
1974 static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & result )
1976 const ocl::Device & d = ocl::Device::getDefault();
1977 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
1978 bool doubleSupport = d.doubleFPConfig() > 0,
1979 haveMask = _mask.kind() != _InputArray::NONE;
// Only the four absolute norms are handled; CV_64F needs device doubles.
1981 if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) ||
1982 (!doubleSupport && depth == CV_64F))
1985 UMat src = _src.getUMat();
// NORM_INF == max(|x|): reduce via minMaxIdx over the absolute values.
1987 if (normType == NORM_INF)
1989 if (cn == 1 || !haveMask)
// For depths that can be negative, materialize |src| into a wider
// temporary first with the OP_ABS_NOSAT arithmetic kernel.
1993 if (depth != CV_8U && depth != CV_16U)
1995 int wdepth = std::max(CV_32S, depth), rowsPerWI = d.isIntel() ? 4 : 1;
1998 ocl::Kernel kabs("KF", ocl::core::arithm_oclsrc,
1999 format("-D UNARY_OP -D OP_ABS_NOSAT -D dstT=%s -D srcT1=%s"
2000 " -D convertToDT=%s -D rowsPerWI=%d%s",
2001 ocl::typeToStr(wdepth), ocl::typeToStr(depth),
2002 ocl::convertTypeStr(depth, wdepth, 1, cvt), rowsPerWI,
2003 doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
2007 abssrc.create(src.size(), CV_MAKE_TYPE(wdepth, cn));
2008 kabs.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(abssrc, cn));
2010 size_t globalsize[2] = { src.cols * cn, (src.rows + rowsPerWI - 1) / rowsPerWI };
2011 if (!kabs.run(2, globalsize, NULL, false))
// The maximum absolute value is the infinity norm.
2017 cv::minMaxIdx(haveMask ? abssrc : abssrc.reshape(1), NULL, &result, NULL, NULL, _mask);
// Masked multi-channel NORM_INF: run the dedicated masked reduce
// kernel, then take the max over its per-group partial results.
2021 int dbsize = d.maxComputeUnits();
2022 size_t wgs = d.maxWorkGroupSize();
// Round the workgroup size up to a power of two for WGS2_ALIGNED
// (loop body elided in this listing).
2024 int wgs2_aligned = 1;
2025 while (wgs2_aligned < (int)wgs)
2029 ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
2030 format("-D OP_NORM_INF_MASK -D HAVE_MASK -D DEPTH_%d"
2031 " -D srcT=%s -D srcT1=%s -D WGS=%d -D cn=%d -D WGS2_ALIGNED=%d%s",
2032 depth, ocl::typeToStr(type), ocl::typeToStr(depth),
2033 wgs, cn, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
2037 UMat db(1, dbsize, type), mask = _mask.getUMat();
2038 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(),
2039 dbsize, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(mask));
2041 size_t globalsize = dbsize * wgs;
2042 if (!k.run(1, &globalsize, &wgs, true))
2045 minMaxIdx(db.getMat(ACCESS_READ), NULL, &result, NULL, NULL, noArray());
// L1/L2/L2SQR reduce to a channel-wise sum of |x| or x^2.
2048 else if (normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR)
// Unsigned depths need no abs for L1 (plain sum suffices).
2051 bool unstype = depth == CV_8U || depth == CV_16U;
2053 if ( !ocl_sum(haveMask ? src : src.reshape(1), sc, normType == NORM_L2 || normType == NORM_L2SQR ?
2054 OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS), _mask) )
// Combine per-channel sums; NORM_L2 takes the final square root.
2061 for (int i = 0; i < cn; ++i)
2064 result = normType == NORM_L1 || normType == NORM_L2SQR ? s : std::sqrt(s);
// Computes the absolute norm of an array: NORM_INF, NORM_L1, NORM_L2,
// NORM_L2SQR, or (for CV_8U data) NORM_HAMMING / NORM_HAMMING2.
// An optional 8-bit mask selects the contributing elements.  Tries
// OpenCL, then IPP fast paths, then specialized continuous-data paths,
// before the generic dispatch-table implementation.
2074 double cv::norm( InputArray _src, int normType, InputArray _mask )
// Strip modifier bits (e.g. NORM_RELATIVE) before validating.
2076 normType &= NORM_TYPE_MASK;
2077 CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
2078 normType == NORM_L2 || normType == NORM_L2SQR ||
2079 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && _src.type() == CV_8U) );
// OpenCL path for 2-D UMat inputs.
2083 CV_OCL_RUN_(_src.isUMat() && _src.dims() <= 2,
2084 ocl_norm(_src, normType, _mask, _result),
2088 Mat src = _src.getMat(), mask = _mask.getMat();
2089 int depth = src.depth(), cn = src.channels();
// IPP fast paths: 2-D or continuous data viewed as rows x cols.
2091 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
2092 size_t total_size = src.total();
2093 int rows = src.size[0], cols = (int)(total_size/rows);
2095 if( (src.dims == 2 || (src.isContinuous() && mask.isContinuous()))
2096 && cols > 0 && (size_t)rows*cols == total_size
2097 && (normType == NORM_INF || normType == NORM_L1 ||
2098 normType == NORM_L2 || normType == NORM_L2SQR) )
2100 IppiSize sz = { cols, rows };
2101 int type = src.type();
// Masked single-channel IPP functions.
2104 typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
2105 ippiMaskNormFuncC1 ippFuncC1 =
2106 normType == NORM_INF ?
2107 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8u_C1MR :
2108 type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8s_C1MR :
2109 // type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_16u_C1MR :
2110 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_32f_C1MR :
2112 normType == NORM_L1 ?
2113 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8u_C1MR :
2114 type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8s_C1MR :
2115 type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_16u_C1MR :
2116 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_32f_C1MR :
2118 normType == NORM_L2 || normType == NORM_L2SQR ?
2119 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8u_C1MR :
2120 type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8s_C1MR :
2121 type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_16u_C1MR :
2122 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_32f_C1MR :
// IPP computes a true L2; square it for NORM_L2SQR.
2127 if( ippFuncC1(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, &norm) >= 0 )
2128 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
2130 setIppErrorStatus();
// Masked 3-channel IPP functions: invoked once per channel (channel
// index is 1-based in IPP), results combined per norm type below.
2132 typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
2133 ippiMaskNormFuncC3 ippFuncC3 =
2134 normType == NORM_INF ?
2135 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8u_C3CMR :
2136 type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8s_C3CMR :
2137 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_16u_C3CMR :
2138 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_32f_C3CMR :
2140 normType == NORM_L1 ?
2141 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8u_C3CMR :
2142 type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8s_C3CMR :
2143 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_16u_C3CMR :
2144 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_32f_C3CMR :
2146 normType == NORM_L2 || normType == NORM_L2SQR ?
2147 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8u_C3CMR :
2148 type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8s_C3CMR :
2149 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_16u_C3CMR :
2150 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_16u_C3CMR :
2154 Ipp64f norm1, norm2, norm3;
2155 if( ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 &&
2156 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 &&
2157 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0)
// Combine per-channel results: max for INF, sum for L1, Euclidean
// combination for L2/L2SQR.
2160 normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) :
2161 normType == NORM_L1 ? norm1 + norm2 + norm3 :
2162 normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) :
2164 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
2166 setIppErrorStatus();
// Unmasked IPP paths; the 32f variants take an accuracy hint.
2171 typedef IppStatus (CV_STDCALL* ippiNormFuncHint)(const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
2172 typedef IppStatus (CV_STDCALL* ippiNormFuncNoHint)(const void *, int, IppiSize, Ipp64f *);
2173 ippiNormFuncHint ippFuncHint =
2174 normType == NORM_L1 ?
2175 (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L1_32f_C1R :
2176 type == CV_32FC3 ? (ippiNormFuncHint)ippiNorm_L1_32f_C3R :
2177 type == CV_32FC4 ? (ippiNormFuncHint)ippiNorm_L1_32f_C4R :
2179 normType == NORM_L2 || normType == NORM_L2SQR ?
2180 (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L2_32f_C1R :
2181 type == CV_32FC3 ? (ippiNormFuncHint)ippiNorm_L2_32f_C3R :
2182 type == CV_32FC4 ? (ippiNormFuncHint)ippiNorm_L2_32f_C4R :
2184 ippiNormFuncNoHint ippFuncNoHint =
2185 normType == NORM_INF ?
2186 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C1R :
2187 type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C3R :
2188 type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C4R :
2189 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C1R :
2190 type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C3R :
2191 type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C4R :
2192 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C1R :
2193 #if (IPP_VERSION_X100 >= 801)
2194 type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
2195 type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
2197 type == CV_32FC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C1R :
2198 type == CV_32FC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C3R :
2199 type == CV_32FC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C4R :
2201 normType == NORM_L1 ?
2202 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C1R :
2203 type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C3R :
2204 type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C4R :
2205 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C1R :
2206 type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C3R :
2207 type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C4R :
2208 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C1R :
2209 type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C3R :
2210 type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C4R :
2212 normType == NORM_L2 || normType == NORM_L2SQR ?
2213 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C1R :
2214 type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C3R :
2215 type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C4R :
2216 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C1R :
2217 type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C3R :
2218 type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C4R :
2219 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C1R :
2220 type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C3R :
2221 type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C4R :
2223 // Make sure only zero or one version of the function pointer is valid
2224 CV_Assert(!ippFuncHint || !ippFuncNoHint)
2225 if( ippFuncHint || ippFuncNoHint )
2227 Ipp64f norm_array[4];
2228 IppStatus ret = ippFuncHint ? ippFuncHint(src.data, (int)src.step[0], sz, norm_array, ippAlgHintAccurate) :
2229 ippFuncNoHint(src.data, (int)src.step[0], sz, norm_array);
// Combine per-channel results; L2/L2SQR accumulate squares and only
// NORM_L2 takes the final square root.
2232 Ipp64f norm = (normType == NORM_L2 || normType == NORM_L2SQR) ? norm_array[0] * norm_array[0] : norm_array[0];
2233 for( int i = 1; i < cn; i++ )
2236 normType == NORM_INF ? std::max(norm, norm_array[i]) :
2237 normType == NORM_L1 ? norm + norm_array[i] :
2238 normType == NORM_L2 || normType == NORM_L2SQR ? norm + norm_array[i] * norm_array[i] :
2241 return normType == NORM_L2 ? (double)std::sqrt(norm) : (double)norm;
2243 setIppErrorStatus();
// Fast path: continuous data, no mask — call the low-level kernels
// directly over the flattened buffer.
2249 if( src.isContinuous() && mask.empty() )
2251 size_t len = src.total()*cn;
// Kernels take an int length; only usable if the total fits in int.
2252 if( len == (size_t)(int)len )
2254 if( depth == CV_32F )
2256 const float* data = src.ptr<float>();
2258 if( normType == NORM_L2 )
2261 GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
2262 return std::sqrt(result);
2264 if( normType == NORM_L2SQR )
2267 GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
2270 if( normType == NORM_L1 )
2273 GET_OPTIMIZED(normL1_32f)(data, 0, &result, (int)len, 1);
2276 if( normType == NORM_INF )
2279 GET_OPTIMIZED(normInf_32f)(data, 0, &result, (int)len, 1);
2283 if( depth == CV_8U )
2285 const uchar* data = src.ptr<uchar>();
2287 if( normType == NORM_HAMMING )
2288 return normHamming(data, (int)len);
2290 if( normType == NORM_HAMMING2 )
2291 return normHamming(data, (int)len, 2);
2296 CV_Assert( mask.empty() || mask.type() == CV_8U );
// Hamming norms with a mask: zero out the masked-off bytes first, then
// recurse without the mask.
2298 if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
2303 bitwise_and(src, mask, temp);
2304 return norm(temp, normType);
// NORM_HAMMING counts single bits; NORM_HAMMING2 counts 2-bit cells.
2306 int cellSize = normType == NORM_HAMMING ? 1 : 2;
2308 const Mat* arrays[] = {&src, 0};
2310 NAryMatIterator it(arrays, ptrs);
2311 int total = (int)it.size;
2314 for( size_t i = 0; i < it.nplanes; i++, ++it )
2315 result += normHamming(ptrs[0], total, cellSize);
// Generic path.  normType >> 1 maps NORM_INF(1)/L1(2)/L2&L2SQR(4) to
// dispatch-table rows 0/1/2.
2320 NormFunc func = getNormFunc(normType >> 1, depth);
2321 CV_Assert( func != 0 );
2323 const Mat* arrays[] = {&src, &mask, 0};
2333 NAryMatIterator it(arrays, ptrs);
2334 int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
// Small integer depths accumulate into an int and are periodically
// flushed into the double result to avoid integer overflow.
2335 bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
2336 ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
2338 int *ibuf = &result.i;
// Block size chosen so the int accumulator cannot overflow between flushes.
2343 intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
2344 blockSize = std::min(blockSize, intSumBlockSize);
2346 esz = src.elemSize();
2349 for( size_t i = 0; i < it.nplanes; i++, ++it )
2351 for( j = 0; j < total; j += blockSize )
2353 int bsz = std::min(total - j, blockSize);
2354 func( ptrs[0], ptrs[1], (uchar*)ibuf, bsz, cn );
// Flush the int accumulator when it is near capacity or at the very end.
2356 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
// Convert whichever accumulator was actually used into the double result.
2368 if( normType == NORM_INF )
2370 if( depth == CV_64F )
2372 else if( depth == CV_32F )
2373 result.d = result.f;
2375 result.d = result.i;
2377 else if( normType == NORM_L2 )
2378 result.d = std::sqrt(result.d);
// OpenCL implementation of the two-array norm: computes |src1 - src2| on the
// device with the generic binary-op kernel configured as OP_ABSDIFF, then
// reduces the difference image with the single-array cv::norm.  Returns false
// for unsupported norm/type combinations so the caller falls back to the CPU.
// NOTE(review): this is a line-sampled excerpt — braces, the early
// "return false" bodies and the final "return true" are not visible here.
2387 static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask, double & result )
2389 const ocl::Device & d = ocl::Device::getDefault();
2390 int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), rowsPerWI = d.isIntel() ? 4 : 1;
2391 bool doubleSupport = d.doubleFPConfig() > 0;
// Strip NORM_RELATIVE from the flags, but remember it so the result can be
// divided by norm(src2) at the end.
2392 bool relative = (normType & NORM_RELATIVE) != 0;
2393 normType &= ~NORM_RELATIVE;
// Reject unsupported norm types, and CV_64F on devices without FP64 support.
2395 if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) ||
2396 (!doubleSupport && depth == CV_64F))
// Accumulate in at least CV_32S so small integer depths do not overflow.
2399 int wdepth = std::max(CV_32S, depth);
// Instantiate the generic arithmetic kernel as an element-wise absdiff.
2401 ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
2402 format("-D BINARY_OP -D OP_ABSDIFF -D dstT=%s -D workT=dstT -D srcT1=%s -D srcT2=srcT1"
2403 " -D convertToDT=%s -D convertToWT1=convertToDT -D convertToWT2=convertToDT -D rowsPerWI=%d%s",
2404 ocl::typeToStr(wdepth), ocl::typeToStr(depth),
2405 ocl::convertTypeStr(depth, wdepth, 1, cvt), rowsPerWI,
2406 doubleSupport ? " -D DOUBLE_SUPPORT" : ""))
2410 UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(), diff(src1.size(), CV_MAKE_TYPE(wdepth, cn));
2411 k.args(ocl::KernelArg::ReadOnlyNoSize(src1), ocl::KernelArg::ReadOnlyNoSize(src2),
2412 ocl::KernelArg::WriteOnly(diff, cn));
// Each work-item processes rowsPerWI rows (4 on Intel devices, 1 otherwise).
2414 size_t globalsize[2] = { diff.cols * cn, (diff.rows + rowsPerWI - 1) / rowsPerWI };
2415 if (!k.run(2, globalsize, NULL, false))
// Reduce the absolute-difference image on the host/device via the 1-array norm.
2418 result = cv::norm(diff, normType, _mask);
// Relative norm: normalize by ||src2||; DBL_EPSILON guards against a zero
// denominator.
2420 result /= cv::norm(src2, normType, _mask) + DBL_EPSILON;
// Computes the (possibly relative) norm of the difference of two arrays:
// NORM_INF / NORM_L1 / NORM_L2 / NORM_L2SQR, plus NORM_HAMMING / NORM_HAMMING2
// for CV_8U data.  Dispatch order: OpenCL path, IPP fast paths (masked C1/C3,
// unmasked hint/no-hint variants), a special contiguous 32F shortcut, the
// Hamming branch, and finally the generic block-summed element-wise loop.
// NOTE(review): line-sampled excerpt — braces, several declarations
// (e.g. "double _result", "union {...} result", "Ipp64f norm") and some guard
// lines are not visible; comments below describe only the visible lines.
2429 double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
2431 CV_Assert( _src1.sameSize(_src2) && _src1.type() == _src2.type() );
// Try the OpenCL implementation first for 2-D UMats.
2435 CV_OCL_RUN_(_src1.isUMat() && _src2.isUMat() &&
2436 _src1.dims() <= 2 && _src2.dims() <= 2,
2437 ocl_norm(_src1, _src2, normType, _mask, _result),
// ---- Relative norm: ||src1 - src2|| / ||src2|| ----
2441 if( normType & CV_RELATIVE )
2443 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
2444 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
// Drop the CV_RELATIVE/CV_DIFF bits, keeping only the base norm type.
2446 normType &= NORM_TYPE_MASK;
2447 CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR ||
2448 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
// IPP needs a 2-D view; fold N-D continuous data into rows x cols.
2449 size_t total_size = src1.total();
2450 int rows = src1.size[0], cols = (int)(total_size/rows);
2451 if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
2452 && cols > 0 && (size_t)rows*cols == total_size
2453 && (normType == NORM_INF || normType == NORM_L1 ||
2454 normType == NORM_L2 || normType == NORM_L2SQR) )
2456 IppiSize sz = { cols, rows };
2457 int type = src1.type();
// Masked single-channel relative-norm IPP kernels, selected by norm and type.
2460 typedef IppStatus (CV_STDCALL* ippiMaskNormRelFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
2461 ippiMaskNormRelFuncC1 ippFuncC1 =
2462 normType == NORM_INF ?
2463 (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8u_C1MR :
2464 type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8s_C1MR :
2465 type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_16u_C1MR :
2466 type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_32f_C1MR :
2468 normType == NORM_L1 ?
2469 (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8u_C1MR :
2470 type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8s_C1MR :
2471 type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_16u_C1MR :
2472 type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_32f_C1MR :
2474 normType == NORM_L2 || normType == NORM_L2SQR ?
2475 (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8u_C1MR :
2476 type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8s_C1MR :
2477 type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_16u_C1MR :
2478 type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_32f_C1MR :
// NORM_L2SQR is derived by squaring the L2 result that IPP returns.
2483 if( ippFuncC1(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, &norm) >= 0 )
2484 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
// IPP failed: record the status and fall through to the generic path.
2485 setIppErrorStatus();
// Unmasked relative-norm kernels; 32F variants take an accuracy hint.
2490 typedef IppStatus (CV_STDCALL* ippiNormRelFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *);
2491 typedef IppStatus (CV_STDCALL* ippiNormRelFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
2492 ippiNormRelFuncNoHint ippFuncNoHint =
2493 normType == NORM_INF ?
2494 (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_8u_C1R :
2495 type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16u_C1R :
2496 type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16s_C1R :
2497 type == CV_32FC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_32f_C1R :
2499 normType == NORM_L1 ?
2500 (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_8u_C1R :
2501 type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16u_C1R :
2502 type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16s_C1R :
2504 normType == NORM_L2 || normType == NORM_L2SQR ?
2505 (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_8u_C1R :
2506 type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16u_C1R :
2507 type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16s_C1R :
2509 ippiNormRelFuncHint ippFuncHint =
2510 normType == NORM_L1 ?
2511 (type == CV_32FC1 ? (ippiNormRelFuncHint)ippiNormRel_L1_32f_C1R :
2513 normType == NORM_L2 || normType == NORM_L2SQR ?
2514 (type == CV_32FC1 ? (ippiNormRelFuncHint)ippiNormRel_L2_32f_C1R :
2519 if( ippFuncNoHint(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], sz, &norm) >= 0 )
2520 return (double)norm;
2521 setIppErrorStatus();
2526 if( ippFuncHint(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], sz, &norm, ippAlgHintAccurate) >= 0 )
2527 return (double)norm;
2528 setIppErrorStatus();
// Generic relative-norm fallback: recurse for numerator and denominator;
// DBL_EPSILON keeps the division finite when ||src2|| == 0.
2533 return norm(_src1, _src2, normType & ~CV_RELATIVE, _mask)/(norm(_src2, normType, _mask) + DBL_EPSILON);
// ---- Absolute difference norm path ----
2536 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
2537 int depth = src1.depth(), cn = src1.channels();
2540 CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
2541 normType == NORM_L2 || normType == NORM_L2SQR ||
2542 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
2544 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
// Same 2-D folding trick as in the relative branch above.
2545 size_t total_size = src1.total();
2546 int rows = src1.size[0], cols = (int)(total_size/rows);
2547 if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
2548 && cols > 0 && (size_t)rows*cols == total_size
2549 && (normType == NORM_INF || normType == NORM_L1 ||
2550 normType == NORM_L2 || normType == NORM_L2SQR) )
2552 IppiSize sz = { cols, rows };
2553 int type = src1.type();
// Masked single-channel NormDiff IPP kernels.
2556 typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
2557 ippiMaskNormDiffFuncC1 ippFuncC1 =
2558 normType == NORM_INF ?
2559 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8u_C1MR :
2560 type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8s_C1MR :
2561 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_16u_C1MR :
2562 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_32f_C1MR :
2564 normType == NORM_L1 ?
2565 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8u_C1MR :
2566 type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8s_C1MR :
2567 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_16u_C1MR :
2568 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_32f_C1MR :
2570 normType == NORM_L2 || normType == NORM_L2SQR ?
2571 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8u_C1MR :
2572 type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8s_C1MR :
2573 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_16u_C1MR :
2574 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_32f_C1MR :
2579 if( ippFuncC1(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, &norm) >= 0 )
2580 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
2581 setIppErrorStatus();
// Masked 3-channel kernels: each channel is processed separately (COI 1..3)
// and the per-channel norms are combined below.
2583 typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC3)(const void *, int, const void *, int, const void *, int, IppiSize, int, Ipp64f *);
2584 ippiMaskNormDiffFuncC3 ippFuncC3 =
2585 normType == NORM_INF ?
2586 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8u_C3CMR :
2587 type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8s_C3CMR :
2588 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_16u_C3CMR :
2589 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_32f_C3CMR :
2591 normType == NORM_L1 ?
2592 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8u_C3CMR :
2593 type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8s_C3CMR :
2594 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_16u_C3CMR :
2595 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_32f_C3CMR :
2597 normType == NORM_L2 || normType == NORM_L2SQR ?
2598 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8u_C3CMR :
2599 type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8s_C3CMR :
2600 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_16u_C3CMR :
2601 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_32f_C3CMR :
2605 Ipp64f norm1, norm2, norm3;
2606 if( ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 &&
2607 ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 &&
2608 ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0)
// Combine per-channel norms: max for INF, sum for L1, Euclidean for L2.
2611 normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) :
2612 normType == NORM_L1 ? norm1 + norm2 + norm3 :
2613 normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) :
2615 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm;
2617 setIppErrorStatus();
// Unmasked NormDiff kernels: 32F variants take an algorithm hint, the
// integer variants do not.
2622 typedef IppStatus (CV_STDCALL* ippiNormDiffFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
2623 typedef IppStatus (CV_STDCALL* ippiNormDiffFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *);
2624 ippiNormDiffFuncHint ippFuncHint =
2625 normType == NORM_L1 ?
2626 (type == CV_32FC1 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C1R :
2627 type == CV_32FC3 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C3R :
2628 type == CV_32FC4 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C4R :
2630 normType == NORM_L2 || normType == NORM_L2SQR ?
2631 (type == CV_32FC1 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C1R :
2632 type == CV_32FC3 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C3R :
2633 type == CV_32FC4 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C4R :
2635 ippiNormDiffFuncNoHint ippFuncNoHint =
2636 normType == NORM_INF ?
2637 (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C1R :
2638 type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C3R :
2639 type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C4R :
2640 type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C1R :
2641 type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C3R :
2642 type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C4R :
2643 type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C1R :
// 16s C3/C4 Inf variants are gated: older IPP (7.1/8.0) returned -32768 here.
2644 #if (IPP_VERSION_X100 >= 801)
2645 type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
2646 type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768
2648 type == CV_32FC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C1R :
2649 type == CV_32FC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C3R :
2650 type == CV_32FC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C4R :
2652 normType == NORM_L1 ?
2653 (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C1R :
2654 type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C3R :
2655 type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C4R :
2656 type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C1R :
2657 type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C3R :
2658 type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C4R :
2659 type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C1R :
2660 type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C3R :
2661 type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C4R :
2663 normType == NORM_L2 || normType == NORM_L2SQR ?
2664 (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C1R :
2665 type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C3R :
2666 type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C4R :
2667 type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C1R :
2668 type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C3R :
2669 type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C4R :
2670 type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C1R :
2671 type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C3R :
2672 type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C4R :
2674 // Make sure only zero or one version of the function pointer is valid
2675 CV_Assert(!ippFuncHint || !ippFuncNoHint);
2676 if( ippFuncHint || ippFuncNoHint )
// Multi-channel result arrives one value per channel; combine below.
2678 Ipp64f norm_array[4];
2679 IppStatus ret = ippFuncHint ? ippFuncHint(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], sz, norm_array, ippAlgHintAccurate) :
2680 ippFuncNoHint(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], sz, norm_array);
// For L2/L2SQR accumulate squared channel norms; sqrt is applied only for
// NORM_L2 at the end.
2683 Ipp64f norm = (normType == NORM_L2 || normType == NORM_L2SQR) ? norm_array[0] * norm_array[0] : norm_array[0];
2684 for( int i = 1; i < src1.channels(); i++ )
2687 normType == NORM_INF ? std::max(norm, norm_array[i]) :
2688 normType == NORM_L1 ? norm + norm_array[i] :
2689 normType == NORM_L2 || normType == NORM_L2SQR ? norm + norm_array[i] * norm_array[i] :
2692 return normType == NORM_L2 ? (double)std::sqrt(norm) : (double)norm;
2694 setIppErrorStatus();
// Fast path: contiguous unmasked 32F data reduced with the optimized
// low-level normDiff* kernels (len must fit in an int).
2700 if( src1.isContinuous() && src2.isContinuous() && mask.empty() )
2702 size_t len = src1.total()*src1.channels();
2703 if( len == (size_t)(int)len )
2705 if( src1.depth() == CV_32F )
2707 const float* data1 = src1.ptr<float>();
2708 const float* data2 = src2.ptr<float>();
2710 if( normType == NORM_L2 )
2713 GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
2714 return std::sqrt(result);
2716 if( normType == NORM_L2SQR )
2719 GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
2722 if( normType == NORM_L1 )
2725 GET_OPTIMIZED(normDiffL1_32f)(data1, data2, 0, &result, (int)len, 1);
2728 if( normType == NORM_INF )
2731 GET_OPTIMIZED(normDiffInf_32f)(data1, data2, 0, &result, (int)len, 1);
2738 CV_Assert( mask.empty() || mask.type() == CV_8U );
// Hamming norms: with a mask, fall back to xor + and + recursion; otherwise
// iterate planes and count differing bits (cellSize 2 for NORM_HAMMING2).
2740 if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
2745 bitwise_xor(src1, src2, temp);
2746 bitwise_and(temp, mask, temp);
2747 return norm(temp, normType);
2749 int cellSize = normType == NORM_HAMMING ? 1 : 2;
2751 const Mat* arrays[] = {&src1, &src2, 0};
2753 NAryMatIterator it(arrays, ptrs);
2754 int total = (int)it.size;
2757 for( size_t i = 0; i < it.nplanes; i++, ++it )
2758 result += normHamming(ptrs[0], ptrs[1], total, cellSize);
// Generic path: per-type normDiff function over plane-sized blocks.
// normType >> 1 maps NORM_INF/L1/L2 to the function-table index.
2763 NormDiffFunc func = getNormDiffFunc(normType >> 1, depth);
2764 CV_Assert( func != 0 );
2766 const Mat* arrays[] = {&src1, &src2, &mask, 0};
2777 NAryMatIterator it(arrays, ptrs);
2778 int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0;
// Small integer depths accumulate in an int and are periodically flushed to
// the double accumulator before the int can overflow.
2779 bool blockSum = (normType == NORM_L1 && depth <= CV_16S) ||
2780 ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S);
2782 unsigned *ibuf = &result.u;
2787 intSumBlockSize = normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15);
2788 blockSize = std::min(blockSize, intSumBlockSize);
2790 esz = src1.elemSize();
2793 for( size_t i = 0; i < it.nplanes; i++, ++it )
2795 for( j = 0; j < total; j += blockSize )
2797 int bsz = std::min(total - j, blockSize);
2798 func( ptrs[0], ptrs[1], ptrs[2], (uchar*)ibuf, bsz, cn );
// Flush the int partial sum when the next block could overflow, or at the
// very end of the iteration.
2800 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
// Convert the type-punned accumulator to double for the return value.
2813 if( normType == NORM_INF )
2815 if( depth == CV_64F )
2817 else if( depth == CV_32F )
2818 result.d = result.f;
2820 result.d = result.u;
2822 else if( normType == NORM_L2 )
2823 result.d = std::sqrt(result.d);
2829 ///////////////////////////////////// batch distance ///////////////////////////////////////
// Computes L1 (cityblock) distances between the single vector src1 and each
// of the nvecs rows of src2 (row stride step2 in bytes), writing one distance
// per row into dist.  _Tp is the element type, _Rt the accumulator/result.
// NOTE(review): excerpt gaps — the if(!mask)/else framing around the two
// loops is not visible here; the loops are the unmasked and masked variants.
2834 template<typename _Tp, typename _Rt>
2835 void batchDistL1_(const _Tp* src1, const _Tp* src2, size_t step2,
2836 int nvecs, int len, _Rt* dist, const uchar* mask)
// Convert the byte stride into an element stride.
2838 step2 /= sizeof(src2[0]);
2841 for( int i = 0; i < nvecs; i++ )
2842 dist[i] = normL1<_Tp, _Rt>(src1, src2 + step2*i, len);
// Masked variant: rejected rows get the max sentinel so they can never win
// a nearest-neighbour comparison.
2846 _Rt val0 = std::numeric_limits<_Rt>::max();
2847 for( int i = 0; i < nvecs; i++ )
2848 dist[i] = mask[i] ? normL1<_Tp, _Rt>(src1, src2 + step2*i, len) : val0;
// Computes squared-L2 distances between src1 and each of the nvecs rows of
// src2 (row stride step2 in bytes).  Same layout/contract as batchDistL1_.
// NOTE(review): excerpt gaps — the if(!mask)/else framing is not visible.
2852 template<typename _Tp, typename _Rt>
2853 void batchDistL2Sqr_(const _Tp* src1, const _Tp* src2, size_t step2,
2854 int nvecs, int len, _Rt* dist, const uchar* mask)
// Byte stride -> element stride.
2856 step2 /= sizeof(src2[0]);
2859 for( int i = 0; i < nvecs; i++ )
2860 dist[i] = normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len);
// Masked rows receive the max sentinel.
2864 _Rt val0 = std::numeric_limits<_Rt>::max();
2865 for( int i = 0; i < nvecs; i++ )
2866 dist[i] = mask[i] ? normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len) : val0;
// Computes Euclidean (L2) distances between src1 and each of the nvecs rows
// of src2: sqrt of the squared-L2 per row.  Same contract as batchDistL1_.
// NOTE(review): excerpt gaps — the if(!mask)/else framing is not visible.
2870 template<typename _Tp, typename _Rt>
2871 void batchDistL2_(const _Tp* src1, const _Tp* src2, size_t step2,
2872 int nvecs, int len, _Rt* dist, const uchar* mask)
// Byte stride -> element stride.
2874 step2 /= sizeof(src2[0]);
2877 for( int i = 0; i < nvecs; i++ )
2878 dist[i] = std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len));
// Masked rows receive the max sentinel.
2882 _Rt val0 = std::numeric_limits<_Rt>::max();
2883 for( int i = 0; i < nvecs; i++ )
2884 dist[i] = mask[i] ? std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len)) : val0;
// Hamming distances (1-bit cells) between the descriptor src1 and each of the
// nvecs rows of src2; integer results.
// NOTE(review): excerpt gaps — the if(!mask)/else framing and the sentinel
// declaration (presumably "int val0 = INT_MAX;") are not visible here.
2888 static void batchDistHamming(const uchar* src1, const uchar* src2, size_t step2,
2889 int nvecs, int len, int* dist, const uchar* mask)
// Byte stride -> element stride (no-op for uchar, kept for uniformity).
2891 step2 /= sizeof(src2[0]);
2894 for( int i = 0; i < nvecs; i++ )
2895 dist[i] = normHamming(src1, src2 + step2*i, len);
// Masked variant: rejected rows get the sentinel val0.
2900 for( int i = 0; i < nvecs; i++ )
2901 dist[i] = mask[i] ? normHamming(src1, src2 + step2*i, len) : val0;
// Hamming distances with 2-bit cells (NORM_HAMMING2): a cell counts as one
// mismatch if any of its two bits differ.  Same contract as batchDistHamming.
// NOTE(review): excerpt gaps — the if(!mask)/else framing and the sentinel
// declaration are not visible here.
2905 static void batchDistHamming2(const uchar* src1, const uchar* src2, size_t step2,
2906 int nvecs, int len, int* dist, const uchar* mask)
// Byte stride -> element stride.
2908 step2 /= sizeof(src2[0]);
2911 for( int i = 0; i < nvecs; i++ )
2912 dist[i] = normHamming(src1, src2 + step2*i, len, 2);
// Masked variant: rejected rows get the sentinel val0.
2917 for( int i = 0; i < nvecs; i++ )
2918 dist[i] = mask[i] ? normHamming(src1, src2 + step2*i, len, 2) : val0;
// Thin adapter: L1 batch distance for 8-bit input with int results; forwards
// to the generic template so it can be dispatched through BatchDistFunc.
2922 static void batchDistL1_8u32s(const uchar* src1, const uchar* src2, size_t step2,
2923 int nvecs, int len, int* dist, const uchar* mask)
2925 batchDistL1_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask);
// Thin adapter: L1 batch distance for 8-bit input with float results.
2928 static void batchDistL1_8u32f(const uchar* src1, const uchar* src2, size_t step2,
2929 int nvecs, int len, float* dist, const uchar* mask)
2931 batchDistL1_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
// Thin adapter: squared-L2 batch distance for 8-bit input with int results.
2934 static void batchDistL2Sqr_8u32s(const uchar* src1, const uchar* src2, size_t step2,
2935 int nvecs, int len, int* dist, const uchar* mask)
2937 batchDistL2Sqr_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask);
// Thin adapter: squared-L2 batch distance for 8-bit input with float results.
2940 static void batchDistL2Sqr_8u32f(const uchar* src1, const uchar* src2, size_t step2,
2941 int nvecs, int len, float* dist, const uchar* mask)
2943 batchDistL2Sqr_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
// Thin adapter: Euclidean batch distance for 8-bit input with float results.
2946 static void batchDistL2_8u32f(const uchar* src1, const uchar* src2, size_t step2,
2947 int nvecs, int len, float* dist, const uchar* mask)
2949 batchDistL2_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask);
// Thin adapter: L1 batch distance for 32-bit float input/results.
2952 static void batchDistL1_32f(const float* src1, const float* src2, size_t step2,
2953 int nvecs, int len, float* dist, const uchar* mask)
2955 batchDistL1_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
// Thin adapter: squared-L2 batch distance for 32-bit float input/results.
2958 static void batchDistL2Sqr_32f(const float* src1, const float* src2, size_t step2,
2959 int nvecs, int len, float* dist, const uchar* mask)
2961 batchDistL2Sqr_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
// Thin adapter: Euclidean batch distance for 32-bit float input/results.
2964 static void batchDistL2_32f(const float* src1, const float* src2, size_t step2,
2965 int nvecs, int len, float* dist, const uchar* mask)
2967 batchDistL2_<float, float>(src1, src2, step2, nvecs, len, dist, mask);
// Type-erased function-pointer signature shared by all batchDist* kernels;
// cv::batchDistance casts the concrete implementations to this type and
// dispatches through it (dist is passed as a raw uchar* buffer).
2970 typedef void (*BatchDistFunc)(const uchar* src1, const uchar* src2, size_t step2,
2971 int nvecs, int len, uchar* dist, const uchar* mask);
// ParallelLoopBody that, for each row i of src1 in the assigned range,
// computes distances to all rows of src2 via the selected BatchDistFunc and,
// when K > 0, maintains the K best (smallest-distance) matches per row by
// insertion into the sorted dist/nidx output rows.
// NOTE(review): excerpt gaps — the constructor body/initializer list, the
// member declarations and several braces are not visible in this view.
2974 struct BatchDistInvoker : public ParallelLoopBody
2976 BatchDistInvoker( const Mat& _src1, const Mat& _src2,
2977 Mat& _dist, Mat& _nidx, int _K,
2978 const Mat& _mask, int _update,
2979 BatchDistFunc _func)
2991 void operator()(const Range& range) const
// Scratch row of distances, used only when K-best selection is requested.
2993 AutoBuffer<int> buf(src2->rows);
2996 for( int i = range.start; i < range.end; i++ )
// Distances from src1 row i to every src2 row: into the scratch buffer when
// K > 0, otherwise straight into the output row; mask row passed if present.
2998 func(src1->ptr(i), src2->ptr(), src2->step, src2->rows, src2->cols,
2999 K > 0 ? (uchar*)bufptr : dist->ptr(i), mask->data ? mask->ptr(i) : 0);
3003 int* nidxptr = nidx->ptr<int>(i);
3004 // since positive float's can be compared just like int's,
3005 // we handle both CV_32S and CV_32F cases with a single branch
3006 int* distptr = (int*)dist->ptr(i);
3010 for( j = 0; j < src2->rows; j++ )
// Insertion step: when candidate d beats the current K-th best, shift worse
// entries down and insert.  (Presumably d comes from bufptr[j]; that line is
// not visible in this excerpt — confirm against the full source.)
3013 if( d < distptr[K-1] )
3015 for( k = K-2; k >= 0 && distptr[k] > d; k-- )
3017 nidxptr[k+1] = nidxptr[k];
3018 distptr[k+1] = distptr[k];
3020 nidxptr[k+1] = j + update;
// Computes, for every row (descriptor) of src1, the distance to every row of
// src2 — or, when K > 0, only the K nearest rows (dist plus their indices in
// nidx).  Supports L1/L2/L2SQR on CV_8U and CV_32F data and Hamming norms on
// CV_8U; crosscheck performs mutual-nearest-neighbour filtering.
// NOTE(review): excerpt gaps — the dtype-defaulting guard (before the
// assignment below), the crosscheck tdist/tidx declarations, the inner
// "if( d < d0 )" guards of the crosscheck loops and several braces are not
// visible in this view.
3040 void cv::batchDistance( InputArray _src1, InputArray _src2,
3041 OutputArray _dist, int dtype, OutputArray _nidx,
3042 int normType, int K, InputArray _mask,
3043 int update, bool crosscheck )
3045 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
3046 int type = src1.type();
3047 CV_Assert( type == src2.type() && src1.cols == src2.cols &&
3048 (type == CV_32F || type == CV_8U));
// Index output is required exactly when K-nearest selection is requested.
3049 CV_Assert( _nidx.needed() == (K > 0) );
// Default output depth: int for Hamming norms, float otherwise.
3053 dtype = normType == NORM_HAMMING || normType == NORM_HAMMING2 ? CV_32S : CV_32F;
3055 CV_Assert( (type == CV_8U && dtype == CV_32S) || dtype == CV_32F);
// Cannot ask for more neighbours than src2 has rows.
3057 K = std::min(K, src2.rows);
3059 _dist.create(src1.rows, (K > 0 ? K : src2.rows), dtype);
3060 Mat dist = _dist.getMat(), nidx;
3061 if( _nidx.needed() )
3063 _nidx.create(dist.size(), CV_32S);
3064 nidx = _nidx.getMat();
// Fresh K-NN search: initialize distances to "infinity" and indices to -1.
3067 if( update == 0 && K > 0 )
3069 dist = Scalar::all(dtype == CV_32S ? (double)INT_MAX : (double)FLT_MAX);
3070 nidx = Scalar::all(-1);
// Crosscheck (mutual nearest neighbour): only supported for K == 1 with no
// mask and no incremental update; runs the search in the reverse direction.
3075 CV_Assert( K == 1 && update == 0 && mask.empty() );
3077 batchDistance(src2, src1, tdist, dtype, tidx, normType, K, mask, 0, false);
3079 // if an idx-th element from src1 appeared to be the nearest to i-th element of src2,
3080 // we update the minimum mutual distance between idx-th element of src1 and the whole src2 set.
3081 // As a result, if nidx[idx] = i*, it means that idx-th element of src1 is the nearest
3082 // to i*-th element of src2 and i*-th element of src2 is the closest to idx-th element of src1.
3083 // If nidx[idx] = -1, it means that there is no such ideal couple for it in src2.
3084 // This O(N) procedure is called cross-check and it helps to eliminate some false matches.
3085 if( dtype == CV_32S )
3087 for( int i = 0; i < tdist.rows; i++ )
3089 int idx = tidx.at<int>(i);
3090 int d = tdist.at<int>(i), d0 = dist.at<int>(idx);
// Keep the match only if it improves the currently stored distance
// (the "if( d < d0 )" guard is among the lines dropped from this excerpt).
3093 dist.at<int>(idx) = d;
3094 nidx.at<int>(idx) = i + update;
// Same mutual-NN update for float distances.
3100 for( int i = 0; i < tdist.rows; i++ )
3102 int idx = tidx.at<int>(i);
3103 float d = tdist.at<float>(i), d0 = dist.at<float>(idx);
3106 dist.at<float>(idx) = d;
3107 nidx.at<int>(idx) = i + update;
// Select the concrete distance kernel by (element type, output depth, norm).
3114 BatchDistFunc func = 0;
3117 if( normType == NORM_L1 && dtype == CV_32S )
3118 func = (BatchDistFunc)batchDistL1_8u32s;
3119 else if( normType == NORM_L1 && dtype == CV_32F )
3120 func = (BatchDistFunc)batchDistL1_8u32f;
3121 else if( normType == NORM_L2SQR && dtype == CV_32S )
3122 func = (BatchDistFunc)batchDistL2Sqr_8u32s;
3123 else if( normType == NORM_L2SQR && dtype == CV_32F )
3124 func = (BatchDistFunc)batchDistL2Sqr_8u32f;
3125 else if( normType == NORM_L2 && dtype == CV_32F )
3126 func = (BatchDistFunc)batchDistL2_8u32f;
3127 else if( normType == NORM_HAMMING && dtype == CV_32S )
3128 func = (BatchDistFunc)batchDistHamming;
3129 else if( normType == NORM_HAMMING2 && dtype == CV_32S )
3130 func = (BatchDistFunc)batchDistHamming2;
3132 else if( type == CV_32F && dtype == CV_32F )
3134 if( normType == NORM_L1 )
3135 func = (BatchDistFunc)batchDistL1_32f;
3136 else if( normType == NORM_L2SQR )
3137 func = (BatchDistFunc)batchDistL2Sqr_32f;
3138 else if( normType == NORM_L2 )
3139 func = (BatchDistFunc)batchDistL2_32f;
// No kernel matched the requested combination: report it explicitly.
3143 CV_Error_(CV_StsUnsupportedFormat,
3144 ("The combination of type=%d, dtype=%d and normType=%d is not supported",
3145 type, dtype, normType));
// Parallelize over src1 rows; each worker handles a contiguous row range.
3147 parallel_for_(Range(0, src1.rows),
3148 BatchDistInvoker(src1, src2, dist, nidx, K, mask, update, func));
// Collects the coordinates of all non-zero pixels of a CV_8UC1 matrix into an
// n x 1 CV_32SC2 array of Point(x, y), where n = countNonZero(src).
// NOTE(review): excerpt gaps — the per-pixel "if( bin_ptr[j] )" test that
// guards the store is among the lines dropped from this view.
3152 void cv::findNonZero( InputArray _src, OutputArray _idx )
3154 Mat src = _src.getMat();
3155 CV_Assert( src.type() == CV_8UC1 );
// Count first so the output can be allocated exactly once.
3156 int n = countNonZero(src);
// A pre-existing non-continuous output Mat cannot be written sequentially.
3157 if( _idx.kind() == _InputArray::MAT && !_idx.getMatRef().isContinuous() )
3159 _idx.create(n, 1, CV_32SC2);
3160 Mat idx = _idx.getMat();
3161 CV_Assert(idx.isContinuous());
3162 Point* idx_ptr = (Point*)idx.data;
3164 for( int i = 0; i < src.rows; i++ )
3166 const uchar* bin_ptr = src.ptr(i);
3167 for( int j = 0; j < src.cols; j++ )
// Store the (column, row) coordinate of each non-zero pixel.
3169 *idx_ptr++ = Point(j, i);
// Peak signal-to-noise ratio (in dB) between two 8-bit arrays:
// 20*log10(255 / RMSE).  DBL_EPSILON keeps the ratio finite when the inputs
// are identical (RMSE == 0).
3173 double cv::PSNR(InputArray _src1, InputArray _src2)
// PSNR here is defined for 8-bit data only (peak value 255).
3175 CV_Assert( _src1.depth() == CV_8U )
3176 double diff = std::sqrt(norm(_src1, _src2, NORM_L2SQR)/(_src1.total()*_src1.channels()));
3177 return 20*log10(255./(diff+DBL_EPSILON));
// Legacy C-API wrapper around cv::sum.  For IplImages with a channel of
// interest (COI) selected, only that channel's sum is returned (in the first
// scalar component).
// NOTE(review): excerpt gaps — the "if( coi )" guard and the return statement
// are among the lines dropped from this view.
3181 CV_IMPL CvScalar cvSum( const CvArr* srcarr )
3183 cv::Scalar sum = cv::sum(cv::cvarrToMat(srcarr, false, true, 1));
3184 if( CV_IS_IMAGE(srcarr) )
3186 int coi = cvGetImageCOI((IplImage*)srcarr);
3189 CV_Assert( 0 < coi && coi <= 4 );
// Collapse the result to the selected channel only.
3190 sum = cv::Scalar(sum[coi-1]);
// Legacy C-API wrapper around cv::countNonZero.  Multi-channel inputs must
// have a COI selected; that single channel is extracted and counted.
3196 CV_IMPL int cvCountNonZero( const CvArr* imgarr )
3198 cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
3199 if( img.channels() > 1 )
3200 cv::extractImageCOI(imgarr, img);
3201 return countNonZero(img);
// Legacy C-API wrapper around cv::mean, with optional mask.  For IplImages
// with a COI selected, only that channel's mean is returned.
// NOTE(review): excerpt gaps — the declaration line (return type/CV_IMPL),
// the "if( coi )" guard and the return statement are not visible here.
3206 cvAvg( const void* imgarr, const void* maskarr )
3208 cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
3209 cv::Scalar mean = !maskarr ? cv::mean(img) : cv::mean(img, cv::cvarrToMat(maskarr));
3210 if( CV_IS_IMAGE(imgarr) )
3212 int coi = cvGetImageCOI((IplImage*)imgarr);
3215 CV_Assert( 0 < coi && coi <= 4 );
// Collapse the result to the selected channel only.
3216 mean = cv::Scalar(mean[coi-1]);
// Legacy C-API wrapper around cv::meanStdDev: writes per-channel mean and
// standard deviation into the caller-provided CvScalar out-params (each may
// be null).  Honors the IplImage COI like the other wrappers.
// NOTE(review): excerpt gaps — the declaration line, the mask/"if( coi )"/
// null-pointer guards and several braces are not visible here.
3224 cvAvgSdv( const CvArr* imgarr, CvScalar* _mean, CvScalar* _sdv, const void* maskarr )
3226 cv::Scalar mean, sdv;
3230 mask = cv::cvarrToMat(maskarr);
3232 cv::meanStdDev(cv::cvarrToMat(imgarr, false, true, 1), mean, sdv, mask );
3234 if( CV_IS_IMAGE(imgarr) )
3236 int coi = cvGetImageCOI((IplImage*)imgarr);
3239 CV_Assert( 0 < coi && coi <= 4 );
// Collapse both statistics to the selected channel only.
3240 mean = cv::Scalar(mean[coi-1]);
3241 sdv = cv::Scalar(sdv[coi-1]);
// Copy the results out through the C-API scalar pointers.
3246 *(cv::Scalar*)_mean = mean;
3248 *(cv::Scalar*)_sdv = sdv;
// Legacy C-API wrapper around cv::minMaxLoc with optional mask; any of the
// output pointers may be null.  Multi-channel inputs are reduced to the COI.
// NOTE(review): excerpt gaps — the declaration line and the "if( maskarr )"
// guard before the mask conversion are not visible here.
3253 cvMinMaxLoc( const void* imgarr, double* _minVal, double* _maxVal,
3254 CvPoint* _minLoc, CvPoint* _maxLoc, const void* maskarr )
3256 cv::Mat mask, img = cv::cvarrToMat(imgarr, false, true, 1);
3258 mask = cv::cvarrToMat(maskarr);
3259 if( img.channels() > 1 )
3260 cv::extractImageCOI(imgarr, img);
// CvPoint and cv::Point share layout, so the out-params are cast directly.
3262 cv::minMaxLoc( img, _minVal, _maxVal,
3263 (cv::Point*)_minLoc, (cv::Point*)_maxLoc, mask );
3268 cvNorm( const void* imgA, const void* imgB, int normType, const void* maskarr )
3277 a = cv::cvarrToMat(imgA, false, true, 1);
3279 mask = cv::cvarrToMat(maskarr);
3281 if( a.channels() > 1 && CV_IS_IMAGE(imgA) && cvGetImageCOI((const IplImage*)imgA) > 0 )
3282 cv::extractImageCOI(imgA, a);
3285 return !maskarr ? cv::norm(a, normType) : cv::norm(a, normType, mask);
3287 cv::Mat b = cv::cvarrToMat(imgB, false, true, 1);
3288 if( b.channels() > 1 && CV_IS_IMAGE(imgB) && cvGetImageCOI((const IplImage*)imgB) > 0 )
3289 cv::extractImageCOI(imgB, b);
3291 return !maskarr ? cv::norm(a, b, normType) : cv::norm(a, b, normType, mask);