1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html
7 #include "opencl_kernels_core.hpp"
10 /****************************************************************************************\
12 \****************************************************************************************/
14 namespace cv { namespace hal {
// Per-byte bit-count lookup table: entry i is the number of set bits in the
// byte value i (0 for 0x00 up to 8 for 0xFF). It is declared with 256 entries,
// one per possible uchar value; each row below covers 32 consecutive values.
16 extern const uchar popCountTable[256] =
18 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
19 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
20 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
21 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
22 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
23 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
24 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
25 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
// Per-byte lookup table for 2-bit cells: entry i is the number of non-zero
// 2-bit groups in the byte value i (e.g. index 3 -> 1, index 5 -> 2), so the
// stored values range from 0 to 4. File-local; it is assigned to `tab` in the
// two-array normHamming() overload of this file.
28 static const uchar popCountTable2[] =
30 0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
31 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
32 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
33 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
34 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
35 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
36 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
37 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
// Per-byte lookup table for 4-bit cells: entry i is the number of non-zero
// nibbles in the byte value i (0 for 0x00, 1 when exactly one nibble is
// non-zero, 2 when both are). File-local; it is assigned to `tab` for
// cellSize == 4 in the two-array normHamming() overload of this file.
40 static const uchar popCountTable4[] =
42 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
43 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
44 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
45 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
46 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
47 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
48 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
49 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
// Cell-based Hamming norm of a single byte array: judging by the bit tricks
// and table lookups below, it counts the non-zero cellSize-bit groups in the
// n bytes starting at `a` and returns that count.
// NOTE(review): several lines of this function (the first cellSize test, the
// table selection, the declarations of `i`, `result` and `tab`, and the scalar
// tail) are not visible in this excerpt.
53 int normHamming(const uchar* a, int n, int cellSize)
// Delegates to the two-argument overload normHamming(a, n); the guarding
// condition is not shown here (presumably cellSize == 1 -- TODO confirm).
56 return normHamming(a, n);
60 else if( cellSize == 4 )
// Vector path: `t` accumulates the v_popcount results as 64-bit lanes.
67 v_uint64 t = vx_setzero_u64();
// 0x55 = 01010101b keeps the low bit of every 2-bit pair.
70 v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x55));
71 for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
73 v_uint16 a0 = v_reinterpret_as_u16(vx_load(a + i));
// OR each bit with its upper neighbour, then mask: one bit survives per
// non-zero 2-bit pair, so the popcount equals the number of non-zero pairs.
74 t += v_popcount(v_reinterpret_as_u64((a0 | (a0 >> 1)) & mask));
// 0x11 = 00010001b keeps the low bit of every nibble.
79 v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x11));
80 for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
82 v_uint16 a0 = v_reinterpret_as_u16(vx_load(a + i));
// Two OR-folds (>> 2, then >> 1) collapse all four bits of a nibble into its
// low bit before masking and counting.
83 v_uint16 a1 = a0 | (a0 >> 2);
84 t += v_popcount(v_reinterpret_as_u64((a1 | (a1 >> 1)) & mask));
// Fold the vector accumulator into the scalar result.
88 result += (int)v_reduce_sum(t);
// Scalar alternative, unrolled by 4: one table lookup per byte.
90 #elif CV_ENABLE_UNROLLED
91 for( ; i <= n - 4; i += 4 )
92 result += tab[a[i]] + tab[a[i+1]] + tab[a[i+2]] + tab[a[i+3]];
// Cell-based Hamming distance between two byte arrays: the bytes of `a` and
// `b` are XOR-ed and the non-zero cellSize-bit groups of the result are
// counted over n bytes.
// NOTE(review): the first cellSize tests and the declarations of `i` and
// `result` are not visible in this excerpt.
99 int normHamming(const uchar* a, const uchar* b, int n, int cellSize)
// Delegates to the three-argument overload normHamming(a, b, n); the guarding
// condition is not shown here (presumably cellSize == 1 -- TODO confirm).
102 return normHamming(a, b, n);
// Lookup table used by the scalar code below; it stays null unless one of the
// following branches assigns it.
103 const uchar* tab = 0;
105 tab = popCountTable2;
106 else if( cellSize == 4 )
107 tab = popCountTable4;
// Vector path: `t` accumulates the v_popcount results as 64-bit lanes.
113 v_uint64 t = vx_setzero_u64();
// 0x55 = 01010101b keeps the low bit of every 2-bit pair.
116 v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x55));
117 for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
// XOR marks the differing bits of the two inputs.
119 v_uint16 ab0 = v_reinterpret_as_u16(vx_load(a + i) ^ vx_load(b + i));
// One surviving bit per 2-bit pair that contains any difference.
120 t += v_popcount(v_reinterpret_as_u64((ab0 | (ab0 >> 1)) & mask));
123 else // cellSize == 4
// 0x11 = 00010001b keeps the low bit of every nibble.
125 v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x11));
126 for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
128 v_uint16 ab0 = v_reinterpret_as_u16(vx_load(a + i) ^ vx_load(b + i));
// Two OR-folds collapse all four bits of a nibble into its low bit.
129 v_uint16 ab1 = ab0 | (ab0 >> 2);
130 t += v_popcount(v_reinterpret_as_u64((ab1 | (ab1 >> 1)) & mask));
// Fold the vector accumulator into the scalar result.
133 result += (int)v_reduce_sum(t);
// Scalar alternative, unrolled by 4: one table lookup per XOR-ed byte.
135 #elif CV_ENABLE_UNROLLED
136 for( ; i <= n - 4; i += 4 )
137 result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] +
138 tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]];
// Per-byte lookup for the remaining elements (the enclosing loop header is
// not visible in this excerpt).
141 result += tab[a[i] ^ b[i]];
// Squared L2 distance between two float arrays: accumulates (a[j] - b[j])^2
// over n elements.
// NOTE(review): the preprocessor guard around the vector code, the scalar loop
// header and the return statement are not visible in this excerpt.
145 float normL2Sqr_(const float* a, const float* b, int n)
147 int j = 0; float d = 0.f;
// Four independent vector accumulators; each loop iteration consumes
// 4 * v_float32::nlanes elements.
149 v_float32 v_d0 = vx_setzero_f32(), v_d1 = vx_setzero_f32();
150 v_float32 v_d2 = vx_setzero_f32(), v_d3 = vx_setzero_f32();
151 for (; j <= n - 4 * v_float32::nlanes; j += 4 * v_float32::nlanes)
// Loads of the next difference are interleaved with the multiply-add of the
// previous one.
153 v_float32 t0 = vx_load(a + j) - vx_load(b + j);
154 v_float32 t1 = vx_load(a + j + v_float32::nlanes) - vx_load(b + j + v_float32::nlanes);
155 v_d0 = v_muladd(t0, t0, v_d0);
156 v_float32 t2 = vx_load(a + j + 2 * v_float32::nlanes) - vx_load(b + j + 2 * v_float32::nlanes);
157 v_d1 = v_muladd(t1, t1, v_d1);
158 v_float32 t3 = vx_load(a + j + 3 * v_float32::nlanes) - vx_load(b + j + 3 * v_float32::nlanes);
159 v_d2 = v_muladd(t2, t2, v_d2);
160 v_d3 = v_muladd(t3, t3, v_d3);
// Combine the four accumulators and reduce them to one scalar.
162 d = v_reduce_sum(v_d0 + v_d1 + v_d2 + v_d3);
// Scalar handling of the elements left after the vector loop (the lines that
// add t*t to d are not visible here).
166 float t = a[j] - b[j];
// L1 distance between two float arrays: accumulates |a[j] - b[j]| over n
// elements.
// NOTE(review): the preprocessor guard around the vector code, the scalar loop
// header and the return statement are not visible in this excerpt.
173 float normL1_(const float* a, const float* b, int n)
175 int j = 0; float d = 0.f;
// Four independent vector accumulators; each loop iteration consumes
// 4 * v_float32::nlanes elements.
177 v_float32 v_d0 = vx_setzero_f32(), v_d1 = vx_setzero_f32();
178 v_float32 v_d2 = vx_setzero_f32(), v_d3 = vx_setzero_f32();
179 for (; j <= n - 4 * v_float32::nlanes; j += 4 * v_float32::nlanes)
181 v_d0 += v_absdiff(vx_load(a + j), vx_load(b + j));
182 v_d1 += v_absdiff(vx_load(a + j + v_float32::nlanes), vx_load(b + j + v_float32::nlanes));
183 v_d2 += v_absdiff(vx_load(a + j + 2 * v_float32::nlanes), vx_load(b + j + 2 * v_float32::nlanes));
184 v_d3 += v_absdiff(vx_load(a + j + 3 * v_float32::nlanes), vx_load(b + j + 3 * v_float32::nlanes));
// Combine the four accumulators and reduce them to one scalar.
186 d = v_reduce_sum(v_d0 + v_d1 + v_d2 + v_d3);
// Scalar handling of the elements left after the vector loop.
189 d += std::abs(a[j] - b[j]);
// L1 distance between two uchar arrays: accumulates |a[j] - b[j]| over n
// elements into an int.
// NOTE(review): the declarations of `j` and `d`, the preprocessor guard, the
// scalar loop header and the return statement are not visible in this excerpt.
193 int normL1_(const uchar* a, const uchar* b, int n)
// Vector path: four v_reduce_sad() calls per iteration, each over one
// v_uint8 register, added directly to the scalar accumulator.
197 for (; j <= n - 4 * v_uint8::nlanes; j += 4 * v_uint8::nlanes)
198 d += v_reduce_sad(vx_load(a + j), vx_load(b + j)) +
199 v_reduce_sad(vx_load(a + j + v_uint8::nlanes), vx_load(b + j + v_uint8::nlanes)) +
200 v_reduce_sad(vx_load(a + j + 2 * v_uint8::nlanes), vx_load(b + j + 2 * v_uint8::nlanes)) +
201 v_reduce_sad(vx_load(a + j + 3 * v_uint8::nlanes), vx_load(b + j + 3 * v_uint8::nlanes));
// Scalar handling of the remaining elements; the uchar operands are promoted
// to int before the subtraction, so the difference cannot wrap.
204 d += std::abs(a[j] - b[j]);
210 //==================================================================================================
// Infinity-norm kernel: folds max(|src[k]|) into a running maximum that starts
// from *_result. `len` is the number of pixels and `cn` the channel count.
// NOTE(review): the mask tests, the write-back to *_result and the return
// value are not visible in this excerpt.
215 template<typename T, typename ST> int
216 normInf_(const T* src, const uchar* mask, ST* _result, int len, int cn)
// Continue from the caller's accumulated value.
218 ST result = *_result;
// Whole-buffer path: all len*cn elements are handed to normInf<T, ST>()
// (presumably taken when no mask is given -- TODO confirm).
221 result = std::max(result, normInf<T, ST>(src, len*cn));
// Per-pixel path: advance one pixel (cn elements) per iteration.
225 for( int i = 0; i < len; i++, src += cn )
228 for( int k = 0; k < cn; k++ )
229 result = std::max(result, ST(cv_abs(src[k])));
// L1-norm kernel: adds the sum of |src[k]| to a running total that starts from
// *_result. `len` is the number of pixels and `cn` the channel count.
// NOTE(review): the mask tests, the write-back to *_result and the return
// value are not visible in this excerpt.
236 template<typename T, typename ST> int
237 normL1_(const T* src, const uchar* mask, ST* _result, int len, int cn)
// Continue from the caller's accumulated value.
239 ST result = *_result;
// Whole-buffer path: all len*cn elements are handed to normL1<T, ST>()
// (presumably taken when no mask is given -- TODO confirm).
242 result += normL1<T, ST>(src, len*cn);
// Per-pixel path: advance one pixel (cn elements) per iteration.
246 for( int i = 0; i < len; i++, src += cn )
249 for( int k = 0; k < cn; k++ )
250 result += cv_abs(src[k]);
// Squared-L2-norm kernel: adds a sum of squares to a running total that starts
// from *_result; no square root is taken in the visible code. `len` is the
// number of pixels and `cn` the channel count.
// NOTE(review): the mask tests, the body of the inner loop, the write-back to
// *_result and the return value are not visible in this excerpt.
257 template<typename T, typename ST> int
258 normL2_(const T* src, const uchar* mask, ST* _result, int len, int cn)
// Continue from the caller's accumulated value.
260 ST result = *_result;
// Whole-buffer path: all len*cn elements are handed to normL2Sqr<T, ST>()
// (presumably taken when no mask is given -- TODO confirm).
263 result += normL2Sqr<T, ST>(src, len*cn);
// Per-pixel path: advance one pixel (cn elements) per iteration.
267 for( int i = 0; i < len; i++, src += cn )
270 for( int k = 0; k < cn; k++ )
// Infinity-norm kernel for a difference of two arrays: folds
// max(|src1[k] - src2[k]|) into a running maximum that starts from *_result.
// `len` is the number of pixels and `cn` the channel count.
// NOTE(review): the mask tests, the write-back to *_result and the return
// value are not visible in this excerpt.
281 template<typename T, typename ST> int
282 normDiffInf_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
// Continue from the caller's accumulated value.
284 ST result = *_result;
// Whole-buffer path: all len*cn elements go to the two-array normInf<T, ST>()
// (presumably taken when no mask is given -- TODO confirm).
287 result = std::max(result, normInf<T, ST>(src1, src2, len*cn));
// Per-pixel path: both sources advance one pixel (cn elements) per iteration.
291 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
294 for( int k = 0; k < cn; k++ )
295 result = std::max(result, (ST)std::abs(src1[k] - src2[k]));
// L1-norm kernel for a difference of two arrays: adds the sum of
// |src1[k] - src2[k]| to a running total that starts from *_result.
// `len` is the number of pixels and `cn` the channel count.
// NOTE(review): the mask tests, the write-back to *_result and the return
// value are not visible in this excerpt.
302 template<typename T, typename ST> int
303 normDiffL1_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
// Continue from the caller's accumulated value.
305 ST result = *_result;
// Whole-buffer path: all len*cn elements go to the two-array normL1<T, ST>()
// (presumably taken when no mask is given -- TODO confirm).
308 result += normL1<T, ST>(src1, src2, len*cn);
// Per-pixel path: both sources advance one pixel (cn elements) per iteration.
312 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
315 for( int k = 0; k < cn; k++ )
316 result += std::abs(src1[k] - src2[k]);
// Squared-L2-norm kernel for a difference of two arrays: adds a sum of squared
// differences to a running total that starts from *_result; no square root is
// taken in the visible code. `len` is the number of pixels and `cn` the
// channel count.
// NOTE(review): the mask tests, the accumulation of v, the write-back to
// *_result and the return value are not visible in this excerpt.
323 template<typename T, typename ST> int
324 normDiffL2_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
// Continue from the caller's accumulated value.
326 ST result = *_result;
// Whole-buffer path: all len*cn elements go to the two-array
// normL2Sqr<T, ST>() (presumably taken when no mask is given -- TODO confirm).
329 result += normL2Sqr<T, ST>(src1, src2, len*cn);
// Per-pixel path: both sources advance one pixel (cn elements) per iteration.
333 for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
336 for( int k = 0; k < cn; k++ )
// The element difference is formed in the operands' type and then converted
// to the accumulator type ST.
338 ST v = src1[k] - src2[k];
// CV_DEF_NORM_FUNC(L, suffix, type, ntype) defines two static, non-template
// wrappers for one norm kind L and one element type:
//   norm<L>_<suffix>(src, mask, r, len, cn)            -> norm<L>_(...)
//   normDiff<L>_<suffix>(src1, src2, mask, r, len, cn) -> normDiff<L>_(...)
// `type` is the element type and `ntype` the accumulator type that `r` points
// to. The wrappers give each template instantiation a plain function name that
// the dispatch tables further down can reference.
347 #define CV_DEF_NORM_FUNC(L, suffix, type, ntype) \
348 static int norm##L##_##suffix(const type* src, const uchar* mask, ntype* r, int len, int cn) \
349 { return norm##L##_(src, mask, r, len, cn); } \
350 static int normDiff##L##_##suffix(const type* src1, const type* src2, \
351 const uchar* mask, ntype* r, int len, int cn) \
352 { return normDiff##L##_(src1, src2, mask, r, (int)len, cn); }
// CV_DEF_NORM_ALL(suffix, type, inftype, l1type, l2type) expands
// CV_DEF_NORM_FUNC three times for one element type: once each for the Inf,
// L1 and L2 kernels, with a separate accumulator type for each norm kind.
354 #define CV_DEF_NORM_ALL(suffix, type, inftype, l1type, l2type) \
355 CV_DEF_NORM_FUNC(Inf, suffix, type, inftype) \
356 CV_DEF_NORM_FUNC(L1, suffix, type, l1type) \
357 CV_DEF_NORM_FUNC(L2, suffix, type, l2type)
// Wrapper instantiations, one line per element type. The arguments are:
// suffix, element type, Inf accumulator, L1 accumulator, L2 accumulator.
// 8-bit types accumulate everything in int; 16-bit types switch to double for
// L2; 32-bit int uses double for L1 and L2; float uses a float maximum and
// double sums; double uses double throughout.
359 CV_DEF_NORM_ALL(8u, uchar, int, int, int)
360 CV_DEF_NORM_ALL(8s, schar, int, int, int)
361 CV_DEF_NORM_ALL(16u, ushort, int, int, double)
362 CV_DEF_NORM_ALL(16s, short, int, int, double)
363 CV_DEF_NORM_ALL(32s, int, int, double, double)
364 CV_DEF_NORM_ALL(32f, float, float, double, double)
365 CV_DEF_NORM_ALL(64f, double, double, double, double)
// Type-erased kernel signatures used by the dispatch tables: source, mask and
// accumulator are all passed as uchar pointers, followed by two ints (the
// kernels above name them len and cn). NormFunc takes one source pointer,
// NormDiffFunc takes two.
368 typedef int (*NormFunc)(const uchar*, const uchar*, uchar*, int, int);
369 typedef int (*NormDiffFunc)(const uchar*, const uchar*, const uchar*, uchar*, int, int);
// Returns the single-source kernel for a norm kind and element depth by
// indexing a static 3 x 8 table. The rows below are Inf, L1 and L2; the columns
// are 8u, 8s, 16u, 16s, 32s, 32f, 64f and a null entry. The arguments are not
// range-checked in the visible code.
371 static NormFunc getNormFunc(int normType, int depth)
373 static NormFunc normTab[3][8] =
// Row 0: infinity norm.
376 (NormFunc)GET_OPTIMIZED(normInf_8u), (NormFunc)GET_OPTIMIZED(normInf_8s), (NormFunc)GET_OPTIMIZED(normInf_16u), (NormFunc)GET_OPTIMIZED(normInf_16s),
377 (NormFunc)GET_OPTIMIZED(normInf_32s), (NormFunc)GET_OPTIMIZED(normInf_32f), (NormFunc)normInf_64f, 0
// Row 1: L1 norm.
380 (NormFunc)GET_OPTIMIZED(normL1_8u), (NormFunc)GET_OPTIMIZED(normL1_8s), (NormFunc)GET_OPTIMIZED(normL1_16u), (NormFunc)GET_OPTIMIZED(normL1_16s),
381 (NormFunc)GET_OPTIMIZED(normL1_32s), (NormFunc)GET_OPTIMIZED(normL1_32f), (NormFunc)normL1_64f, 0
// Row 2: L2 norm (squared sum).
384 (NormFunc)GET_OPTIMIZED(normL2_8u), (NormFunc)GET_OPTIMIZED(normL2_8s), (NormFunc)GET_OPTIMIZED(normL2_16u), (NormFunc)GET_OPTIMIZED(normL2_16s),
385 (NormFunc)GET_OPTIMIZED(normL2_32s), (NormFunc)GET_OPTIMIZED(normL2_32f), (NormFunc)normL2_64f, 0
// normType is the row index (0..2) and depth the column index (0..7).
389 return normTab[normType][depth];
// Returns the two-source (difference) kernel for a norm kind and element depth
// by indexing a static 3 x 8 table. The rows below are Inf, L1 and L2; the
// columns are 8u, 8s, 16u, 16s, 32s, 32f, 64f and a null entry. Only the 8u
// and 32f entries are wrapped in GET_OPTIMIZED. The arguments are not
// range-checked in the visible code.
392 static NormDiffFunc getNormDiffFunc(int normType, int depth)
394 static NormDiffFunc normDiffTab[3][8] =
// Row 0: infinity norm of the difference.
397 (NormDiffFunc)GET_OPTIMIZED(normDiffInf_8u), (NormDiffFunc)normDiffInf_8s,
398 (NormDiffFunc)normDiffInf_16u, (NormDiffFunc)normDiffInf_16s,
399 (NormDiffFunc)normDiffInf_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffInf_32f),
400 (NormDiffFunc)normDiffInf_64f, 0
// Row 1: L1 norm of the difference.
403 (NormDiffFunc)GET_OPTIMIZED(normDiffL1_8u), (NormDiffFunc)normDiffL1_8s,
404 (NormDiffFunc)normDiffL1_16u, (NormDiffFunc)normDiffL1_16s,
405 (NormDiffFunc)normDiffL1_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL1_32f),
406 (NormDiffFunc)normDiffL1_64f, 0
// Row 2: L2 norm (squared sum) of the difference.
409 (NormDiffFunc)GET_OPTIMIZED(normDiffL2_8u), (NormDiffFunc)normDiffL2_8s,
410 (NormDiffFunc)normDiffL2_16u, (NormDiffFunc)normDiffL2_16s,
411 (NormDiffFunc)normDiffL2_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL2_32f),
412 (NormDiffFunc)normDiffL2_64f, 0
// normType is the row index (0..2) and depth the column index (0..7).
416 return normDiffTab[normType][depth];
// OpenCL implementation of the single-array norm. Writes the norm to `result`;
// the bool return signals whether this path handled the request (the visible
// early exit returns false for an unsupported case).
// NOTE(review): several guard conditions, declarations (e.g. `sc`, `s`) and
// return statements are not visible in this excerpt.
421 static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & result )
423 const ocl::Device & d = ocl::Device::getDefault();
429 const int cn = _src.channels();
432 int type = _src.type(), depth = CV_MAT_DEPTH(type);
// doubleSupport: the device reports a non-zero double-precision FP config.
// haveMask: the caller supplied a mask argument.
433 bool doubleSupport = d.doubleFPConfig() > 0,
434 haveMask = _mask.kind() != _InputArray::NONE;
437 return false; // TODO: support FP16
// Only INF, L1, L2 and L2SQR are handled here, and CV_64F input additionally
// requires double support on the device.
439 if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) ||
440 (!doubleSupport && depth == CV_64F))
443 UMat src = _src.getUMat();
445 if (normType == NORM_INF)
// The infinity norm is obtained through ocl_minMaxIdx(), which receives
// &result in its third argument; the last argument is true for every depth
// except CV_8U and CV_16U.
447 if (!ocl_minMaxIdx(_src, NULL, &result, NULL, NULL, _mask,
448 std::max(depth, CV_32S), depth != CV_8U && depth != CV_16U))
451 else if (normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR)
// Unsigned depths skip the absolute value: OCL_OP_SUM replaces OCL_OP_SUM_ABS.
454 bool unstype = depth == CV_8U || depth == CV_16U;
// Without a mask the source is reshaped to one channel before summing.
// L2 and L2SQR use the sum of squares.
456 if ( !ocl_sum(haveMask ? src : src.reshape(1), sc, normType == NORM_L2 || normType == NORM_L2SQR ?
457 OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS), _mask) )
// With a mask the per-channel sums are visited for all cn channels; without
// one (single-channel reshape) only the first entry is visited.
461 for (int i = 0; i < (haveMask ? cn : 1); ++i)
// Only NORM_L2 takes the square root; L1 and L2SQR return the sum as is.
464 result = normType == NORM_L1 || normType == NORM_L2SQR ? s : std::sqrt(s);
// Intel IPP implementation of the single-array norm. It selects an ippiNorm_*
// primitive from the norm type and the matrix type, calls it, and stores the
// value in `result`.
// NOTE(review): the mask/no-mask branch conditions, the fallback entries of
// the selection chains (presumably 0) and all return statements are not
// visible in this excerpt.
473 static bool ipp_norm(Mat &src, int normType, Mat &mask, double &result)
475 CV_INSTRUMENT_REGION_IPP();
// The IPP code below is compiled only when IPP_VERSION_X100 >= 700.
477 #if IPP_VERSION_X100 >= 700
// View the data as a rows x cols image, where cols is derived from the total
// element count.
478 size_t total_size = src.total();
479 int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
// Proceed only for 2-D matrices or fully continuous data, and only when
// rows*cols reproduces the element count exactly.
481 if( (src.dims == 2 || (src.isContinuous() && mask.isContinuous()))
482 && cols > 0 && (size_t)rows*cols == total_size )
// --- Masked path (functions with the "MR" suffix take a mask pointer) ---
486 IppiSize sz = { cols, rows };
487 int type = src.type();
// Single-channel masked primitives, chosen by norm type and element type.
489 typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
490 ippiMaskNormFuncC1 ippiNorm_C1MR =
491 normType == NORM_INF ?
492 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8u_C1MR :
493 type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_16u_C1MR :
494 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_32f_C1MR :
496 normType == NORM_L1 ?
497 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8u_C1MR :
498 type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_16u_C1MR :
499 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_32f_C1MR :
// NORM_L2SQR reuses the L2 primitive; the result is squared afterwards.
501 normType == NORM_L2 || normType == NORM_L2SQR ?
502 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8u_C1MR :
503 type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_16u_C1MR :
504 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_32f_C1MR :
// A non-negative IPP status is treated as success.
509 if( CV_INSTRUMENT_FUN_IPP(ippiNorm_C1MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 )
511 result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
// Three-channel masked primitives; they take an extra int argument.
515 typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
516 ippiMaskNormFuncC3 ippiNorm_C3CMR =
517 normType == NORM_INF ?
518 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8u_C3CMR :
519 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_16u_C3CMR :
520 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_32f_C3CMR :
522 normType == NORM_L1 ?
523 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8u_C3CMR :
524 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_16u_C3CMR :
525 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_32f_C3CMR :
527 normType == NORM_L2 || normType == NORM_L2SQR ?
528 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8u_C3CMR :
529 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_16u_C3CMR :
530 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_32f_C3CMR :
// One call per channel; the int argument 1, 2, 3 is presumably the
// channel-of-interest index -- TODO confirm against the IPP reference.
534 Ipp64f norm1, norm2, norm3;
535 if( CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 &&
536 CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 &&
537 CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0)
// Merge the three per-channel norms: maximum for INF, sum for L1, and the
// root of the sum of squares for L2/L2SQR.
540 normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) :
541 normType == NORM_L1 ? norm1 + norm2 + norm3 :
542 normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) :
544 result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
// --- Unmasked path: all channels are processed as one single-channel image
// whose width is cols*channels, so only the depth matters below ---
551 IppiSize sz = { cols*src.channels(), rows };
552 int type = src.depth();
// Two function-pointer flavours: the 32f L1/L2 primitives take an extra
// IppHintAlgorithm argument, the others do not.
554 typedef IppStatus (CV_STDCALL* ippiNormFuncHint)(const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
555 typedef IppStatus (CV_STDCALL* ippiNormFuncNoHint)(const void *, int, IppiSize, Ipp64f *);
556 ippiNormFuncHint ippiNormHint =
557 normType == NORM_L1 ?
558 (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L1_32f_C1R :
560 normType == NORM_L2 || normType == NORM_L2SQR ?
561 (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L2_32f_C1R :
563 ippiNormFuncNoHint ippiNorm =
564 normType == NORM_INF ?
565 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C1R :
566 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C1R :
567 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C1R :
568 type == CV_32FC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C1R :
570 normType == NORM_L1 ?
571 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C1R :
572 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C1R :
573 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C1R :
575 normType == NORM_L2 || normType == NORM_L2SQR ?
576 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C1R :
577 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C1R :
578 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C1R :
580 if( ippiNormHint || ippiNorm )
// The hinted variant is used when one was selected and is called with
// ippAlgHintAccurate.
583 IppStatus ret = ippiNormHint ? CV_INSTRUMENT_FUN_IPP(ippiNormHint, src.ptr(), (int)src.step[0], sz, &norm, ippAlgHintAccurate) :
584 CV_INSTRUMENT_FUN_IPP(ippiNorm, src.ptr(), (int)src.step[0], sz, &norm);
587 result = (normType == NORM_L2SQR) ? norm * norm : norm;
// Marks all parameters as used (presumably the branch compiled when the IPP
// version check above fails -- TODO confirm).
594 CV_UNUSED(src); CV_UNUSED(normType); CV_UNUSED(mask); CV_UNUSED(result);
// Computes the norm of a single array, optionally restricted by an 8-bit mask.
// Supported norm types: NORM_INF, NORM_L1, NORM_L2, NORM_L2SQR and, for CV_8U
// input only, NORM_HAMMING / NORM_HAMMING2. The OpenCL and IPP back ends are
// tried first; otherwise the work is done by the kernels selected through
// getNormFunc() or by hal::normHamming().
// NOTE(review): several declarations (e.g. `_result`, `result`, `isum`,
// `count`, `ptrs`, `temp`), conditions and return statements are not visible
// in this excerpt.
602 double cv::norm( InputArray _src, int normType, InputArray _mask )
604 CV_INSTRUMENT_REGION();
// Drop any bits outside NORM_TYPE_MASK before validating the norm type.
606 normType &= NORM_TYPE_MASK;
607 CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
608 normType == NORM_L2 || normType == NORM_L2SQR ||
609 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && _src.type() == CV_8U) );
611 #if defined HAVE_OPENCL || defined HAVE_IPP
// OpenCL path: considered only for UMat input with at most 2 dimensions.
616 CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
617 ocl_norm(_src, normType, _mask, _result),
621 Mat src = _src.getMat(), mask = _mask.getMat();
// IPP path.
622 CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_norm(src, normType, mask, _result), _result);
624 int depth = src.depth(), cn = src.channels();
// Fast path: continuous data without a mask is processed as one flat buffer.
625 if( src.isContinuous() && mask.empty() )
627 size_t len = src.total()*cn;
// Taken only when the element count survives the round trip through int,
// because the kernels take an int length.
628 if( len == (size_t)(int)len )
630 if( depth == CV_32F )
632 const float* data = src.ptr<float>();
// The 32f kernels are called directly with a null mask and cn == 1.
634 if( normType == NORM_L2 )
637 GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
638 return std::sqrt(result);
640 if( normType == NORM_L2SQR )
643 GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
646 if( normType == NORM_L1 )
649 GET_OPTIMIZED(normL1_32f)(data, 0, &result, (int)len, 1);
652 if( normType == NORM_INF )
655 GET_OPTIMIZED(normInf_32f)(data, 0, &result, (int)len, 1);
// Flat-buffer Hamming norms (the depth test guarding this part is not visible).
661 const uchar* data = src.ptr<uchar>();
663 if( normType == NORM_HAMMING )
665 return hal::normHamming(data, (int)len);
668 if( normType == NORM_HAMMING2 )
670 return hal::normHamming(data, (int)len, 2);
// General path: a mask, when present, must be of type CV_8U.
676 CV_Assert( mask.empty() || mask.type() == CV_8U );
678 if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
// The source is ANDed with the mask into `temp` and the norm of `temp` is
// returned via a recursive call without a mask (the guarding condition is not
// visible; presumably a non-empty mask).
683 bitwise_and(src, mask, temp);
684 return norm(temp, normType);
// NORM_HAMMING counts single bits, NORM_HAMMING2 counts 2-bit cells.
686 int cellSize = normType == NORM_HAMMING ? 1 : 2;
688 const Mat* arrays[] = {&src, 0};
690 NAryMatIterator it(arrays, ptrs);
691 int total = (int)it.size;
// Accumulate the Hamming norm plane by plane.
694 for( size_t i = 0; i < it.nplanes; i++, ++it )
696 result += hal::normHamming(ptrs[0], total, cellSize);
// Kernel lookup: normType >> 1 is the table row and the depth the column.
// CV_16F selects the CV_32F kernel; such data is converted to float below.
702 NormFunc func = getNormFunc(normType >> 1, depth == CV_16F ? CV_32F : depth);
703 CV_Assert( func != 0 );
705 const Mat* arrays[] = {&src, &mask, 0};
715 NAryMatIterator it(arrays, ptrs);
// The plane size must be below INT_MAX because the kernels take an int length.
716 CV_CheckLT((size_t)it.size, (size_t)INT_MAX, "");
// These depth/norm combinations are processed in blocks; per the existing
// comment below, this handles overflow of the integer accumulator.
718 if ((normType == NORM_L1 && depth <= CV_16S) ||
719 ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S))
721 // special case to handle "integer" overflow in accumulator
722 const size_t esz = src.elemSize();
723 const int total = (int)it.size;
// Block limit: 2^23 pixels-times-channels for 8-bit L1, otherwise 2^15,
// divided by the channel count.
724 const int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
725 const int blockSize = std::min(total, intSumBlockSize);
729 for (size_t i = 0; i < it.nplanes; i++, ++it)
731 for (int j = 0; j < total; j += blockSize)
733 int bsz = std::min(total - j, blockSize);
// The kernel accumulates into the integer partial sum `isum`.
734 func(ptrs[0], ptrs[1], (uchar*)&isum, bsz, cn);
// True once another block would reach the block limit, or at the very last
// block of the last plane (the action taken is not visible in this excerpt).
736 if (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total))
// CV_16F input: convert to float in blocks, then run the 32f kernel.
748 else if (depth == CV_16F)
750 const size_t esz = src.elemSize();
751 const int total = (int)it.size;
// At most divUp(1024, cn) pixels are converted at a time into `fltbuf`.
752 const int blockSize = std::min(total, divUp(1024, cn));
753 AutoBuffer<float, 1026/*divUp(1024,3)*3*/> fltbuf(blockSize * cn);
754 float* data0 = fltbuf.data();
755 for (size_t i = 0; i < it.nplanes; i++, ++it)
757 for (int j = 0; j < total; j += blockSize)
759 int bsz = std::min(total - j, blockSize);
760 hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn);
761 func((uchar*)data0, ptrs[1], (uchar*)&result.d, bsz, cn);
770 // generic implementation
771 for (size_t i = 0; i < it.nplanes; i++, ++it)
773 func(ptrs[0], ptrs[1], (uchar*)&result, (int)it.size, cn);
// Final conversion: `result` has differently typed members (`.d` is used
// here); which one is returned for NORM_INF depends on the depth (the return
// statements of that branch are not visible).
777 if( normType == NORM_INF )
779 if(depth == CV_64F || depth == CV_16F)
781 else if (depth == CV_32F)
// The kernels accumulate squared sums, so NORM_L2 takes the root here.
786 else if( normType == NORM_L2 )
787 return std::sqrt(result.d);
792 //==================================================================================================
// OpenCL implementation of the two-array (difference) norm, including the
// relative variant selected by the NORM_RELATIVE flag. Writes the norm to
// `result`.
// NOTE(review): the bodies of the early-exit tests, the declarations of
// `sc1`, `sc2` and `s2`, the accumulation inside the channel loop and all
// return statements are not visible in this excerpt.
798 static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask, double & result )
// Special handling for NVidia devices (the action taken is not visible here).
801 if (ocl::Device::getDefault().isNVidia())
806 int cn = _src1.channels();
809 int type = _src1.type(), depth = CV_MAT_DEPTH(type);
// Remember the relative flag, then strip it from the norm type.
810 bool relative = (normType & NORM_RELATIVE) != 0;
811 normType &= ~NORM_RELATIVE;
// Norm types that are computed from a sum (as opposed to a maximum).
812 bool normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR;
// Special case: masked L1 norm on CV_16UC3 data (the action taken is not
// visible here).
815 if(normType == NORM_L1 && type == CV_16UC3 && !_mask.empty())
// Sum-based norms: squared sum for L2/L2SQR, plain sum otherwise; the second
// source and the relative flag are forwarded, with sc2 as an additional output
// argument.
821 if (!ocl_sum(_src1, sc1, normType == NORM_L2 || normType == NORM_L2SQR ?
822 OCL_OP_SUM_SQR : OCL_OP_SUM, _mask, _src2, relative, sc2))
// Maximum-based norm via ocl_minMaxIdx(); &sc2[0] is passed only in the
// relative case.
827 if (!ocl_minMaxIdx(_src1, NULL, &sc1[0], NULL, NULL, _mask, std::max(CV_32S, depth),
828 false, _src2, relative ? &sc2[0] : NULL))
// Per-channel loop over the results.
834 for (int i = 0; i < cn; ++i)
// Only NORM_L2 takes the square root.
841 if (normType == NORM_L2)
843 result = std::sqrt(result);
// DBL_EPSILON in the denominator guards against division by zero.
849 result /= (s2 + DBL_EPSILON);
// Intel IPP implementation of the two-array norm. The relative variant
// (CV_RELATIVE flag set) uses the ippiNormRel_* primitives and the plain
// difference norm uses ippiNormDiff_*. The value is stored in `result`.
// NOTE(review): the mask/no-mask branch conditions, the fallback entries of
// the selection chains (presumably 0), the declarations of `norm` and all
// return statements are not visible in this excerpt.
861 static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArray _mask, double &result)
863 CV_INSTRUMENT_REGION_IPP();
// The IPP code below is compiled only when IPP_VERSION_X100 >= 700.
865 #if IPP_VERSION_X100 >= 700
866 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
// ===== Relative norm =====
868 if( normType & CV_RELATIVE )
870 normType &= NORM_TYPE_MASK;
// View the data as a rows x cols image, where cols is derived from the total
// element count.
872 size_t total_size = src1.total();
873 int rows = src1.size[0], cols = rows ? (int)(total_size/rows) : 0;
// Proceed only for 2-D matrices or fully continuous data, and only when
// rows*cols reproduces the element count exactly.
874 if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
875 && cols > 0 && (size_t)rows*cols == total_size )
// --- Masked path: single-channel primitives only ---
879 IppiSize sz = { cols, rows };
880 int type = src1.type();
882 typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
883 ippiMaskNormDiffFuncC1 ippiNormRel_C1MR =
884 normType == NORM_INF ?
885 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_Inf_8u_C1MR :
886 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_Inf_16u_C1MR :
887 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_Inf_32f_C1MR :
889 normType == NORM_L1 ?
890 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L1_8u_C1MR :
891 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L1_16u_C1MR :
892 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L1_32f_C1MR :
// NORM_L2SQR reuses the L2 primitive; the result is squared afterwards.
894 normType == NORM_L2 || normType == NORM_L2SQR ?
895 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L2_8u_C1MR :
896 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L2_16u_C1MR :
897 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L2_32f_C1MR :
899 if( ippiNormRel_C1MR )
// A non-negative IPP status is treated as success.
902 if( CV_INSTRUMENT_FUN_IPP(ippiNormRel_C1MR, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 )
904 result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
// --- Unmasked path: all channels are processed as one single-channel image
// whose width is cols*channels, so only the depth matters below ---
911 IppiSize sz = { cols*src1.channels(), rows };
912 int type = src1.depth();
// The 32f L1/L2 primitives take an extra IppHintAlgorithm argument.
914 typedef IppStatus (CV_STDCALL* ippiNormRelFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
915 typedef IppStatus (CV_STDCALL* ippiNormRelFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *);
916 ippiNormRelFuncHint ippiNormRelHint =
917 normType == NORM_L1 ?
918 (type == CV_32F ? (ippiNormRelFuncHint)ippiNormRel_L1_32f_C1R :
920 normType == NORM_L2 || normType == NORM_L2SQR ?
921 (type == CV_32F ? (ippiNormRelFuncHint)ippiNormRel_L2_32f_C1R :
923 ippiNormRelFuncNoHint ippiNormRel =
924 normType == NORM_INF ?
925 (type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_8u_C1R :
926 type == CV_16U ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16u_C1R :
927 type == CV_16S ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16s_C1R :
928 type == CV_32F ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_32f_C1R :
930 normType == NORM_L1 ?
931 (type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_L1_8u_C1R :
932 type == CV_16U ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16u_C1R :
933 type == CV_16S ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16s_C1R :
935 normType == NORM_L2 || normType == NORM_L2SQR ?
936 (type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_L2_8u_C1R :
937 type == CV_16U ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16u_C1R :
938 type == CV_16S ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16s_C1R :
940 if( ippiNormRelHint || ippiNormRel )
// The hinted variant is used when one was selected and is called with
// ippAlgHintAccurate.
943 IppStatus ret = ippiNormRelHint ? CV_INSTRUMENT_FUN_IPP(ippiNormRelHint, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm, ippAlgHintAccurate) :
944 CV_INSTRUMENT_FUN_IPP(ippiNormRel, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm);
947 result = (normType == NORM_L2SQR) ? norm * norm : norm;
// ===== Plain difference norm =====
956 normType &= NORM_TYPE_MASK;
958 size_t total_size = src1.total();
959 int rows = src1.size[0], cols = rows ? (int)(total_size/rows) : 0;
// Same shape requirements as in the relative branch above.
960 if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
961 && cols > 0 && (size_t)rows*cols == total_size )
// --- Masked path, single channel ---
965 IppiSize sz = { cols, rows };
966 int type = src1.type();
968 typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
969 ippiMaskNormDiffFuncC1 ippiNormDiff_C1MR =
970 normType == NORM_INF ?
971 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8u_C1MR :
972 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_16u_C1MR :
973 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_32f_C1MR :
975 normType == NORM_L1 ?
976 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8u_C1MR :
977 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_16u_C1MR :
978 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_32f_C1MR :
980 normType == NORM_L2 || normType == NORM_L2SQR ?
981 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8u_C1MR :
982 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_16u_C1MR :
983 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_32f_C1MR :
985 if( ippiNormDiff_C1MR )
988 if( CV_INSTRUMENT_FUN_IPP(ippiNormDiff_C1MR, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 )
990 result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
// --- Masked path, three channels; the primitives take an extra int argument ---
994 typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC3)(const void *, int, const void *, int, const void *, int, IppiSize, int, Ipp64f *);
995 ippiMaskNormDiffFuncC3 ippiNormDiff_C3CMR =
996 normType == NORM_INF ?
997 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8u_C3CMR :
998 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_16u_C3CMR :
999 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_32f_C3CMR :
1001 normType == NORM_L1 ?
1002 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8u_C3CMR :
1003 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_16u_C3CMR :
1004 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_32f_C3CMR :
1006 normType == NORM_L2 || normType == NORM_L2SQR ?
1007 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8u_C3CMR :
1008 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_16u_C3CMR :
1009 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_32f_C3CMR :
// CPU-feature-dependent workaround tagged IPP_DISABLE_NORM_16UC3_mask_small
// (#11399): it targets the masked L1 norm of CV_16UC3 images narrower than
// 16 pixels. The feature list and the action taken are not visible here.
1011 if (cv::ipp::getIppTopFeatures() & (
1012 #if IPP_VERSION_X100 >= 201700
1016 ) // IPP_DISABLE_NORM_16UC3_mask_small (#11399)
1018 if (normType == NORM_L1 && type == CV_16UC3 && sz.width < 16)
1021 if( ippiNormDiff_C3CMR )
// One call per channel; the int argument 1, 2, 3 is presumably the
// channel-of-interest index -- TODO confirm against the IPP reference.
1023 Ipp64f norm1, norm2, norm3;
1024 if( CV_INSTRUMENT_FUN_IPP(ippiNormDiff_C3CMR, src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 &&
1025 CV_INSTRUMENT_FUN_IPP(ippiNormDiff_C3CMR, src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 &&
1026 CV_INSTRUMENT_FUN_IPP(ippiNormDiff_C3CMR, src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0)
// Merge the three per-channel norms: maximum for INF, sum for L1, and the
// root of the sum of squares for L2/L2SQR.
1029 normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) :
1030 normType == NORM_L1 ? norm1 + norm2 + norm3 :
1031 normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) :
1033 result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
// --- Unmasked path: all channels are processed as one single-channel image
// whose width is cols*channels, so only the depth matters below ---
1040 IppiSize sz = { cols*src1.channels(), rows };
1041 int type = src1.depth();
// The 32f L1/L2 primitives take an extra IppHintAlgorithm argument.
1043 typedef IppStatus (CV_STDCALL* ippiNormDiffFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
1044 typedef IppStatus (CV_STDCALL* ippiNormDiffFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *);
1045 ippiNormDiffFuncHint ippiNormDiffHint =
1046 normType == NORM_L1 ?
1047 (type == CV_32F ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C1R :
1049 normType == NORM_L2 || normType == NORM_L2SQR ?
1050 (type == CV_32F ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C1R :
1052 ippiNormDiffFuncNoHint ippiNormDiff =
1053 normType == NORM_INF ?
1054 (type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C1R :
1055 type == CV_16U ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C1R :
1056 type == CV_16S ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C1R :
1057 type == CV_32F ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C1R :
1059 normType == NORM_L1 ?
1060 (type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C1R :
1061 type == CV_16U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C1R :
1062 type == CV_16S ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C1R :
1064 normType == NORM_L2 || normType == NORM_L2SQR ?
1065 (type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C1R :
1066 type == CV_16U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C1R :
1067 type == CV_16S ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C1R :
1069 if( ippiNormDiffHint || ippiNormDiff )
// The hinted variant is used when one was selected and is called with
// ippAlgHintAccurate.
1072 IppStatus ret = ippiNormDiffHint ? CV_INSTRUMENT_FUN_IPP(ippiNormDiffHint, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm, ippAlgHintAccurate) :
1073 CV_INSTRUMENT_FUN_IPP(ippiNormDiff, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm);
1076 result = (normType == NORM_L2SQR) ? norm * norm : norm;
// Marks all parameters as used (presumably the branch compiled when the IPP
// version check above fails -- TODO confirm).
1083 CV_UNUSED(_src1); CV_UNUSED(_src2); CV_UNUSED(normType); CV_UNUSED(_mask); CV_UNUSED(result);
// Two-array overload of cv::norm: computes a norm of the difference between
// _src1 and _src2 according to normType, optionally taking a mask.
// When the CV_RELATIVE bit is set in normType, the difference norm is divided
// by (norm(_src2) + DBL_EPSILON).
// NOTE(review): this excerpt omits some lines of the function (braces, some
// local declarations and returns); the comments below describe only the
// statements that are visible here.
1091 double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
1093 CV_INSTRUMENT_REGION();
// Both inputs must have the same type and the same size.
1095 CV_CheckTypeEQ(_src1.type(), _src2.type(), "Input type mismatch");
1096 CV_Assert(_src1.sameSize(_src2));
1098 #if defined HAVE_OPENCL || defined HAVE_IPP
// Accelerated paths (OpenCL for UMat input, then IPP) are attempted first.
// NOTE(review): presumably these macros return _result when the accelerated
// call succeeds and fall through otherwise - confirm against the macro definitions.
1103 CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src1.isUMat()),
1104 ocl_norm(_src1, _src2, normType, _mask, _result),
1108 CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_norm(_src1, _src2, normType, _mask, _result), _result);
// Relative norm: the difference norm (with the CV_RELATIVE bit cleared) divided by
// the single-array norm of _src2; DBL_EPSILON keeps the denominator non-zero.
1110 if( normType & CV_RELATIVE )
1112 return norm(_src1, _src2, normType & ~CV_RELATIVE, _mask)/(norm(_src2, normType, _mask) + DBL_EPSILON);
1115 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
1116 int depth = src1.depth(), cn = src1.channels();
// Accepted norm types; the Hamming variants are accepted only when the type is CV_8U.
1119 CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
1120 normType == NORM_L2 || normType == NORM_L2SQR ||
1121 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
// Fast path: both inputs continuous and no mask, so the data can be treated as
// one flat run of len scalar values.
1123 if( src1.isContinuous() && src2.isContinuous() && mask.empty() )
1125 size_t len = src1.total()*src1.channels();
// Only taken when len survives a round trip through int (it is passed as (int)len below).
1126 if( len == (size_t)(int)len )
// The flat fast path is implemented for CV_32F data only.
1128 if( src1.depth() == CV_32F )
1130 const float* data1 = src1.ptr<float>();
1131 const float* data2 = src2.ptr<float>();
// NORM_L2 and NORM_L2SQR both use normDiffL2_32f; NORM_L2 returns the square
// root of the accumulated value.
1133 if( normType == NORM_L2 )
1136 GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
1137 return std::sqrt(result);
1139 if( normType == NORM_L2SQR )
1142 GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
1145 if( normType == NORM_L1 )
1148 GET_OPTIMIZED(normDiffL1_32f)(data1, data2, 0, &result, (int)len, 1);
1151 if( normType == NORM_INF )
1154 GET_OPTIMIZED(normDiffInf_32f)(data1, data2, 0, &result, (int)len, 1);
// General path: a mask, if given, must be of type CV_8U.
1161 CV_Assert( mask.empty() || mask.type() == CV_8U );
// Hamming norms are handled separately from the other norm types.
1163 if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
// XOR the inputs, AND the result with the mask, then take the single-array
// norm of that temporary.
// NOTE(review): the condition guarding this masked branch is not visible in
// this excerpt - presumably it runs only when a mask is supplied; confirm.
1168 bitwise_xor(src1, src2, temp);
1169 bitwise_and(temp, mask, temp);
1170 return norm(temp, normType);
// cellSize passed to hal::normHamming: 1 for NORM_HAMMING, 2 otherwise (NORM_HAMMING2).
1172 int cellSize = normType == NORM_HAMMING ? 1 : 2;
1174 const Mat* arrays[] = {&src1, &src2, 0};
1175 uchar* ptrs[2] = {};
1176 NAryMatIterator it(arrays, ptrs);
1177 int total = (int)it.size;
// Accumulate the Hamming distance plane by plane.
1180 for( size_t i = 0; i < it.nplanes; i++, ++it )
1182 result += hal::normHamming(ptrs[0], ptrs[1], total, cellSize);
// Look up the difference-norm routine for this norm type and depth.
// CV_16F requests the CV_32F routine; that data is converted to float before the call (below).
1188 NormDiffFunc func = getNormDiffFunc(normType >> 1, depth == CV_16F ? CV_32F : depth);
1189 CV_Assert( func != 0 );
// Iterate the two sources and the mask together, plane by plane.
1191 const Mat* arrays[] = {&src1, &src2, &mask, 0};
1192 uchar* ptrs[3] = {};
1202 NAryMatIterator it(arrays, ptrs);
// The plane size is cast to int below, so it must be smaller than INT_MAX.
1203 CV_CheckLT((size_t)it.size, (size_t)INT_MAX, "");
// Branch 1: L1 with depth <= CV_16S, or L2/L2SQR with depth <= CV_8S.
1205 if ((normType == NORM_L1 && depth <= CV_16S) ||
1206 ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S))
1208 // special case to handle "integer" overflow in accumulator
1209 const size_t esz = src1.elemSize();
1210 const int total = (int)it.size;
// Block-size limit for the integer accumulator: 1 << 23 for L1 with depth <= CV_8S,
// otherwise 1 << 15.
1211 const int intSumBlockSize = normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15);
1212 const int blockSize = std::min(total, intSumBlockSize);
// Process each plane in chunks of at most blockSize elements, accumulating into isum.
1216 for (size_t i = 0; i < it.nplanes; i++, ++it)
1218 for (int j = 0; j < total; j += blockSize)
1220 int bsz = std::min(total - j, blockSize);
1221 func(ptrs[0], ptrs[1], ptrs[2], (uchar*)&isum, bsz, cn);
// True once another block would reach intSumBlockSize, or on the last chunk of the last plane.
// NOTE(review): the guarded statements are not visible here - presumably isum
// is flushed into the final result and reset; confirm.
1223 if (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total))
// Branch 2: CV_16F input is converted to float in blocks, and func is then
// called on the converted float data.
1236 else if (depth == CV_16F)
1238 const size_t esz = src1.elemSize();
1239 const int total = (int)it.size;
1240 const int blockSize = std::min(total, divUp(512, cn));
// One buffer of 2 * blockSize * cn floats: first half for src1 data, second half for src2 data.
1241 AutoBuffer<float, 1026/*divUp(512,3)*3*2*/> fltbuf(blockSize * cn * 2);
1242 float* data0 = fltbuf.data();
1243 float* data1 = fltbuf.data() + blockSize * cn;
1244 for (size_t i = 0; i < it.nplanes; i++, ++it)
1246 for (int j = 0; j < total; j += blockSize)
1248 int bsz = std::min(total - j, blockSize);
// Convert the current chunk of both inputs to float, then accumulate into result.d.
1249 hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn);
1250 hal::cvt16f32f((const float16_t*)ptrs[1], data1, bsz * cn);
1251 func((uchar*)data0, (uchar*)data1, ptrs[2], (uchar*)&result.d, bsz, cn);
1261 // generic implementation
// One call per plane; func writes its accumulator directly into result.
1262 for (size_t i = 0; i < it.nplanes; i++, ++it)
1264 func(ptrs[0], ptrs[1], ptrs[2], (uchar*)&result, (int)it.size, cn);
// Final result selection depends on the norm type and, for NORM_INF, on the depth.
// NOTE(review): the statements under the NORM_INF depth checks are not
// visible in this excerpt.
1268 if( normType == NORM_INF )
1270 if (depth == CV_64F || depth == CV_16F)
1272 else if (depth == CV_32F)
// NORM_L2 returns the square root of the accumulated value.
1277 else if( normType == NORM_L2 )
1278 return std::sqrt(result.d);
1283 cv::Hamming::ResultType cv::Hamming::operator()( const unsigned char* a, const unsigned char* b, int size ) const
1285 return cv::hal::normHamming(a, b, size);
1288 double cv::PSNR(InputArray _src1, InputArray _src2, double R)
1290 CV_INSTRUMENT_REGION();
1292 //Input arrays must have depth CV_8U
1293 CV_Assert( _src1.type() == _src2.type() );
1295 double diff = std::sqrt(norm(_src1, _src2, NORM_L2SQR)/(_src1.total()*_src1.channels()));
1296 return 20*log10(R/(diff+DBL_EPSILON));