modules/core/src/norm.cpp

   1 // This file is part of OpenCV project.
   2 // It is subject to the license terms in the LICENSE file found in the top-level directory
   3 // of this distribution and at http://opencv.org/license.html
   4
   5
   6 #include "precomp.hpp"
   7 #include "opencl_kernels_core.hpp"
   8 #include "stat.hpp"
   9
  10 /****************************************************************************************\
  11 *                                         norm                                           *
  12 \****************************************************************************************/
  13
  14 namespace cv { namespace hal {
  15
  16 extern const uchar popCountTable[256] =
  17 {
  18     0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
  19     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  20     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  21     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  22     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  23     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  24     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  25     3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
  26 };
  27
  28 static const uchar popCountTable2[] =
  29 {
  30     0, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
  31     1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3,
  32     1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
  33     2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
  34     1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
  35     2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
  36     1, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4,
  37     2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 3, 3, 3, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4
  38 };
  39
  40 static const uchar popCountTable4[] =
  41 {
  42     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  43     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  44     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  45     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  46     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  47     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  48     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  49     1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
  50 };
  51
  52
  53 int normHamming(const uchar* a, int n, int cellSize)
  54 {
  55     if( cellSize == 1 )
  56         return normHamming(a, n);
  57     const uchar* tab = 0;
  58     if( cellSize == 2 )
  59         tab = popCountTable2;
  60     else if( cellSize == 4 )
  61         tab = popCountTable4;
  62     else
  63         return -1;
  64     int i = 0;
  65     int result = 0;
  66 #if CV_SIMD
  67     v_uint64 t = vx_setzero_u64();
  68     if ( cellSize == 2)
  69     {
  70         v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x55));
  71         for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
  72         {
  73             v_uint16 a0 = v_reinterpret_as_u16(vx_load(a + i));
  74             t += v_popcount(v_reinterpret_as_u64((a0 | (a0 >> 1)) & mask));
  75         }
  76     }
  77     else    // cellSize == 4
  78     {
  79         v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x11));
  80         for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
  81         {
  82             v_uint16 a0 = v_reinterpret_as_u16(vx_load(a + i));
  83             v_uint16 a1 = a0 | (a0 >> 2);
  84             t += v_popcount(v_reinterpret_as_u64((a1 | (a1 >> 1)) & mask));
  85
  86         }
  87     }
  88     result += (int)v_reduce_sum(t);
  89     vx_cleanup();
  90 #elif CV_ENABLE_UNROLLED
  91     for( ; i <= n - 4; i += 4 )
  92         result += tab[a[i]] + tab[a[i+1]] + tab[a[i+2]] + tab[a[i+3]];
  93 #endif
  94     for( ; i < n; i++ )
  95         result += tab[a[i]];
  96     return result;
  97 }
  98
  99 int normHamming(const uchar* a, const uchar* b, int n, int cellSize)
 100 {
 101     if( cellSize == 1 )
 102         return normHamming(a, b, n);
 103     const uchar* tab = 0;
 104     if( cellSize == 2 )
 105         tab = popCountTable2;
 106     else if( cellSize == 4 )
 107         tab = popCountTable4;
 108     else
 109         return -1;
 110     int i = 0;
 111     int result = 0;
 112 #if CV_SIMD
 113     v_uint64 t = vx_setzero_u64();
 114     if ( cellSize == 2)
 115     {
 116         v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x55));
 117         for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
 118         {
 119             v_uint16 ab0 = v_reinterpret_as_u16(vx_load(a + i) ^ vx_load(b + i));
 120             t += v_popcount(v_reinterpret_as_u64((ab0 | (ab0 >> 1)) & mask));
 121         }
 122     }
 123     else    // cellSize == 4
 124     {
 125         v_uint16 mask = v_reinterpret_as_u16(vx_setall_u8(0x11));
 126         for(; i <= n - v_uint8::nlanes; i += v_uint8::nlanes)
 127         {
 128             v_uint16 ab0 = v_reinterpret_as_u16(vx_load(a + i) ^ vx_load(b + i));
 129             v_uint16 ab1 = ab0 | (ab0 >> 2);
 130             t += v_popcount(v_reinterpret_as_u64((ab1 | (ab1 >> 1)) & mask));
 131         }
 132     }
 133     result += (int)v_reduce_sum(t);
 134     vx_cleanup();
 135 #elif CV_ENABLE_UNROLLED
 136     for( ; i <= n - 4; i += 4 )
 137         result += tab[a[i] ^ b[i]] + tab[a[i+1] ^ b[i+1]] +
 138                 tab[a[i+2] ^ b[i+2]] + tab[a[i+3] ^ b[i+3]];
 139 #endif
 140     for( ; i < n; i++ )
 141         result += tab[a[i] ^ b[i]];
 142     return result;
 143 }
 144
 145 float normL2Sqr_(const float* a, const float* b, int n)
 146 {
 147     int j = 0; float d = 0.f;
 148 #if CV_SIMD
 149     v_float32 v_d0 = vx_setzero_f32(), v_d1 = vx_setzero_f32();
 150     v_float32 v_d2 = vx_setzero_f32(), v_d3 = vx_setzero_f32();
 151     for (; j <= n - 4 * v_float32::nlanes; j += 4 * v_float32::nlanes)
 152     {
 153         v_float32 t0 = vx_load(a + j) - vx_load(b + j);
 154         v_float32 t1 = vx_load(a + j + v_float32::nlanes) - vx_load(b + j + v_float32::nlanes);
 155         v_d0 = v_muladd(t0, t0, v_d0);
 156         v_float32 t2 = vx_load(a + j + 2 * v_float32::nlanes) - vx_load(b + j + 2 * v_float32::nlanes);
 157         v_d1 = v_muladd(t1, t1, v_d1);
 158         v_float32 t3 = vx_load(a + j + 3 * v_float32::nlanes) - vx_load(b + j + 3 * v_float32::nlanes);
 159         v_d2 = v_muladd(t2, t2, v_d2);
 160         v_d3 = v_muladd(t3, t3, v_d3);
 161     }
 162     d = v_reduce_sum(v_d0 + v_d1 + v_d2 + v_d3);
 163 #endif
 164     for( ; j < n; j++ )
 165     {
 166         float t = a[j] - b[j];
 167         d += t*t;
 168     }
 169     return d;
 170 }
 171
 172
 173 float normL1_(const float* a, const float* b, int n)
 174 {
 175     int j = 0; float d = 0.f;
 176 #if CV_SIMD
 177     v_float32 v_d0 = vx_setzero_f32(), v_d1 = vx_setzero_f32();
 178     v_float32 v_d2 = vx_setzero_f32(), v_d3 = vx_setzero_f32();
 179     for (; j <= n - 4 * v_float32::nlanes; j += 4 * v_float32::nlanes)
 180     {
 181         v_d0 += v_absdiff(vx_load(a + j), vx_load(b + j));
 182         v_d1 += v_absdiff(vx_load(a + j + v_float32::nlanes), vx_load(b + j + v_float32::nlanes));
 183         v_d2 += v_absdiff(vx_load(a + j + 2 * v_float32::nlanes), vx_load(b + j + 2 * v_float32::nlanes));
 184         v_d3 += v_absdiff(vx_load(a + j + 3 * v_float32::nlanes), vx_load(b + j + 3 * v_float32::nlanes));
 185     }
 186     d = v_reduce_sum(v_d0 + v_d1 + v_d2 + v_d3);
 187 #endif
 188     for( ; j < n; j++ )
 189         d += std::abs(a[j] - b[j]);
 190     return d;
 191 }
 192
 193 int normL1_(const uchar* a, const uchar* b, int n)
 194 {
 195     int j = 0, d = 0;
 196 #if CV_SIMD
 197     for (; j <= n - 4 * v_uint8::nlanes; j += 4 * v_uint8::nlanes)
 198         d += v_reduce_sad(vx_load(a + j), vx_load(b + j)) +
 199              v_reduce_sad(vx_load(a + j + v_uint8::nlanes), vx_load(b + j + v_uint8::nlanes)) +
 200              v_reduce_sad(vx_load(a + j + 2 * v_uint8::nlanes), vx_load(b + j + 2 * v_uint8::nlanes)) +
 201              v_reduce_sad(vx_load(a + j + 3 * v_uint8::nlanes), vx_load(b + j + 3 * v_uint8::nlanes));
 202 #endif
 203     for( ; j < n; j++ )
 204         d += std::abs(a[j] - b[j]);
 205     return d;
 206 }
 207
 208 }} //cv::hal
 209
 210 //==================================================================================================
 211
 212 namespace cv
 213 {
 214
 215 template<typename T, typename ST> int
 216 normInf_(const T* src, const uchar* mask, ST* _result, int len, int cn)
 217 {
 218     ST result = *_result;
 219     if( !mask )
 220     {
 221         result = std::max(result, normInf<T, ST>(src, len*cn));
 222     }
 223     else
 224     {
 225         for( int i = 0; i < len; i++, src += cn )
 226             if( mask[i] )
 227             {
 228                 for( int k = 0; k < cn; k++ )
 229                     result = std::max(result, ST(cv_abs(src[k])));
 230             }
 231     }
 232     *_result = result;
 233     return 0;
 234 }
 235
 236 template<typename T, typename ST> int
 237 normL1_(const T* src, const uchar* mask, ST* _result, int len, int cn)
 238 {
 239     ST result = *_result;
 240     if( !mask )
 241     {
 242         result += normL1<T, ST>(src, len*cn);
 243     }
 244     else
 245     {
 246         for( int i = 0; i < len; i++, src += cn )
 247             if( mask[i] )
 248             {
 249                 for( int k = 0; k < cn; k++ )
 250                     result += cv_abs(src[k]);
 251             }
 252     }
 253     *_result = result;
 254     return 0;
 255 }
 256
 257 template<typename T, typename ST> int
 258 normL2_(const T* src, const uchar* mask, ST* _result, int len, int cn)
 259 {
 260     ST result = *_result;
 261     if( !mask )
 262     {
 263         result += normL2Sqr<T, ST>(src, len*cn);
 264     }
 265     else
 266     {
 267         for( int i = 0; i < len; i++, src += cn )
 268             if( mask[i] )
 269             {
 270                 for( int k = 0; k < cn; k++ )
 271                 {
 272                     T v = src[k];
 273                     result += (ST)v*v;
 274                 }
 275             }
 276     }
 277     *_result = result;
 278     return 0;
 279 }
 280
 281 template<typename T, typename ST> int
 282 normDiffInf_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
 283 {
 284     ST result = *_result;
 285     if( !mask )
 286     {
 287         result = std::max(result, normInf<T, ST>(src1, src2, len*cn));
 288     }
 289     else
 290     {
 291         for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
 292             if( mask[i] )
 293             {
 294                 for( int k = 0; k < cn; k++ )
 295                     result = std::max(result, (ST)std::abs(src1[k] - src2[k]));
 296             }
 297     }
 298     *_result = result;
 299     return 0;
 300 }
 301
 302 template<typename T, typename ST> int
 303 normDiffL1_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
 304 {
 305     ST result = *_result;
 306     if( !mask )
 307     {
 308         result += normL1<T, ST>(src1, src2, len*cn);
 309     }
 310     else
 311     {
 312         for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
 313             if( mask[i] )
 314             {
 315                 for( int k = 0; k < cn; k++ )
 316                     result += std::abs(src1[k] - src2[k]);
 317             }
 318     }
 319     *_result = result;
 320     return 0;
 321 }
 322
 323 template<typename T, typename ST> int
 324 normDiffL2_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
 325 {
 326     ST result = *_result;
 327     if( !mask )
 328     {
 329         result += normL2Sqr<T, ST>(src1, src2, len*cn);
 330     }
 331     else
 332     {
 333         for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
 334             if( mask[i] )
 335             {
 336                 for( int k = 0; k < cn; k++ )
 337                 {
 338                     ST v = src1[k] - src2[k];
 339                     result += v*v;
 340                 }
 341             }
 342     }
 343     *_result = result;
 344     return 0;
 345 }
 346
 347 #define CV_DEF_NORM_FUNC(L, suffix, type, ntype) \
 348     static int norm##L##_##suffix(const type* src, const uchar* mask, ntype* r, int len, int cn) \
 349 { return norm##L##_(src, mask, r, len, cn); } \
 350     static int normDiff##L##_##suffix(const type* src1, const type* src2, \
 351     const uchar* mask, ntype* r, int len, int cn) \
 352 { return normDiff##L##_(src1, src2, mask, r, (int)len, cn); }
 353
 354 #define CV_DEF_NORM_ALL(suffix, type, inftype, l1type, l2type) \
 355     CV_DEF_NORM_FUNC(Inf, suffix, type, inftype) \
 356     CV_DEF_NORM_FUNC(L1, suffix, type, l1type) \
 357     CV_DEF_NORM_FUNC(L2, suffix, type, l2type)
 358
 359 CV_DEF_NORM_ALL(8u, uchar, int, int, int)
 360 CV_DEF_NORM_ALL(8s, schar, int, int, int)
 361 CV_DEF_NORM_ALL(16u, ushort, int, int, double)
 362 CV_DEF_NORM_ALL(16s, short, int, int, double)
 363 CV_DEF_NORM_ALL(32s, int, int, double, double)
 364 CV_DEF_NORM_ALL(32f, float, float, double, double)
 365 CV_DEF_NORM_ALL(64f, double, double, double, double)
 366
 367
 368 typedef int (*NormFunc)(const uchar*, const uchar*, uchar*, int, int);
 369 typedef int (*NormDiffFunc)(const uchar*, const uchar*, const uchar*, uchar*, int, int);
 370
 371 static NormFunc getNormFunc(int normType, int depth)
 372 {
 373     static NormFunc normTab[3][8] =
 374     {
 375         {
 376             (NormFunc)GET_OPTIMIZED(normInf_8u), (NormFunc)GET_OPTIMIZED(normInf_8s), (NormFunc)GET_OPTIMIZED(normInf_16u), (NormFunc)GET_OPTIMIZED(normInf_16s),
 377             (NormFunc)GET_OPTIMIZED(normInf_32s), (NormFunc)GET_OPTIMIZED(normInf_32f), (NormFunc)normInf_64f, 0
 378         },
 379         {
 380             (NormFunc)GET_OPTIMIZED(normL1_8u), (NormFunc)GET_OPTIMIZED(normL1_8s), (NormFunc)GET_OPTIMIZED(normL1_16u), (NormFunc)GET_OPTIMIZED(normL1_16s),
 381             (NormFunc)GET_OPTIMIZED(normL1_32s), (NormFunc)GET_OPTIMIZED(normL1_32f), (NormFunc)normL1_64f, 0
 382         },
 383         {
 384             (NormFunc)GET_OPTIMIZED(normL2_8u), (NormFunc)GET_OPTIMIZED(normL2_8s), (NormFunc)GET_OPTIMIZED(normL2_16u), (NormFunc)GET_OPTIMIZED(normL2_16s),
 385             (NormFunc)GET_OPTIMIZED(normL2_32s), (NormFunc)GET_OPTIMIZED(normL2_32f), (NormFunc)normL2_64f, 0
 386         }
 387     };
 388
 389     return normTab[normType][depth];
 390 }
 391
 392 static NormDiffFunc getNormDiffFunc(int normType, int depth)
 393 {
 394     static NormDiffFunc normDiffTab[3][8] =
 395     {
 396         {
 397             (NormDiffFunc)GET_OPTIMIZED(normDiffInf_8u), (NormDiffFunc)normDiffInf_8s,
 398             (NormDiffFunc)normDiffInf_16u, (NormDiffFunc)normDiffInf_16s,
 399             (NormDiffFunc)normDiffInf_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffInf_32f),
 400             (NormDiffFunc)normDiffInf_64f, 0
 401         },
 402         {
 403             (NormDiffFunc)GET_OPTIMIZED(normDiffL1_8u), (NormDiffFunc)normDiffL1_8s,
 404             (NormDiffFunc)normDiffL1_16u, (NormDiffFunc)normDiffL1_16s,
 405             (NormDiffFunc)normDiffL1_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL1_32f),
 406             (NormDiffFunc)normDiffL1_64f, 0
 407         },
 408         {
 409             (NormDiffFunc)GET_OPTIMIZED(normDiffL2_8u), (NormDiffFunc)normDiffL2_8s,
 410             (NormDiffFunc)normDiffL2_16u, (NormDiffFunc)normDiffL2_16s,
 411             (NormDiffFunc)normDiffL2_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL2_32f),
 412             (NormDiffFunc)normDiffL2_64f, 0
 413         }
 414     };
 415
 416     return normDiffTab[normType][depth];
 417 }
 418
 419 #ifdef HAVE_OPENCL
 420
 421 static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & result )
 422 {
 423     const ocl::Device & d = ocl::Device::getDefault();
 424
 425 #ifdef __ANDROID__
 426     if (d.isNVidia())
 427         return false;
 428 #endif
 429     const int cn = _src.channels();
 430     if (cn > 4)
 431         return false;
 432     int type = _src.type(), depth = CV_MAT_DEPTH(type);
 433     bool doubleSupport = d.doubleFPConfig() > 0,
 434             haveMask = _mask.kind() != _InputArray::NONE;
 435
 436     if (depth >= CV_16F)
 437         return false;  // TODO: support FP16
 438
 439     if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) ||
 440          (!doubleSupport && depth == CV_64F))
 441         return false;
 442
 443     UMat src = _src.getUMat();
 444
 445     if (normType == NORM_INF)
 446     {
 447         if (!ocl_minMaxIdx(_src, NULL, &result, NULL, NULL, _mask,
 448                            std::max(depth, CV_32S), depth != CV_8U && depth != CV_16U))
 449             return false;
 450     }
 451     else if (normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR)
 452     {
 453         Scalar sc;
 454         bool unstype = depth == CV_8U || depth == CV_16U;
 455
 456         if ( !ocl_sum(haveMask ? src : src.reshape(1), sc, normType == NORM_L2 || normType == NORM_L2SQR ?
 457                     OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS), _mask) )
 458             return false;
 459
 460         double s = 0.0;
 461         for (int i = 0; i < (haveMask ? cn : 1); ++i)
 462             s += sc[i];
 463
 464         result = normType == NORM_L1 || normType == NORM_L2SQR ? s : std::sqrt(s);
 465     }
 466
 467     return true;
 468 }
 469
 470 #endif
 471
 472 #ifdef HAVE_IPP
// Attempts to compute the norm of `src` with an Intel IPP primitive.
//   src      : input array
//   normType : NORM_INF, NORM_L1, NORM_L2 or NORM_L2SQR (other values match no primitive)
//   mask     : optional mask; an empty Mat selects the unmasked primitives
//   result   : receives the norm when the function returns true
// Returns true only if a matching primitive exists and reports a non-negative
// status; otherwise returns false and leaves `result` untouched.
// With IPP older than 7.0 the body is compiled out and false is always returned.
static bool ipp_norm(Mat &src, int normType, Mat &mask, double &result)
{
    CV_INSTRUMENT_REGION_IPP();

#if IPP_VERSION_X100 >= 700
    // View the data as a rows x cols image: rows is the first dimension,
    // cols is everything else folded together.
    size_t total_size = src.total();
    int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;

    // Proceed for 2-D arrays, or for continuous src and mask, and only when
    // rows*cols reproduces the element count exactly.
    if( (src.dims == 2 || (src.isContinuous() && mask.isContinuous()))
        && cols > 0 && (size_t)rows*cols == total_size )
    {
        if( !mask.empty() )
        {
            IppiSize sz = { cols, rows };
            int type = src.type();

            // Masked single-channel primitives: 8u, 16u and 32f only.
            // NORM_L2SQR shares the L2 primitive; its output is squared below.
            typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
            ippiMaskNormFuncC1 ippiNorm_C1MR =
                normType == NORM_INF ?
                (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8u_C1MR :
                type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_16u_C1MR :
                type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_32f_C1MR :
                0) :
            normType == NORM_L1 ?
                (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8u_C1MR :
                type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_16u_C1MR :
                type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_32f_C1MR :
                0) :
            normType == NORM_L2 || normType == NORM_L2SQR ?
                (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8u_C1MR :
                type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_16u_C1MR :
                type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_32f_C1MR :
                0) : 0;
            if( ippiNorm_C1MR )
            {
                Ipp64f norm;
                if( CV_INSTRUMENT_FUN_IPP(ippiNorm_C1MR, src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 )
                {
                    result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
                    return true;
                }
            }
            // Masked three-channel primitives: 8u, 16u and 32f only.
            // The extra int argument is passed as 1, 2 and 3 below
            // (presumably the channel of interest - confirm against the IPP reference).
            typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
            ippiMaskNormFuncC3 ippiNorm_C3CMR =
                normType == NORM_INF ?
                (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8u_C3CMR :
                type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_16u_C3CMR :
                type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_32f_C3CMR :
                0) :
            normType == NORM_L1 ?
                (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8u_C3CMR :
                type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_16u_C3CMR :
                type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_32f_C3CMR :
                0) :
            normType == NORM_L2 || normType == NORM_L2SQR ?
                (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8u_C3CMR :
                type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_16u_C3CMR :
                type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_32f_C3CMR :
                0) : 0;
            if( ippiNorm_C3CMR )
            {
                // One call per argument value 1..3; all three must succeed.
                Ipp64f norm1, norm2, norm3;
                if( CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 &&
                    CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 &&
                    CV_INSTRUMENT_FUN_IPP(ippiNorm_C3CMR, src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0)
                {
                    // Combine the three partial norms: max for INF, sum for L1,
                    // root of the sum of squares for L2 / L2SQR.
                    Ipp64f norm =
                        normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) :
                        normType == NORM_L1 ? norm1 + norm2 + norm3 :
                        normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) :
                        0;
                    result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
                    return true;
                }
            }
        }
        else
        {
            // No mask: fold the channels into the width and dispatch on depth
            // only, so the single-channel primitives cover any channel count.
            IppiSize sz = { cols*src.channels(), rows };
            int type = src.depth();

            // 32f L1/L2 primitives take an additional algorithm hint argument.
            typedef IppStatus (CV_STDCALL* ippiNormFuncHint)(const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
            typedef IppStatus (CV_STDCALL* ippiNormFuncNoHint)(const void *, int, IppiSize, Ipp64f *);
            ippiNormFuncHint ippiNormHint =
                normType == NORM_L1 ?
                (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L1_32f_C1R :
                0) :
                normType == NORM_L2 || normType == NORM_L2SQR ?
                (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L2_32f_C1R :
                0) : 0;
            // Hint-less primitives: 8u/16u/16s for every norm, plus 32f for INF.
            ippiNormFuncNoHint ippiNorm =
                normType == NORM_INF ?
                (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C1R :
                type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C1R :
                type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C1R :
                type == CV_32FC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C1R :
                0) :
                normType == NORM_L1 ?
                (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C1R :
                type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C1R :
                type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C1R :
                0) :
                normType == NORM_L2 || normType == NORM_L2SQR ?
                (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C1R :
                type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C1R :
                type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C1R :
                0) : 0;
            if( ippiNormHint || ippiNorm )
            {
                // The hinted primitive takes precedence when both are set.
                Ipp64f norm;
                IppStatus ret = ippiNormHint ? CV_INSTRUMENT_FUN_IPP(ippiNormHint, src.ptr(), (int)src.step[0], sz, &norm, ippAlgHintAccurate) :
                                CV_INSTRUMENT_FUN_IPP(ippiNorm, src.ptr(), (int)src.step[0], sz, &norm);
                if( ret >= 0 )
                {
                    result = (normType == NORM_L2SQR) ? norm * norm : norm;
                    return true;
                }
            }
        }
    }
#else
    CV_UNUSED(src); CV_UNUSED(normType); CV_UNUSED(mask); CV_UNUSED(result);
#endif
    return false;
}
 598 #endif
 599
 600 } // cv::
 601
 602 double cv::norm( InputArray _src, int normType, InputArray _mask )
 603 {
 604     CV_INSTRUMENT_REGION();
 605
 606     normType &= NORM_TYPE_MASK;
 607     CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
 608                normType == NORM_L2 || normType == NORM_L2SQR ||
 609                ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && _src.type() == CV_8U) );
 610
 611 #if defined HAVE_OPENCL || defined HAVE_IPP
 612     double _result = 0;
 613 #endif
 614
 615 #ifdef HAVE_OPENCL
 616     CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
 617                 ocl_norm(_src, normType, _mask, _result),
 618                 _result)
 619 #endif
 620
 621     Mat src = _src.getMat(), mask = _mask.getMat();
 622     CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_norm(src, normType, mask, _result), _result);
 623
 624     int depth = src.depth(), cn = src.channels();
 625     if( src.isContinuous() && mask.empty() )
 626     {
 627         size_t len = src.total()*cn;
 628         if( len == (size_t)(int)len )
 629         {
 630             if( depth == CV_32F )
 631             {
 632                 const float* data = src.ptr<float>();
 633
 634                 if( normType == NORM_L2 )
 635                 {
 636                     double result = 0;
 637                     GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
 638                     return std::sqrt(result);
 639                 }
 640                 if( normType == NORM_L2SQR )
 641                 {
 642                     double result = 0;
 643                     GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1);
 644                     return result;
 645                 }
 646                 if( normType == NORM_L1 )
 647                 {
 648                     double result = 0;
 649                     GET_OPTIMIZED(normL1_32f)(data, 0, &result, (int)len, 1);
 650                     return result;
 651                 }
 652                 if( normType == NORM_INF )
 653                 {
 654                     float result = 0;
 655                     GET_OPTIMIZED(normInf_32f)(data, 0, &result, (int)len, 1);
 656                     return result;
 657                 }
 658             }
 659             if( depth == CV_8U )
 660             {
 661                 const uchar* data = src.ptr<uchar>();
 662
 663                 if( normType == NORM_HAMMING )
 664                 {
 665                     return hal::normHamming(data, (int)len);
 666                 }
 667
 668                 if( normType == NORM_HAMMING2 )
 669                 {
 670                     return hal::normHamming(data, (int)len, 2);
 671                 }
 672             }
 673         }
 674     }
 675
 676     CV_Assert( mask.empty() || mask.type() == CV_8U );
 677
 678     if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
 679     {
 680         if( !mask.empty() )
 681         {
 682             Mat temp;
 683             bitwise_and(src, mask, temp);
 684             return norm(temp, normType);
 685         }
 686         int cellSize = normType == NORM_HAMMING ? 1 : 2;
 687
 688         const Mat* arrays[] = {&src, 0};
 689         uchar* ptrs[1] = {};
 690         NAryMatIterator it(arrays, ptrs);
 691         int total = (int)it.size;
 692         int result = 0;
 693
 694         for( size_t i = 0; i < it.nplanes; i++, ++it )
 695         {
 696             result += hal::normHamming(ptrs[0], total, cellSize);
 697         }
 698
 699         return result;
 700     }
 701
 702     NormFunc func = getNormFunc(normType >> 1, depth == CV_16F ? CV_32F : depth);
 703     CV_Assert( func != 0 );
 704
 705     const Mat* arrays[] = {&src, &mask, 0};
 706     uchar* ptrs[2] = {};
 707     union
 708     {
 709         double d;
 710         int i;
 711         float f;
 712     }
 713     result;
 714     result.d = 0;
 715     NAryMatIterator it(arrays, ptrs);
 716     CV_CheckLT((size_t)it.size, (size_t)INT_MAX, "");
 717
 718     if ((normType == NORM_L1 && depth <= CV_16S) ||
 719         ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S))
 720     {
 721         // special case to handle "integer" overflow in accumulator
 722         const size_t esz = src.elemSize();
 723         const int total = (int)it.size;
 724         const int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn;
 725         const int blockSize = std::min(total, intSumBlockSize);
 726         int isum = 0;
 727         int count = 0;
 728
 729         for (size_t i = 0; i < it.nplanes; i++, ++it)
 730         {
 731             for (int j = 0; j < total; j += blockSize)
 732             {
 733                 int bsz = std::min(total - j, blockSize);
 734                 func(ptrs[0], ptrs[1], (uchar*)&isum, bsz, cn);
 735                 count += bsz;
 736                 if (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total))
 737                 {
 738                     result.d += isum;
 739                     isum = 0;
 740                     count = 0;
 741                 }
 742                 ptrs[0] += bsz*esz;
 743                 if (ptrs[1])
 744                     ptrs[1] += bsz;
 745             }
 746         }
 747     }
 748     else if (depth == CV_16F)
 749     {
 750         const size_t esz = src.elemSize();
 751         const int total = (int)it.size;
 752         const int blockSize = std::min(total, divUp(1024, cn));
 753         AutoBuffer<float, 1026/*divUp(1024,3)*3*/> fltbuf(blockSize * cn);
 754         float* data0 = fltbuf.data();
 755         for (size_t i = 0; i < it.nplanes; i++, ++it)
 756         {
 757             for (int j = 0; j < total; j += blockSize)
 758             {
 759                 int bsz = std::min(total - j, blockSize);
 760                 hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn);
 761                 func((uchar*)data0, ptrs[1], (uchar*)&result.d, bsz, cn);
 762                 ptrs[0] += bsz*esz;
 763                 if (ptrs[1])
 764                     ptrs[1] += bsz;
 765             }
 766         }
 767     }
 768     else
 769     {
 770         // generic implementation
 771         for (size_t i = 0; i < it.nplanes; i++, ++it)
 772         {
 773             func(ptrs[0], ptrs[1], (uchar*)&result, (int)it.size, cn);
 774         }
 775     }
 776
 777     if( normType == NORM_INF )
 778     {
 779         if(depth == CV_64F || depth == CV_16F)
 780             return result.d;
 781         else if (depth == CV_32F)
 782             return result.f;
 783         else
 784             return result.i;
 785     }
 786     else if( normType == NORM_L2 )
 787         return std::sqrt(result.d);
 788
 789     return result.d;
 790 }
 791
 792 //==================================================================================================
 793
 794 #ifdef HAVE_OPENCL
 795
 796 namespace cv {
 797
 798 static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask, double & result )
 799 {
 800 #ifdef __ANDROID__
 801     if (ocl::Device::getDefault().isNVidia())
 802         return false;
 803 #endif
 804
 805     Scalar sc1, sc2;
 806     int cn = _src1.channels();
 807     if (cn > 4)
 808         return false;
 809     int type = _src1.type(), depth = CV_MAT_DEPTH(type);
 810     bool relative = (normType & NORM_RELATIVE) != 0;
 811     normType &= ~NORM_RELATIVE;
 812     bool normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR;
 813
 814 #ifdef __APPLE__
 815     if(normType == NORM_L1 && type == CV_16UC3 && !_mask.empty())
 816         return false;
 817 #endif
 818
 819     if (normsum)
 820     {
 821         if (!ocl_sum(_src1, sc1, normType == NORM_L2 || normType == NORM_L2SQR ?
 822                      OCL_OP_SUM_SQR : OCL_OP_SUM, _mask, _src2, relative, sc2))
 823             return false;
 824     }
 825     else
 826     {
 827         if (!ocl_minMaxIdx(_src1, NULL, &sc1[0], NULL, NULL, _mask, std::max(CV_32S, depth),
 828                            false, _src2, relative ? &sc2[0] : NULL))
 829             return false;
 830         cn = 1;
 831     }
 832
 833     double s2 = 0;
 834     for (int i = 0; i < cn; ++i)
 835     {
 836         result += sc1[i];
 837         if (relative)
 838             s2 += sc2[i];
 839     }
 840
 841     if (normType == NORM_L2)
 842     {
 843         result = std::sqrt(result);
 844         if (relative)
 845             s2 = std::sqrt(s2);
 846     }
 847
 848     if (relative)
 849         result /= (s2 + DBL_EPSILON);
 850
 851     return true;
 852 }
 853
 854 }
 855
 856 #endif
 857
 858 #ifdef HAVE_IPP
 859 namespace cv
 860 {
 861 static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArray _mask, double &result)
 862 {
 863     CV_INSTRUMENT_REGION_IPP();
 864
 865 #if IPP_VERSION_X100 >= 700
 866     Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
 867
 868     if( normType & CV_RELATIVE )
 869     {
 870         normType &= NORM_TYPE_MASK;
 871
 872         size_t total_size = src1.total();
 873         int rows = src1.size[0], cols = rows ? (int)(total_size/rows) : 0;
 874         if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
 875             && cols > 0 && (size_t)rows*cols == total_size )
 876         {
 877             if( !mask.empty() )
 878             {
 879                 IppiSize sz = { cols, rows };
 880                 int type = src1.type();
 881
 882                 typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
 883                 ippiMaskNormDiffFuncC1 ippiNormRel_C1MR =
 884                     normType == NORM_INF ?
 885                     (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_Inf_8u_C1MR :
 886                     type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_Inf_16u_C1MR :
 887                     type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_Inf_32f_C1MR :
 888                     0) :
 889                     normType == NORM_L1 ?
 890                     (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L1_8u_C1MR :
 891                     type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L1_16u_C1MR :
 892                     type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L1_32f_C1MR :
 893                     0) :
 894                     normType == NORM_L2 || normType == NORM_L2SQR ?
 895                     (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L2_8u_C1MR :
 896                     type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L2_16u_C1MR :
 897                     type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormRel_L2_32f_C1MR :
 898                     0) : 0;
 899                 if( ippiNormRel_C1MR )
 900                 {
 901                     Ipp64f norm;
 902                     if( CV_INSTRUMENT_FUN_IPP(ippiNormRel_C1MR, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 )
 903                     {
 904                         result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
 905                         return true;
 906                     }
 907                 }
 908             }
 909             else
 910             {
 911                 IppiSize sz = { cols*src1.channels(), rows };
 912                 int type = src1.depth();
 913
 914                 typedef IppStatus (CV_STDCALL* ippiNormRelFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
 915                 typedef IppStatus (CV_STDCALL* ippiNormRelFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *);
 916                 ippiNormRelFuncHint ippiNormRelHint =
 917                     normType == NORM_L1 ?
 918                     (type == CV_32F ? (ippiNormRelFuncHint)ippiNormRel_L1_32f_C1R :
 919                     0) :
 920                     normType == NORM_L2 || normType == NORM_L2SQR ?
 921                     (type == CV_32F ? (ippiNormRelFuncHint)ippiNormRel_L2_32f_C1R :
 922                     0) : 0;
 923                 ippiNormRelFuncNoHint ippiNormRel =
 924                     normType == NORM_INF ?
 925                     (type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_8u_C1R :
 926                     type == CV_16U ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16u_C1R :
 927                     type == CV_16S ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16s_C1R :
 928                     type == CV_32F ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_32f_C1R :
 929                     0) :
 930                     normType == NORM_L1 ?
 931                     (type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_L1_8u_C1R :
 932                     type == CV_16U ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16u_C1R :
 933                     type == CV_16S ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16s_C1R :
 934                     0) :
 935                     normType == NORM_L2 || normType == NORM_L2SQR ?
 936                     (type == CV_8U ? (ippiNormRelFuncNoHint)ippiNormRel_L2_8u_C1R :
 937                     type == CV_16U ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16u_C1R :
 938                     type == CV_16S ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16s_C1R :
 939                     0) : 0;
 940                 if( ippiNormRelHint || ippiNormRel )
 941                 {
 942                     Ipp64f norm;
 943                     IppStatus ret = ippiNormRelHint ? CV_INSTRUMENT_FUN_IPP(ippiNormRelHint, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm, ippAlgHintAccurate) :
 944                                     CV_INSTRUMENT_FUN_IPP(ippiNormRel, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm);
 945                     if( ret >= 0 )
 946                     {
 947                         result = (normType == NORM_L2SQR) ? norm * norm : norm;
 948                         return true;
 949                     }
 950                 }
 951             }
 952         }
 953         return false;
 954     }
 955
 956     normType &= NORM_TYPE_MASK;
 957
 958     size_t total_size = src1.total();
 959     int rows = src1.size[0], cols = rows ? (int)(total_size/rows) : 0;
 960     if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
 961         && cols > 0 && (size_t)rows*cols == total_size )
 962     {
 963         if( !mask.empty() )
 964         {
 965             IppiSize sz = { cols, rows };
 966             int type = src1.type();
 967
 968             typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *);
 969             ippiMaskNormDiffFuncC1 ippiNormDiff_C1MR =
 970                 normType == NORM_INF ?
 971                 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8u_C1MR :
 972                 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_16u_C1MR :
 973                 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_32f_C1MR :
 974                 0) :
 975                 normType == NORM_L1 ?
 976                 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8u_C1MR :
 977                 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_16u_C1MR :
 978                 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_32f_C1MR :
 979                 0) :
 980                 normType == NORM_L2 || normType == NORM_L2SQR ?
 981                 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8u_C1MR :
 982                 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_16u_C1MR :
 983                 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_32f_C1MR :
 984                 0) : 0;
 985             if( ippiNormDiff_C1MR )
 986             {
 987                 Ipp64f norm;
 988                 if( CV_INSTRUMENT_FUN_IPP(ippiNormDiff_C1MR, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 )
 989                 {
 990                     result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
 991                     return true;
 992                 }
 993             }
 994             typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC3)(const void *, int, const void *, int, const void *, int, IppiSize, int, Ipp64f *);
 995             ippiMaskNormDiffFuncC3 ippiNormDiff_C3CMR =
 996                 normType == NORM_INF ?
 997                 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8u_C3CMR :
 998                 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_16u_C3CMR :
 999                 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_32f_C3CMR :
1000                 0) :
1001                 normType == NORM_L1 ?
1002                 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8u_C3CMR :
1003                 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_16u_C3CMR :
1004                 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_32f_C3CMR :
1005                 0) :
1006                 normType == NORM_L2 || normType == NORM_L2SQR ?
1007                 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8u_C3CMR :
1008                 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_16u_C3CMR :
1009                 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_32f_C3CMR :
1010                 0) : 0;
1011             if (cv::ipp::getIppTopFeatures() & (
1012 #if IPP_VERSION_X100 >= 201700
1013                     ippCPUID_AVX512F |
1014 #endif
1015                     ippCPUID_AVX2)
1016             ) // IPP_DISABLE_NORM_16UC3_mask_small (#11399)
1017             {
1018                 if (normType == NORM_L1 && type == CV_16UC3 && sz.width < 16)
1019                     return false;
1020             }
1021             if( ippiNormDiff_C3CMR )
1022             {
1023                 Ipp64f norm1, norm2, norm3;
1024                 if( CV_INSTRUMENT_FUN_IPP(ippiNormDiff_C3CMR, src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 &&
1025                     CV_INSTRUMENT_FUN_IPP(ippiNormDiff_C3CMR, src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 &&
1026                     CV_INSTRUMENT_FUN_IPP(ippiNormDiff_C3CMR, src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0)
1027                 {
1028                     Ipp64f norm =
1029                         normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) :
1030                         normType == NORM_L1 ? norm1 + norm2 + norm3 :
1031                         normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) :
1032                         0;
1033                     result = (normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm);
1034                     return true;
1035                 }
1036             }
1037         }
1038         else
1039         {
1040             IppiSize sz = { cols*src1.channels(), rows };
1041             int type = src1.depth();
1042
1043             typedef IppStatus (CV_STDCALL* ippiNormDiffFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint);
1044             typedef IppStatus (CV_STDCALL* ippiNormDiffFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *);
1045             ippiNormDiffFuncHint ippiNormDiffHint =
1046                 normType == NORM_L1 ?
1047                 (type == CV_32F ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C1R :
1048                 0) :
1049                 normType == NORM_L2 || normType == NORM_L2SQR ?
1050                 (type == CV_32F ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C1R :
1051                 0) : 0;
1052             ippiNormDiffFuncNoHint ippiNormDiff =
1053                 normType == NORM_INF ?
1054                 (type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C1R :
1055                 type == CV_16U ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C1R :
1056                 type == CV_16S ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C1R :
1057                 type == CV_32F ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C1R :
1058                 0) :
1059                 normType == NORM_L1 ?
1060                 (type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C1R :
1061                 type == CV_16U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C1R :
1062                 type == CV_16S ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C1R :
1063                 0) :
1064                 normType == NORM_L2 || normType == NORM_L2SQR ?
1065                 (type == CV_8U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C1R :
1066                 type == CV_16U ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C1R :
1067                 type == CV_16S ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C1R :
1068                 0) : 0;
1069             if( ippiNormDiffHint || ippiNormDiff )
1070             {
1071                 Ipp64f norm;
1072                 IppStatus ret = ippiNormDiffHint ? CV_INSTRUMENT_FUN_IPP(ippiNormDiffHint, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm, ippAlgHintAccurate) :
1073                                 CV_INSTRUMENT_FUN_IPP(ippiNormDiff, src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm);
1074                 if( ret >= 0 )
1075                 {
1076                     result = (normType == NORM_L2SQR) ? norm * norm : norm;
1077                     return true;
1078                 }
1079             }
1080         }
1081     }
1082 #else
1083     CV_UNUSED(_src1); CV_UNUSED(_src2); CV_UNUSED(normType); CV_UNUSED(_mask); CV_UNUSED(result);
1084 #endif
1085     return false;
1086 }
1087 }
1088 #endif
1089
1090
1091 double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
1092 {
1093     CV_INSTRUMENT_REGION();
1094
1095     CV_CheckTypeEQ(_src1.type(), _src2.type(), "Input type mismatch");
1096     CV_Assert(_src1.sameSize(_src2));
1097
1098 #if defined HAVE_OPENCL || defined HAVE_IPP
1099     double _result = 0;
1100 #endif
1101
1102 #ifdef HAVE_OPENCL
1103     CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src1.isUMat()),
1104                 ocl_norm(_src1, _src2, normType, _mask, _result),
1105                 _result)
1106 #endif
1107
1108     CV_IPP_RUN(IPP_VERSION_X100 >= 700, ipp_norm(_src1, _src2, normType, _mask, _result), _result);
1109
1110     if( normType & CV_RELATIVE )
1111     {
1112         return norm(_src1, _src2, normType & ~CV_RELATIVE, _mask)/(norm(_src2, normType, _mask) + DBL_EPSILON);
1113     }
1114
1115     Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
1116     int depth = src1.depth(), cn = src1.channels();
1117
1118     normType &= 7;
1119     CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
1120                normType == NORM_L2 || normType == NORM_L2SQR ||
1121               ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
1122
1123     if( src1.isContinuous() && src2.isContinuous() && mask.empty() )
1124     {
1125         size_t len = src1.total()*src1.channels();
1126         if( len == (size_t)(int)len )
1127         {
1128             if( src1.depth() == CV_32F )
1129             {
1130                 const float* data1 = src1.ptr<float>();
1131                 const float* data2 = src2.ptr<float>();
1132
1133                 if( normType == NORM_L2 )
1134                 {
1135                     double result = 0;
1136                     GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
1137                     return std::sqrt(result);
1138                 }
1139                 if( normType == NORM_L2SQR )
1140                 {
1141                     double result = 0;
1142                     GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
1143                     return result;
1144                 }
1145                 if( normType == NORM_L1 )
1146                 {
1147                     double result = 0;
1148                     GET_OPTIMIZED(normDiffL1_32f)(data1, data2, 0, &result, (int)len, 1);
1149                     return result;
1150                 }
1151                 if( normType == NORM_INF )
1152                 {
1153                     float result = 0;
1154                     GET_OPTIMIZED(normDiffInf_32f)(data1, data2, 0, &result, (int)len, 1);
1155                     return result;
1156                 }
1157             }
1158         }
1159     }
1160
1161     CV_Assert( mask.empty() || mask.type() == CV_8U );
1162
1163     if( normType == NORM_HAMMING || normType == NORM_HAMMING2 )
1164     {
1165         if( !mask.empty() )
1166         {
1167             Mat temp;
1168             bitwise_xor(src1, src2, temp);
1169             bitwise_and(temp, mask, temp);
1170             return norm(temp, normType);
1171         }
1172         int cellSize = normType == NORM_HAMMING ? 1 : 2;
1173
1174         const Mat* arrays[] = {&src1, &src2, 0};
1175         uchar* ptrs[2] = {};
1176         NAryMatIterator it(arrays, ptrs);
1177         int total = (int)it.size;
1178         int result = 0;
1179
1180         for( size_t i = 0; i < it.nplanes; i++, ++it )
1181         {
1182             result += hal::normHamming(ptrs[0], ptrs[1], total, cellSize);
1183         }
1184
1185         return result;
1186     }
1187
1188     NormDiffFunc func = getNormDiffFunc(normType >> 1, depth == CV_16F ? CV_32F : depth);
1189     CV_Assert( func != 0 );
1190
1191     const Mat* arrays[] = {&src1, &src2, &mask, 0};
1192     uchar* ptrs[3] = {};
1193     union
1194     {
1195         double d;
1196         float f;
1197         int i;
1198         unsigned u;
1199     }
1200     result;
1201     result.d = 0;
1202     NAryMatIterator it(arrays, ptrs);
1203     CV_CheckLT((size_t)it.size, (size_t)INT_MAX, "");
1204
1205     if ((normType == NORM_L1 && depth <= CV_16S) ||
1206         ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S))
1207     {
1208         // special case to handle "integer" overflow in accumulator
1209         const size_t esz = src1.elemSize();
1210         const int total = (int)it.size;
1211         const int intSumBlockSize = normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15);
1212         const int blockSize = std::min(total, intSumBlockSize);
1213         int isum = 0;
1214         int count = 0;
1215
1216         for (size_t i = 0; i < it.nplanes; i++, ++it)
1217         {
1218             for (int j = 0; j < total; j += blockSize)
1219             {
1220                 int bsz = std::min(total - j, blockSize);
1221                 func(ptrs[0], ptrs[1], ptrs[2], (uchar*)&isum, bsz, cn);
1222                 count += bsz;
1223                 if (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total))
1224                 {
1225                     result.d += isum;
1226                     isum = 0;
1227                     count = 0;
1228                 }
1229                 ptrs[0] += bsz*esz;
1230                 ptrs[1] += bsz*esz;
1231                 if (ptrs[2])
1232                     ptrs[2] += bsz;
1233             }
1234         }
1235     }
1236     else if (depth == CV_16F)
1237     {
1238         const size_t esz = src1.elemSize();
1239         const int total = (int)it.size;
1240         const int blockSize = std::min(total, divUp(512, cn));
1241         AutoBuffer<float, 1026/*divUp(512,3)*3*2*/> fltbuf(blockSize * cn * 2);
1242         float* data0 = fltbuf.data();
1243         float* data1 = fltbuf.data() + blockSize * cn;
1244         for (size_t i = 0; i < it.nplanes; i++, ++it)
1245         {
1246             for (int j = 0; j < total; j += blockSize)
1247             {
1248                 int bsz = std::min(total - j, blockSize);
1249                 hal::cvt16f32f((const float16_t*)ptrs[0], data0, bsz * cn);
1250                 hal::cvt16f32f((const float16_t*)ptrs[1], data1, bsz * cn);
1251                 func((uchar*)data0, (uchar*)data1, ptrs[2], (uchar*)&result.d, bsz, cn);
1252                 ptrs[0] += bsz*esz;
1253                 ptrs[1] += bsz*esz;
1254                 if (ptrs[2])
1255                     ptrs[2] += bsz;
1256             }
1257         }
1258     }
1259     else
1260     {
1261         // generic implementation
1262         for (size_t i = 0; i < it.nplanes; i++, ++it)
1263         {
1264             func(ptrs[0], ptrs[1], ptrs[2], (uchar*)&result, (int)it.size, cn);
1265         }
1266     }
1267
1268     if( normType == NORM_INF )
1269     {
1270         if (depth == CV_64F || depth == CV_16F)
1271             return result.d;
1272         else if (depth == CV_32F)
1273             return result.f;
1274         else
1275             return result.u;
1276     }
1277     else if( normType == NORM_L2 )
1278         return std::sqrt(result.d);
1279
1280     return result.d;
1281 }
1282
1283 cv::Hamming::ResultType cv::Hamming::operator()( const unsigned char* a, const unsigned char* b, int size ) const
1284 {
1285     return cv::hal::normHamming(a, b, size);
1286 }
1287
1288 double cv::PSNR(InputArray _src1, InputArray _src2, double R)
1289 {
1290     CV_INSTRUMENT_REGION();
1291
1292     //Input arrays must have depth CV_8U
1293     CV_Assert( _src1.type() == _src2.type() );
1294
1295     double diff = std::sqrt(norm(_src1, _src2, NORM_L2SQR)/(_src1.total()*_src1.channels()));
1296     return 20*log10(R/(diff+DBL_EPSILON));
1297 }