/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

/* ////////////////////////////////////////////////////////////////////
//
//  Arithmetic and logical operations: +, -, *, /, &, |, ^, ~, abs ...
//
// */

#include "precomp.hpp"

namespace cv
{
#if ARITHM_USE_IPP
struct IPPArithmInitializer
{
    IPPArithmInitializer(void)
    {
        ippStaticInit();
    }
};

IPPArithmInitializer ippArithmInitializer;
#endif
template<typename T, class Op, class Op8>
void vBinOp8(const T* src1, size_t step1, const T* src2, size_t step2, T* dst, size_t step, Size sz)
{
#if CV_SSE2
    Op8 op8;
#endif
    Op op;

    for( ; sz.height--; src1 += step1/sizeof(src1[0]),
        src2 += step2/sizeof(src2[0]),
        dst += step/sizeof(dst[0]) )
    {
        int x = 0;

    #if CV_SSE2
        if( USE_SSE2 )
        {
            for( ; x <= sz.width - 32; x += 32 )
            {
                __m128i r0 = _mm_loadu_si128((const __m128i*)(src1 + x));
                __m128i r1 = _mm_loadu_si128((const __m128i*)(src1 + x + 16));
                r0 = op8(r0,_mm_loadu_si128((const __m128i*)(src2 + x)));
                r1 = op8(r1,_mm_loadu_si128((const __m128i*)(src2 + x + 16)));
                _mm_storeu_si128((__m128i*)(dst + x), r0);
                _mm_storeu_si128((__m128i*)(dst + x + 16), r1);
            }
            for( ; x <= sz.width - 8; x += 8 )
            {
                __m128i r0 = _mm_loadl_epi64((const __m128i*)(src1 + x));
                r0 = op8(r0,_mm_loadl_epi64((const __m128i*)(src2 + x)));
                _mm_storel_epi64((__m128i*)(dst + x), r0);
            }
        }
    #endif
#if CV_ENABLE_UNROLLED
        for( ; x <= sz.width - 4; x += 4 )
        {
            T v0 = op(src1[x], src2[x]);
            T v1 = op(src1[x+1], src2[x+1]);
            dst[x] = v0; dst[x+1] = v1;
            v0 = op(src1[x+2], src2[x+2]);
            v1 = op(src1[x+3], src2[x+3]);
            dst[x+2] = v0; dst[x+3] = v1;
        }
#endif
        for( ; x < sz.width; x++ )
            dst[x] = op(src1[x], src2[x]);
    }
}
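// A rough sketch of how vBinOp8 is meant to be instantiated (the actual
// bindings appear in add8u/sub8u/... further below): the scalar functor Op
// handles the row tail, the Op8 functor processes 16 bytes per SSE2
// register on the wide part of each row, e.g.
//
//     vBinOp8<uchar, OpAdd<uchar>, _VAdd8u>(src1, step1, src2, step2, dst, step, sz);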
template<typename T, class Op, class Op16>
void vBinOp16(const T* src1, size_t step1, const T* src2, size_t step2,
              T* dst, size_t step, Size sz)
{
#if CV_SSE2
    Op16 op16;
#endif
    Op op;

    for( ; sz.height--; src1 += step1/sizeof(src1[0]),
        src2 += step2/sizeof(src2[0]),
        dst += step/sizeof(dst[0]) )
    {
        int x = 0;

    #if CV_SSE2
        if( USE_SSE2 )
        {
            for( ; x <= sz.width - 16; x += 16 )
            {
                __m128i r0 = _mm_loadu_si128((const __m128i*)(src1 + x));
                __m128i r1 = _mm_loadu_si128((const __m128i*)(src1 + x + 8));
                r0 = op16(r0,_mm_loadu_si128((const __m128i*)(src2 + x)));
                r1 = op16(r1,_mm_loadu_si128((const __m128i*)(src2 + x + 8)));
                _mm_storeu_si128((__m128i*)(dst + x), r0);
                _mm_storeu_si128((__m128i*)(dst + x + 8), r1);
            }
            for( ; x <= sz.width - 4; x += 4 )
            {
                __m128i r0 = _mm_loadl_epi64((const __m128i*)(src1 + x));
                r0 = op16(r0,_mm_loadl_epi64((const __m128i*)(src2 + x)));
                _mm_storel_epi64((__m128i*)(dst + x), r0);
            }
        }
    #endif
#if CV_ENABLE_UNROLLED
        for( ; x <= sz.width - 4; x += 4 )
        {
            T v0 = op(src1[x], src2[x]);
            T v1 = op(src1[x+1], src2[x+1]);
            dst[x] = v0; dst[x+1] = v1;
            v0 = op(src1[x+2], src2[x+2]);
            v1 = op(src1[x+3], src2[x+3]);
            dst[x+2] = v0; dst[x+3] = v1;
        }
#endif
        for( ; x < sz.width; x++ )
            dst[x] = op(src1[x], src2[x]);
    }
}
template<class Op, class Op32>
void vBinOp32s(const int* src1, size_t step1, const int* src2, size_t step2,
               int* dst, size_t step, Size sz)
{
#if CV_SSE2
    Op32 op32;
#endif
    Op op;

    for( ; sz.height--; src1 += step1/sizeof(src1[0]),
        src2 += step2/sizeof(src2[0]),
        dst += step/sizeof(dst[0]) )
    {
        int x = 0;

#if CV_SSE2
        if( USE_SSE2 )
        {
            if( (((size_t)src1|(size_t)src2|(size_t)dst)&15) == 0 )
                for( ; x <= sz.width - 8; x += 8 )
                {
                    __m128i r0 = _mm_load_si128((const __m128i*)(src1 + x));
                    __m128i r1 = _mm_load_si128((const __m128i*)(src1 + x + 4));
                    r0 = op32(r0,_mm_load_si128((const __m128i*)(src2 + x)));
                    r1 = op32(r1,_mm_load_si128((const __m128i*)(src2 + x + 4)));
                    _mm_store_si128((__m128i*)(dst + x), r0);
                    _mm_store_si128((__m128i*)(dst + x + 4), r1);
                }
            else
                for( ; x <= sz.width - 8; x += 8 )
                {
                    __m128i r0 = _mm_loadu_si128((const __m128i*)(src1 + x));
                    __m128i r1 = _mm_loadu_si128((const __m128i*)(src1 + x + 4));
                    r0 = op32(r0,_mm_loadu_si128((const __m128i*)(src2 + x)));
                    r1 = op32(r1,_mm_loadu_si128((const __m128i*)(src2 + x + 4)));
                    _mm_storeu_si128((__m128i*)(dst + x), r0);
                    _mm_storeu_si128((__m128i*)(dst + x + 4), r1);
                }
        }
#endif
#if CV_ENABLE_UNROLLED
        for( ; x <= sz.width - 4; x += 4 )
        {
            int v0 = op(src1[x], src2[x]);
            int v1 = op(src1[x+1], src2[x+1]);
            dst[x] = v0; dst[x+1] = v1;
            v0 = op(src1[x+2], src2[x+2]);
            v1 = op(src1[x+3], src2[x+3]);
            dst[x+2] = v0; dst[x+3] = v1;
        }
#endif
        for( ; x < sz.width; x++ )
            dst[x] = op(src1[x], src2[x]);
    }
}
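// Note on the alignment test above: OR-ing the three addresses and masking
// with 15 checks all of them at once -- the OR has its low four bits clear
// only if src1, src2 and dst are each 16-byte aligned, which is what the
// aligned _mm_load_si128/_mm_store_si128 variants require.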
template<class Op, class Op32>
void vBinOp32f(const float* src1, size_t step1, const float* src2, size_t step2,
               float* dst, size_t step, Size sz)
{
#if CV_SSE2
    Op32 op32;
#endif
    Op op;

    for( ; sz.height--; src1 += step1/sizeof(src1[0]),
        src2 += step2/sizeof(src2[0]),
        dst += step/sizeof(dst[0]) )
    {
        int x = 0;

#if CV_SSE2
        if( USE_SSE2 )
        {
            if( (((size_t)src1|(size_t)src2|(size_t)dst)&15) == 0 )
                for( ; x <= sz.width - 8; x += 8 )
                {
                    __m128 r0 = _mm_load_ps(src1 + x);
                    __m128 r1 = _mm_load_ps(src1 + x + 4);
                    r0 = op32(r0,_mm_load_ps(src2 + x));
                    r1 = op32(r1,_mm_load_ps(src2 + x + 4));
                    _mm_store_ps(dst + x, r0);
                    _mm_store_ps(dst + x + 4, r1);
                }
            else
                for( ; x <= sz.width - 8; x += 8 )
                {
                    __m128 r0 = _mm_loadu_ps(src1 + x);
                    __m128 r1 = _mm_loadu_ps(src1 + x + 4);
                    r0 = op32(r0,_mm_loadu_ps(src2 + x));
                    r1 = op32(r1,_mm_loadu_ps(src2 + x + 4));
                    _mm_storeu_ps(dst + x, r0);
                    _mm_storeu_ps(dst + x + 4, r1);
                }
        }
#endif
#if CV_ENABLE_UNROLLED
        for( ; x <= sz.width - 4; x += 4 )
        {
            float v0 = op(src1[x], src2[x]);
            float v1 = op(src1[x+1], src2[x+1]);
            dst[x] = v0; dst[x+1] = v1;
            v0 = op(src1[x+2], src2[x+2]);
            v1 = op(src1[x+3], src2[x+3]);
            dst[x+2] = v0; dst[x+3] = v1;
        }
#endif
        for( ; x < sz.width; x++ )
            dst[x] = op(src1[x], src2[x]);
    }
}
template<class Op, class Op64>
void vBinOp64f(const double* src1, size_t step1, const double* src2, size_t step2,
               double* dst, size_t step, Size sz)
{
#if CV_SSE2
    Op64 op64;
#endif
    Op op;

    for( ; sz.height--; src1 += step1/sizeof(src1[0]),
        src2 += step2/sizeof(src2[0]),
        dst += step/sizeof(dst[0]) )
    {
        int x = 0;

#if CV_SSE2
        if( USE_SSE2 && (((size_t)src1|(size_t)src2|(size_t)dst)&15) == 0 )
            for( ; x <= sz.width - 4; x += 4 )
            {
                __m128d r0 = _mm_load_pd(src1 + x);
                __m128d r1 = _mm_load_pd(src1 + x + 2);
                r0 = op64(r0,_mm_load_pd(src2 + x));
                r1 = op64(r1,_mm_load_pd(src2 + x + 2));
                _mm_store_pd(dst + x, r0);
                _mm_store_pd(dst + x + 2, r1);
            }
#endif
        for( ; x <= sz.width - 4; x += 4 )
        {
            double v0 = op(src1[x], src2[x]);
            double v1 = op(src1[x+1], src2[x+1]);
            dst[x] = v0; dst[x+1] = v1;
            v0 = op(src1[x+2], src2[x+2]);
            v1 = op(src1[x+3], src2[x+3]);
            dst[x+2] = v0; dst[x+3] = v1;
        }

        for( ; x < sz.width; x++ )
            dst[x] = op(src1[x], src2[x]);
    }
}
#if CV_SSE2

struct _VAdd8u { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_adds_epu8(a,b); }};
struct _VSub8u { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_subs_epu8(a,b); }};
struct _VMin8u { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_min_epu8(a,b); }};
struct _VMax8u { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_max_epu8(a,b); }};
struct _VAbsDiff8u
{
    __m128i operator()(const __m128i& a, const __m128i& b) const
    { return _mm_add_epi8(_mm_subs_epu8(a,b),_mm_subs_epu8(b,a)); }
};

struct _VAdd8s { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_adds_epi8(a,b); }};
struct _VSub8s { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_subs_epi8(a,b); }};
struct _VMin8s
{
    __m128i operator()(const __m128i& a, const __m128i& b) const
    {
        __m128i m = _mm_cmpgt_epi8(a, b);
        return _mm_xor_si128(a, _mm_and_si128(_mm_xor_si128(a, b), m));
    }
};
struct _VMax8s
{
    __m128i operator()(const __m128i& a, const __m128i& b) const
    {
        __m128i m = _mm_cmpgt_epi8(b, a);
        return _mm_xor_si128(a, _mm_and_si128(_mm_xor_si128(a, b), m));
    }
};
struct _VAbsDiff8s
{
    __m128i operator()(const __m128i& a, const __m128i& b) const
    {
        __m128i d = _mm_subs_epi8(a, b);
        __m128i m = _mm_cmpgt_epi8(b, a);
        return _mm_subs_epi8(_mm_xor_si128(d, m), m);
    }
};

struct _VAdd16u { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_adds_epu16(a,b); }};
struct _VSub16u { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_subs_epu16(a,b); }};
struct _VMin16u
{
    __m128i operator()(const __m128i& a, const __m128i& b) const
    { return _mm_subs_epu16(a,_mm_subs_epu16(a,b)); }
};
struct _VMax16u
{
    __m128i operator()(const __m128i& a, const __m128i& b) const
    { return _mm_adds_epu16(_mm_subs_epu16(a,b),b); }
};
struct _VAbsDiff16u
{
    __m128i operator()(const __m128i& a, const __m128i& b) const
    { return _mm_add_epi16(_mm_subs_epu16(a,b),_mm_subs_epu16(b,a)); }
};

struct _VAdd16s { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_adds_epi16(a,b); }};
struct _VSub16s { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_subs_epi16(a,b); }};
struct _VMin16s { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_min_epi16(a,b); }};
struct _VMax16s { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_max_epi16(a,b); }};
struct _VAbsDiff16s
{
    __m128i operator()(const __m128i& a, const __m128i& b) const
    {
        __m128i M = _mm_max_epi16(a,b), m = _mm_min_epi16(a,b);
        return _mm_subs_epi16(M, m);
    }
};

struct _VAdd32s { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_add_epi32(a,b); }};
struct _VSub32s { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_sub_epi32(a,b); }};
struct _VMin32s
{
    __m128i operator()(const __m128i& a, const __m128i& b) const
    {
        __m128i m = _mm_cmpgt_epi32(a, b);
        return _mm_xor_si128(a, _mm_and_si128(_mm_xor_si128(a, b), m));
    }
};
struct _VMax32s
{
    __m128i operator()(const __m128i& a, const __m128i& b) const
    {
        __m128i m = _mm_cmpgt_epi32(b, a);
        return _mm_xor_si128(a, _mm_and_si128(_mm_xor_si128(a, b), m));
    }
};
struct _VAbsDiff32s
{
    __m128i operator()(const __m128i& a, const __m128i& b) const
    {
        __m128i d = _mm_sub_epi32(a, b);
        __m128i m = _mm_cmpgt_epi32(b, a);
        return _mm_sub_epi32(_mm_xor_si128(d, m), m);
    }
};

struct _VAdd32f { __m128 operator()(const __m128& a, const __m128& b) const { return _mm_add_ps(a,b); }};
struct _VSub32f { __m128 operator()(const __m128& a, const __m128& b) const { return _mm_sub_ps(a,b); }};
struct _VMin32f { __m128 operator()(const __m128& a, const __m128& b) const { return _mm_min_ps(a,b); }};
struct _VMax32f { __m128 operator()(const __m128& a, const __m128& b) const { return _mm_max_ps(a,b); }};
static int CV_DECL_ALIGNED(16) v32f_absmask[] = { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff };
struct _VAbsDiff32f
{
    __m128 operator()(const __m128& a, const __m128& b) const
    {
        return _mm_and_ps(_mm_sub_ps(a,b), *(const __m128*)v32f_absmask);
    }
};

struct _VAdd64f { __m128d operator()(const __m128d& a, const __m128d& b) const { return _mm_add_pd(a,b); }};
struct _VSub64f { __m128d operator()(const __m128d& a, const __m128d& b) const { return _mm_sub_pd(a,b); }};
struct _VMin64f { __m128d operator()(const __m128d& a, const __m128d& b) const { return _mm_min_pd(a,b); }};
struct _VMax64f { __m128d operator()(const __m128d& a, const __m128d& b) const { return _mm_max_pd(a,b); }};

static int CV_DECL_ALIGNED(16) v64f_absmask[] = { 0xffffffff, 0x7fffffff, 0xffffffff, 0x7fffffff };
struct _VAbsDiff64f
{
    __m128d operator()(const __m128d& a, const __m128d& b) const
    {
        return _mm_and_pd(_mm_sub_pd(a,b), *(const __m128d*)v64f_absmask);
    }
};

struct _VAnd8u { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_and_si128(a,b); }};
struct _VOr8u  { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_or_si128(a,b); }};
struct _VXor8u { __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_xor_si128(a,b); }};
struct _VNot8u { __m128i operator()(const __m128i& a, const __m128i&) const { return _mm_xor_si128(_mm_set1_epi32(-1),a); }};

#endif

#if CV_SSE2
#define IF_SIMD(op) op
#else
#define IF_SIMD(op) NOP
#endif
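// The branchless _VMin*/_VMax* functors above rely on the identity
// a ^ ((a ^ b) & m) == (m ? b : a) for a lane-wide comparison mask m.
// For example, with 8-bit lanes a = 7, b = 3: m = _mm_cmpgt_epi8(a, b) is
// all ones, so the result is a ^ (a ^ b) = b = 3, the minimum. The signed
// _VAbsDiff* functors use the related two's-complement identity
// (d ^ m) - m == -d when m is all ones, negating the difference only in
// the lanes where it came out negative.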
template<> inline uchar OpAdd<uchar>::operator ()(uchar a, uchar b) const
{ return CV_FAST_CAST_8U(a + b); }
template<> inline uchar OpSub<uchar>::operator ()(uchar a, uchar b) const
{ return CV_FAST_CAST_8U(a - b); }

template<typename T> struct OpAbsDiff
{
    typedef T type1; typedef T type2; typedef T rtype;
    T operator()(T a, T b) const { return (T)std::abs(a - b); }
};

template<> inline short OpAbsDiff<short>::operator ()(short a, short b) const
{ return saturate_cast<short>(std::abs(a - b)); }

template<> inline schar OpAbsDiff<schar>::operator ()(schar a, schar b) const
{ return saturate_cast<schar>(std::abs(a - b)); }

template<typename T, typename WT=T> struct OpAbsDiffS
{
    typedef T type1; typedef WT type2; typedef T rtype;
    T operator()(T a, WT b) const { return saturate_cast<T>(std::abs(a - b)); }
};

template<typename T> struct OpAnd
{
    typedef T type1; typedef T type2; typedef T rtype;
    T operator()( T a, T b ) const { return a & b; }
};

template<typename T> struct OpOr
{
    typedef T type1; typedef T type2; typedef T rtype;
    T operator()( T a, T b ) const { return a | b; }
};

template<typename T> struct OpXor
{
    typedef T type1; typedef T type2; typedef T rtype;
    T operator()( T a, T b ) const { return a ^ b; }
};

template<typename T> struct OpNot
{
    typedef T type1; typedef T type2; typedef T rtype;
    T operator()( T a, T ) const { return ~a; }
};

static inline void fixSteps(Size sz, size_t elemSize, size_t& step1, size_t& step2, size_t& step)
{
    if( sz.height == 1 )
        step1 = step2 = step = sz.width*elemSize;
}
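// binary_op/arithm_op below call the low-level kernels with
// step1 == step2 == step == 0 on single-row blocks, so before a row step
// is handed to an IPP primitive (which expects meaningful byte strides)
// fixSteps rewrites all three to the contiguous value sz.width*elemSize.
// It is a no-op for multi-row calls.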
static void add8u( const uchar* src1, size_t step1,
                   const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiAdd_8u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0),
           (vBinOp8<uchar, OpAdd<uchar>, IF_SIMD(_VAdd8u)>(src1, step1, src2, step2, dst, step, sz)));
}

static void add8s( const schar* src1, size_t step1,
                   const schar* src2, size_t step2,
                   schar* dst, size_t step, Size sz, void* )
{
    vBinOp8<schar, OpAdd<schar>, IF_SIMD(_VAdd8s)>(src1, step1, src2, step2, dst, step, sz);
}

static void add16u( const ushort* src1, size_t step1,
                    const ushort* src2, size_t step2,
                    ushort* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiAdd_16u_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0),
           (vBinOp16<ushort, OpAdd<ushort>, IF_SIMD(_VAdd16u)>(src1, step1, src2, step2, dst, step, sz)));
}

static void add16s( const short* src1, size_t step1,
                    const short* src2, size_t step2,
                    short* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiAdd_16s_C1RSfs(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz, 0),
           (vBinOp16<short, OpAdd<short>, IF_SIMD(_VAdd16s)>(src1, step1, src2, step2, dst, step, sz)));
}

static void add32s( const int* src1, size_t step1,
                    const int* src2, size_t step2,
                    int* dst, size_t step, Size sz, void* )
{
    vBinOp32s<OpAdd<int>, IF_SIMD(_VAdd32s)>(src1, step1, src2, step2, dst, step, sz);
}

static void add32f( const float* src1, size_t step1,
                    const float* src2, size_t step2,
                    float* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiAdd_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz),
           (vBinOp32f<OpAdd<float>, IF_SIMD(_VAdd32f)>(src1, step1, src2, step2, dst, step, sz)));
}

static void add64f( const double* src1, size_t step1,
                    const double* src2, size_t step2,
                    double* dst, size_t step, Size sz, void* )
{
    vBinOp64f<OpAdd<double>, IF_SIMD(_VAdd64f)>(src1, step1, src2, step2, dst, step, sz);
}

static void sub8u( const uchar* src1, size_t step1,
                   const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiSub_8u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0),
           (vBinOp8<uchar, OpSub<uchar>, IF_SIMD(_VSub8u)>(src1, step1, src2, step2, dst, step, sz)));
}

static void sub8s( const schar* src1, size_t step1,
                   const schar* src2, size_t step2,
                   schar* dst, size_t step, Size sz, void* )
{
    vBinOp8<schar, OpSub<schar>, IF_SIMD(_VSub8s)>(src1, step1, src2, step2, dst, step, sz);
}

static void sub16u( const ushort* src1, size_t step1,
                    const ushort* src2, size_t step2,
                    ushort* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiSub_16u_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0),
           (vBinOp16<ushort, OpSub<ushort>, IF_SIMD(_VSub16u)>(src1, step1, src2, step2, dst, step, sz)));
}

static void sub16s( const short* src1, size_t step1,
                    const short* src2, size_t step2,
                    short* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiSub_16s_C1RSfs(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz, 0),
           (vBinOp16<short, OpSub<short>, IF_SIMD(_VSub16s)>(src1, step1, src2, step2, dst, step, sz)));
}

static void sub32s( const int* src1, size_t step1,
                    const int* src2, size_t step2,
                    int* dst, size_t step, Size sz, void* )
{
    vBinOp32s<OpSub<int>, IF_SIMD(_VSub32s)>(src1, step1, src2, step2, dst, step, sz);
}

static void sub32f( const float* src1, size_t step1,
                    const float* src2, size_t step2,
                    float* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiSub_32f_C1R(src2, (int)step2, src1, (int)step1, dst, (int)step, (IppiSize&)sz),
           (vBinOp32f<OpSub<float>, IF_SIMD(_VSub32f)>(src1, step1, src2, step2, dst, step, sz)));
}

static void sub64f( const double* src1, size_t step1,
                    const double* src2, size_t step2,
                    double* dst, size_t step, Size sz, void* )
{
    vBinOp64f<OpSub<double>, IF_SIMD(_VSub64f)>(src1, step1, src2, step2, dst, step, sz);
}
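// Dispatch pattern for the wrappers above: IF_IPP(ipp_call, fallback)
// expands to the IPP branch when IPP acceleration is enabled, otherwise to
// the templated row loops. All 8-bit kernels saturate, e.g. add8u yields
// 200 + 100 -> 255 and sub8u yields 50 - 100 -> 0 (CV_FAST_CAST_8U clamps
// to the [0, 255] range).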
template<> inline uchar OpMin<uchar>::operator ()(uchar a, uchar b) const { return CV_MIN_8U(a, b); }
template<> inline uchar OpMax<uchar>::operator ()(uchar a, uchar b) const { return CV_MAX_8U(a, b); }
static void max8u( const uchar* src1, size_t step1,
                   const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size sz, void* )
{
#if (ARITHM_USE_IPP == 1)
    uchar* s1 = (uchar*)src1;
    uchar* s2 = (uchar*)src2;
    uchar* d  = dst;
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
    for(int i = 0; i < sz.height; i++)
    {
        ippsMaxEvery_8u(s1, s2, d, sz.width);
        s1 += step1;
        s2 += step2;
        d  += step;
    }
#else
    vBinOp8<uchar, OpMax<uchar>, IF_SIMD(_VMax8u)>(src1, step1, src2, step2, dst, step, sz);
#endif

//    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
//           ippiMaxEvery_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
//           (vBinOp8<uchar, OpMax<uchar>, IF_SIMD(_VMax8u)>(src1, step1, src2, step2, dst, step, sz)));
}

static void max8s( const schar* src1, size_t step1,
                   const schar* src2, size_t step2,
                   schar* dst, size_t step, Size sz, void* )
{
    vBinOp8<schar, OpMax<schar>, IF_SIMD(_VMax8s)>(src1, step1, src2, step2, dst, step, sz);
}

static void max16u( const ushort* src1, size_t step1,
                    const ushort* src2, size_t step2,
                    ushort* dst, size_t step, Size sz, void* )
{
#if (ARITHM_USE_IPP == 1)
    ushort* s1 = (ushort*)src1;
    ushort* s2 = (ushort*)src2;
    ushort* d  = dst;
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
    for(int i = 0; i < sz.height; i++)
    {
        ippsMaxEvery_16u(s1, s2, d, sz.width);
        s1 = (ushort*)((uchar*)s1 + step1);
        s2 = (ushort*)((uchar*)s2 + step2);
        d  = (ushort*)((uchar*)d + step);
    }
#else
    vBinOp16<ushort, OpMax<ushort>, IF_SIMD(_VMax16u)>(src1, step1, src2, step2, dst, step, sz);
#endif

//    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
//           ippiMaxEvery_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
//           (vBinOp16<ushort, OpMax<ushort>, IF_SIMD(_VMax16u)>(src1, step1, src2, step2, dst, step, sz)));
}

static void max16s( const short* src1, size_t step1,
                    const short* src2, size_t step2,
                    short* dst, size_t step, Size sz, void* )
{
    vBinOp16<short, OpMax<short>, IF_SIMD(_VMax16s)>(src1, step1, src2, step2, dst, step, sz);
}

static void max32s( const int* src1, size_t step1,
                    const int* src2, size_t step2,
                    int* dst, size_t step, Size sz, void* )
{
    vBinOp32s<OpMax<int>, IF_SIMD(_VMax32s)>(src1, step1, src2, step2, dst, step, sz);
}

static void max32f( const float* src1, size_t step1,
                    const float* src2, size_t step2,
                    float* dst, size_t step, Size sz, void* )
{
#if (ARITHM_USE_IPP == 1)
    float* s1 = (float*)src1;
    float* s2 = (float*)src2;
    float* d  = dst;
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
    for(int i = 0; i < sz.height; i++)
    {
        ippsMaxEvery_32f(s1, s2, d, sz.width);
        s1 = (float*)((uchar*)s1 + step1);
        s2 = (float*)((uchar*)s2 + step2);
        d  = (float*)((uchar*)d + step);
    }
#else
    vBinOp32f<OpMax<float>, IF_SIMD(_VMax32f)>(src1, step1, src2, step2, dst, step, sz);
#endif
//    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
//           ippiMaxEvery_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
//           (vBinOp32f<OpMax<float>, IF_SIMD(_VMax32f)>(src1, step1, src2, step2, dst, step, sz)));
}

static void max64f( const double* src1, size_t step1,
                    const double* src2, size_t step2,
                    double* dst, size_t step, Size sz, void* )
{
    vBinOp64f<OpMax<double>, IF_SIMD(_VMax64f)>(src1, step1, src2, step2, dst, step, sz);
}

static void min8u( const uchar* src1, size_t step1,
                   const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size sz, void* )
{
#if (ARITHM_USE_IPP == 1)
    uchar* s1 = (uchar*)src1;
    uchar* s2 = (uchar*)src2;
    uchar* d  = dst;
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
    for(int i = 0; i < sz.height; i++)
    {
        ippsMinEvery_8u(s1, s2, d, sz.width);
        s1 += step1;
        s2 += step2;
        d  += step;
    }
#else
    vBinOp8<uchar, OpMin<uchar>, IF_SIMD(_VMin8u)>(src1, step1, src2, step2, dst, step, sz);
#endif

//    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
//           ippiMinEvery_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
//           (vBinOp8<uchar, OpMin<uchar>, IF_SIMD(_VMin8u)>(src1, step1, src2, step2, dst, step, sz)));
}

static void min8s( const schar* src1, size_t step1,
                   const schar* src2, size_t step2,
                   schar* dst, size_t step, Size sz, void* )
{
    vBinOp8<schar, OpMin<schar>, IF_SIMD(_VMin8s)>(src1, step1, src2, step2, dst, step, sz);
}

static void min16u( const ushort* src1, size_t step1,
                    const ushort* src2, size_t step2,
                    ushort* dst, size_t step, Size sz, void* )
{
#if (ARITHM_USE_IPP == 1)
    ushort* s1 = (ushort*)src1;
    ushort* s2 = (ushort*)src2;
    ushort* d  = dst;
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
    for(int i = 0; i < sz.height; i++)
    {
        ippsMinEvery_16u(s1, s2, d, sz.width);
        s1 = (ushort*)((uchar*)s1 + step1);
        s2 = (ushort*)((uchar*)s2 + step2);
        d  = (ushort*)((uchar*)d + step);
    }
#else
    vBinOp16<ushort, OpMin<ushort>, IF_SIMD(_VMin16u)>(src1, step1, src2, step2, dst, step, sz);
#endif

//    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
//           ippiMinEvery_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
//           (vBinOp16<ushort, OpMin<ushort>, IF_SIMD(_VMin16u)>(src1, step1, src2, step2, dst, step, sz)));
}

static void min16s( const short* src1, size_t step1,
                    const short* src2, size_t step2,
                    short* dst, size_t step, Size sz, void* )
{
    vBinOp16<short, OpMin<short>, IF_SIMD(_VMin16s)>(src1, step1, src2, step2, dst, step, sz);
}

static void min32s( const int* src1, size_t step1,
                    const int* src2, size_t step2,
                    int* dst, size_t step, Size sz, void* )
{
    vBinOp32s<OpMin<int>, IF_SIMD(_VMin32s)>(src1, step1, src2, step2, dst, step, sz);
}

static void min32f( const float* src1, size_t step1,
                    const float* src2, size_t step2,
                    float* dst, size_t step, Size sz, void* )
{
#if (ARITHM_USE_IPP == 1)
    float* s1 = (float*)src1;
    float* s2 = (float*)src2;
    float* d  = dst;
    fixSteps(sz, sizeof(dst[0]), step1, step2, step);
    for(int i = 0; i < sz.height; i++)
    {
        ippsMinEvery_32f(s1, s2, d, sz.width);
        s1 = (float*)((uchar*)s1 + step1);
        s2 = (float*)((uchar*)s2 + step2);
        d  = (float*)((uchar*)d + step);
    }
#else
    vBinOp32f<OpMin<float>, IF_SIMD(_VMin32f)>(src1, step1, src2, step2, dst, step, sz);
#endif
//    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
//           ippiMinEvery_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (IppiSize&)sz),
//           (vBinOp32f<OpMin<float>, IF_SIMD(_VMin32f)>(src1, step1, src2, step2, dst, step, sz)));
}

static void min64f( const double* src1, size_t step1,
                    const double* src2, size_t step2,
                    double* dst, size_t step, Size sz, void* )
{
    vBinOp64f<OpMin<double>, IF_SIMD(_VMin64f)>(src1, step1, src2, step2, dst, step, sz);
}
static void absdiff8u( const uchar* src1, size_t step1,
                       const uchar* src2, size_t step2,
                       uchar* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiAbsDiff_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz),
           (vBinOp8<uchar, OpAbsDiff<uchar>, IF_SIMD(_VAbsDiff8u)>(src1, step1, src2, step2, dst, step, sz)));
}

static void absdiff8s( const schar* src1, size_t step1,
                       const schar* src2, size_t step2,
                       schar* dst, size_t step, Size sz, void* )
{
    vBinOp8<schar, OpAbsDiff<schar>, IF_SIMD(_VAbsDiff8s)>(src1, step1, src2, step2, dst, step, sz);
}

static void absdiff16u( const ushort* src1, size_t step1,
                        const ushort* src2, size_t step2,
                        ushort* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiAbsDiff_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz),
           (vBinOp16<ushort, OpAbsDiff<ushort>, IF_SIMD(_VAbsDiff16u)>(src1, step1, src2, step2, dst, step, sz)));
}

static void absdiff16s( const short* src1, size_t step1,
                        const short* src2, size_t step2,
                        short* dst, size_t step, Size sz, void* )
{
    vBinOp16<short, OpAbsDiff<short>, IF_SIMD(_VAbsDiff16s)>(src1, step1, src2, step2, dst, step, sz);
}

static void absdiff32s( const int* src1, size_t step1,
                        const int* src2, size_t step2,
                        int* dst, size_t step, Size sz, void* )
{
    vBinOp32s<OpAbsDiff<int>, IF_SIMD(_VAbsDiff32s)>(src1, step1, src2, step2, dst, step, sz);
}

static void absdiff32f( const float* src1, size_t step1,
                        const float* src2, size_t step2,
                        float* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiAbsDiff_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz),
           (vBinOp32f<OpAbsDiff<float>, IF_SIMD(_VAbsDiff32f)>(src1, step1, src2, step2, dst, step, sz)));
}

static void absdiff64f( const double* src1, size_t step1,
                        const double* src2, size_t step2,
                        double* dst, size_t step, Size sz, void* )
{
    vBinOp64f<OpAbsDiff<double>, IF_SIMD(_VAbsDiff64f)>(src1, step1, src2, step2, dst, step, sz);
}


static void and8u( const uchar* src1, size_t step1,
                   const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiAnd_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz),
           (vBinOp8<uchar, OpAnd<uchar>, IF_SIMD(_VAnd8u)>(src1, step1, src2, step2, dst, step, sz)));
}

static void or8u( const uchar* src1, size_t step1,
                  const uchar* src2, size_t step2,
                  uchar* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiOr_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz),
           (vBinOp8<uchar, OpOr<uchar>, IF_SIMD(_VOr8u)>(src1, step1, src2, step2, dst, step, sz)));
}

static void xor8u( const uchar* src1, size_t step1,
                   const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step);
           ippiXor_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)sz),
           (vBinOp8<uchar, OpXor<uchar>, IF_SIMD(_VXor8u)>(src1, step1, src2, step2, dst, step, sz)));
}

static void not8u( const uchar* src1, size_t step1,
                   const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size sz, void* )
{
    IF_IPP(fixSteps(sz, sizeof(dst[0]), step1, step2, step); (void *)src2;
           ippiNot_8u_C1R(src1, (int)step1, dst, (int)step, (IppiSize&)sz),
           (vBinOp8<uchar, OpNot<uchar>, IF_SIMD(_VNot8u)>(src1, step1, src2, step2, dst, step, sz)));
}

/****************************************************************************************\
*                                   logical operations                                   *
\****************************************************************************************/
void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize )
{
    int scn = (int)sc.total(), cn = CV_MAT_CN(buftype);
    size_t esz = CV_ELEM_SIZE(buftype);
    getConvertFunc(sc.depth(), buftype)(sc.data, 0, 0, 0, scbuf, 0, Size(std::min(cn, scn), 1), 0);
    // unroll the scalar
    if( scn < cn )
    {
        CV_Assert( scn == 1 );
        size_t esz1 = CV_ELEM_SIZE1(buftype);
        for( size_t i = esz1; i < esz; i++ )
            scbuf[i] = scbuf[i - esz1];
    }
    for( size_t i = esz; i < blocksize*esz; i++ )
        scbuf[i] = scbuf[i - esz];
}
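// Worked example of the unrolling above: for buftype = CV_8UC3,
// sc = Scalar(1, 2, 3) and blocksize = 4, scbuf ends up holding the byte
// pattern 1 2 3 1 2 3 1 2 3 1 2 3, so a scalar operand can be fed to the
// same row kernels as an ordinary (continuous) array operand.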
static void binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
                      InputArray _mask, const BinaryFunc* tab, bool bitwise)
{
    int kind1 = _src1.kind(), kind2 = _src2.kind();
    Mat src1 = _src1.getMat(), src2 = _src2.getMat();
    bool haveMask = !_mask.empty(), haveScalar = false;
    BinaryFunc func;
    int c;

    if( src1.dims <= 2 && src2.dims <= 2 && kind1 == kind2 &&
        src1.size() == src2.size() && src1.type() == src2.type() && !haveMask )
    {
        _dst.create(src1.size(), src1.type());
        Mat dst = _dst.getMat();
        if( bitwise )
        {
            func = *tab;
            c = (int)src1.elemSize();
        }
        else
        {
            func = tab[src1.depth()];
            c = src1.channels();
        }

        Size sz = getContinuousSize(src1, src2, dst);
        size_t len = sz.width*(size_t)c;
        if( len == (size_t)(int)len )
        {
            sz.width = (int)len;
            func(src1.data, src1.step, src2.data, src2.step, dst.data, dst.step, sz, 0);
            return;
        }
    }

    if( (kind1 == _InputArray::MATX) + (kind2 == _InputArray::MATX) == 1 ||
        src1.size != src2.size || src1.type() != src2.type() )
    {
        if( checkScalar(src1, src2.type(), kind1, kind2) )
            // src1 is a scalar; swap it with src2
            swap(src1, src2);
        else if( !checkScalar(src2, src1.type(), kind2, kind1) )
            CV_Error( CV_StsUnmatchedSizes,
                      "The operation is neither 'array op array' (where arrays have the same size and type), "
                      "nor 'array op scalar', nor 'scalar op array'" );
        haveScalar = true;
    }

    size_t esz = src1.elemSize();
    size_t blocksize0 = (BLOCK_SIZE + esz-1)/esz;
    int cn = src1.channels();
    BinaryFunc copymask = 0;
    Mat mask;
    bool reallocate = false;

    if( haveMask )
    {
        mask = _mask.getMat();
        CV_Assert( (mask.type() == CV_8UC1 || mask.type() == CV_8SC1) );
        CV_Assert( mask.size == src1.size );
        copymask = getCopyMaskFunc(esz);
        Mat tdst = _dst.getMat();
        reallocate = tdst.size != src1.size || tdst.type() != src1.type();
    }

    AutoBuffer<uchar> _buf;
    uchar *scbuf = 0, *maskbuf = 0;

    _dst.create(src1.dims, src1.size, src1.type());
    Mat dst = _dst.getMat();

    // if this is a mask operation and dst has been reallocated,
    // we have to clear the destination
    if( haveMask && reallocate )
        dst = Scalar::all(0);

    if( bitwise )
    {
        func = *tab;
        c = (int)esz;
    }
    else
    {
        func = tab[src1.depth()];
        c = cn;
    }

    if( !haveScalar )
    {
        const Mat* arrays[] = { &src1, &src2, &dst, &mask, 0 };
        uchar* ptrs[4];

        NAryMatIterator it(arrays, ptrs);
        size_t total = it.size, blocksize = total;

        if( blocksize*c > INT_MAX )
            blocksize = INT_MAX/c;

        if( haveMask )
        {
            blocksize = std::min(blocksize, blocksize0);
            _buf.allocate(blocksize*esz);
            maskbuf = _buf;
        }

        for( size_t i = 0; i < it.nplanes; i++, ++it )
        {
            for( size_t j = 0; j < total; j += blocksize )
            {
                int bsz = (int)MIN(total - j, blocksize);

                func( ptrs[0], 0, ptrs[1], 0, haveMask ? maskbuf : ptrs[2], 0, Size(bsz*c, 1), 0 );
                if( haveMask )
                {
                    copymask( maskbuf, 0, ptrs[3], 0, ptrs[2], 0, Size(bsz, 1), &esz );
                    ptrs[3] += bsz;
                }

                bsz *= (int)esz;
                ptrs[0] += bsz; ptrs[1] += bsz; ptrs[2] += bsz;
            }
        }
    }
    else
    {
        const Mat* arrays[] = { &src1, &dst, &mask, 0 };
        uchar* ptrs[3];

        NAryMatIterator it(arrays, ptrs);
        size_t total = it.size, blocksize = std::min(total, blocksize0);

        _buf.allocate(blocksize*(haveMask ? 2 : 1)*esz + 32);
        scbuf = _buf;
        maskbuf = alignPtr(scbuf + blocksize*esz, 16);

        convertAndUnrollScalar( src2, src1.type(), scbuf, blocksize);

        for( size_t i = 0; i < it.nplanes; i++, ++it )
        {
            for( size_t j = 0; j < total; j += blocksize )
            {
                int bsz = (int)MIN(total - j, blocksize);

                func( ptrs[0], 0, scbuf, 0, haveMask ? maskbuf : ptrs[1], 0, Size(bsz*c, 1), 0 );
                if( haveMask )
                {
                    copymask( maskbuf, 0, ptrs[2], 0, ptrs[1], 0, Size(bsz, 1), &esz );
                    ptrs[2] += bsz;
                }

                bsz *= (int)esz;
                ptrs[0] += bsz; ptrs[1] += bsz;
            }
        }
    }
}
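/*  A minimal usage sketch of the dispatcher above, via the public wrappers
    declared further below (standard OpenCV API):

        Mat a(3, 3, CV_8UC1, Scalar(0xF0)), b(3, 3, CV_8UC1, Scalar(0x3C)), c;
        bitwise_and(a, b, c);   // every element is 0x30
        bitwise_not(a, c);      // every element is 0x0F

    For same-type, unmasked inputs this takes the continuous fast path at
    the top of binary_op; a mask or a scalar operand goes through the
    blockwise NAryMatIterator loops instead.
*/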
static BinaryFunc* getMaxTab()
{
    static BinaryFunc maxTab[] =
    {
        (BinaryFunc)GET_OPTIMIZED(max8u), (BinaryFunc)GET_OPTIMIZED(max8s),
        (BinaryFunc)GET_OPTIMIZED(max16u), (BinaryFunc)GET_OPTIMIZED(max16s),
        (BinaryFunc)GET_OPTIMIZED(max32s),
        (BinaryFunc)GET_OPTIMIZED(max32f), (BinaryFunc)max64f,
        0
    };

    return maxTab;
}

static BinaryFunc* getMinTab()
{
    static BinaryFunc minTab[] =
    {
        (BinaryFunc)GET_OPTIMIZED(min8u), (BinaryFunc)GET_OPTIMIZED(min8s),
        (BinaryFunc)GET_OPTIMIZED(min16u), (BinaryFunc)GET_OPTIMIZED(min16s),
        (BinaryFunc)GET_OPTIMIZED(min32s),
        (BinaryFunc)GET_OPTIMIZED(min32f), (BinaryFunc)min64f,
        0
    };

    return minTab;
}

}
void cv::bitwise_and(InputArray a, InputArray b, OutputArray c, InputArray mask)
{
    BinaryFunc f = (BinaryFunc)GET_OPTIMIZED(and8u);
    binary_op(a, b, c, mask, &f, true);
}

void cv::bitwise_or(InputArray a, InputArray b, OutputArray c, InputArray mask)
{
    BinaryFunc f = (BinaryFunc)GET_OPTIMIZED(or8u);
    binary_op(a, b, c, mask, &f, true);
}

void cv::bitwise_xor(InputArray a, InputArray b, OutputArray c, InputArray mask)
{
    BinaryFunc f = (BinaryFunc)GET_OPTIMIZED(xor8u);
    binary_op(a, b, c, mask, &f, true);
}

void cv::bitwise_not(InputArray a, OutputArray c, InputArray mask)
{
    BinaryFunc f = (BinaryFunc)GET_OPTIMIZED(not8u);
    binary_op(a, a, c, mask, &f, true);
}

void cv::max( InputArray src1, InputArray src2, OutputArray dst )
{
    binary_op(src1, src2, dst, noArray(), getMaxTab(), false );
}

void cv::min( InputArray src1, InputArray src2, OutputArray dst )
{
    binary_op(src1, src2, dst, noArray(), getMinTab(), false );
}

void cv::max(const Mat& src1, const Mat& src2, Mat& dst)
{
    OutputArray _dst(dst);
    binary_op(src1, src2, _dst, noArray(), getMaxTab(), false );
}

void cv::min(const Mat& src1, const Mat& src2, Mat& dst)
{
    OutputArray _dst(dst);
    binary_op(src1, src2, _dst, noArray(), getMinTab(), false );
}

void cv::max(const Mat& src1, double src2, Mat& dst)
{
    OutputArray _dst(dst);
    binary_op(src1, src2, _dst, noArray(), getMaxTab(), false );
}

void cv::min(const Mat& src1, double src2, Mat& dst)
{
    OutputArray _dst(dst);
    binary_op(src1, src2, _dst, noArray(), getMinTab(), false );
}
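/*  Example of the per-element min/max wrappers above, including the
    Mat/scalar overloads (standard OpenCV API):

        Mat a(2, 2, CV_32FC1, Scalar(0.3f)), b(2, 2, CV_32FC1, Scalar(0.7f)), c;
        cv::max(a, b, c);     // every element is 0.7f
        cv::min(a, 0.5, c);   // every element is 0.3f
*/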
/****************************************************************************************\
*                                    add/subtract                                        *
\****************************************************************************************/

namespace cv
{

static int actualScalarDepth(const double* data, int len)
{
    int i = 0, minval = INT_MAX, maxval = INT_MIN;
    for(; i < len; ++i)
    {
        int ival = cvRound(data[i]);
        if( ival != data[i] )
            break;
        minval = MIN(minval, ival);
        maxval = MAX(maxval, ival);
    }
    return i < len ? CV_64F :
        minval >= 0 && maxval <= (int)UCHAR_MAX ? CV_8U :
        minval >= (int)SCHAR_MIN && maxval <= (int)SCHAR_MAX ? CV_8S :
        minval >= 0 && maxval <= (int)USHRT_MAX ? CV_16U :
        minval >= (int)SHRT_MIN && maxval <= (int)SHRT_MAX ? CV_16S :
        CV_32S;
}
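// Examples of the mapping above: Scalar(255) against an 8-bit array gives
// CV_8U, Scalar(256) gives CV_16U, Scalar(-1) gives CV_8S, and a
// non-integer value such as Scalar(0.5) falls through to CV_64F, keeping
// the scalar in double precision.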
static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
                      InputArray _mask, int dtype, BinaryFunc* tab, bool muldiv=false, void* usrdata=0)
{
    int kind1 = _src1.kind(), kind2 = _src2.kind();
    Mat src1 = _src1.getMat(), src2 = _src2.getMat();
    bool haveMask = !_mask.empty();
    bool reallocate = false;

    bool src1Scalar = checkScalar(src1, src2.type(), kind1, kind2);
    bool src2Scalar = checkScalar(src2, src1.type(), kind2, kind1);

    if( (kind1 == kind2 || src1.channels() == 1) && src1.dims <= 2 && src2.dims <= 2 &&
        src1.size() == src2.size() && src1.type() == src2.type() &&
        !haveMask && ((!_dst.fixedType() && (dtype < 0 || CV_MAT_DEPTH(dtype) == src1.depth())) ||
                      (_dst.fixedType() && _dst.type() == _src1.type())) &&
        ((src1Scalar && src2Scalar) || (!src1Scalar && !src2Scalar)) )
    {
        _dst.create(src1.size(), src1.type());
        Mat dst = _dst.getMat();
        Size sz = getContinuousSize(src1, src2, dst, src1.channels());
        tab[src1.depth()](src1.data, src1.step, src2.data, src2.step, dst.data, dst.step, sz, usrdata);
        return;
    }

    bool haveScalar = false, swapped12 = false;
    int depth2 = src2.depth();
    if( src1.size != src2.size || src1.channels() != src2.channels() ||
        (kind1 == _InputArray::MATX && (src1.size() == Size(1,4) || src1.size() == Size(1,1))) ||
        (kind2 == _InputArray::MATX && (src2.size() == Size(1,4) || src2.size() == Size(1,1))) )
    {
        if( checkScalar(src1, src2.type(), kind1, kind2) )
        {
            // src1 is a scalar; swap it with src2
            swap(src1, src2);
            swapped12 = true;
        }
        else if( !checkScalar(src2, src1.type(), kind2, kind1) )
            CV_Error( CV_StsUnmatchedSizes,
                      "The operation is neither 'array op array' (where arrays have the same size and the same number of channels), "
                      "nor 'array op scalar', nor 'scalar op array'" );
        haveScalar = true;
        CV_Assert(src2.type() == CV_64F && (src2.rows == 4 || src2.rows == 1));

        if (!muldiv)
        {
            depth2 = actualScalarDepth(src2.ptr<double>(), src1.channels());
            if( depth2 == CV_64F && (src1.depth() < CV_32S || src1.depth() == CV_32F) )
                depth2 = CV_32F;
        }
        else
            depth2 = CV_64F;
    }

    int cn = src1.channels(), depth1 = src1.depth(), wtype;
    BinaryFunc cvtsrc1 = 0, cvtsrc2 = 0, cvtdst = 0;

    if( dtype < 0 )
    {
        if( _dst.fixedType() )
            dtype = _dst.type();
        else
        {
            if( !haveScalar && src1.type() != src2.type() )
                CV_Error(CV_StsBadArg,
                         "When the input arrays in add/subtract/multiply/divide functions have different types, "
                         "the output array type must be explicitly specified");
            dtype = src1.type();
        }
    }
    dtype = CV_MAT_DEPTH(dtype);

    if( depth1 == depth2 && dtype == depth1 )
        wtype = dtype;
    else if( !muldiv )
    {
        wtype = depth1 <= CV_8S && depth2 <= CV_8S ? CV_16S :
                depth1 <= CV_32S && depth2 <= CV_32S ? CV_32S : std::max(depth1, depth2);
        wtype = std::max(wtype, dtype);

        // when the result of addition should be converted to an integer type,
        // and just one of the input arrays is floating-point, it makes sense to convert that input to an integer type before the operation,
        // instead of converting the other input to floating-point and then converting the operation result back to integers.
        if( dtype < CV_32F && (depth1 < CV_32F || depth2 < CV_32F) )
            wtype = CV_32S;
    }
    else
    {
        wtype = std::max(depth1, std::max(depth2, CV_32F));
        wtype = std::max(wtype, dtype);
    }

    cvtsrc1 = depth1 == wtype ? 0 : getConvertFunc(depth1, wtype);
    cvtsrc2 = depth2 == depth1 ? cvtsrc1 : depth2 == wtype ? 0 : getConvertFunc(depth2, wtype);
    cvtdst = dtype == wtype ? 0 : getConvertFunc(wtype, dtype);

    dtype = CV_MAKETYPE(dtype, cn);
    wtype = CV_MAKETYPE(wtype, cn);

    size_t esz1 = src1.elemSize(), esz2 = src2.elemSize();
    size_t dsz = CV_ELEM_SIZE(dtype), wsz = CV_ELEM_SIZE(wtype);
    size_t blocksize0 = (size_t)(BLOCK_SIZE + wsz-1)/wsz;
    BinaryFunc copymask = 0;
    Mat mask;

    if( haveMask )
    {
        mask = _mask.getMat();
        CV_Assert( (mask.type() == CV_8UC1 || mask.type() == CV_8SC1) );
        CV_Assert( mask.size == src1.size );
        copymask = getCopyMaskFunc(dsz);
        Mat tdst = _dst.getMat();
        reallocate = tdst.size != src1.size || tdst.type() != dtype;
    }

    AutoBuffer<uchar> _buf;
    uchar *buf, *maskbuf = 0, *buf1 = 0, *buf2 = 0, *wbuf = 0;
    size_t bufesz = (cvtsrc1 ? wsz : 0) + (cvtsrc2 || haveScalar ? wsz : 0) + (cvtdst ? wsz : 0) + (haveMask ? dsz : 0);

    _dst.create(src1.dims, src1.size, dtype);
    Mat dst = _dst.getMat();

    if( haveMask && reallocate )
        dst = Scalar::all(0);

    BinaryFunc func = tab[CV_MAT_DEPTH(wtype)];

    if( !haveScalar )
    {
        const Mat* arrays[] = { &src1, &src2, &dst, &mask, 0 };
        uchar* ptrs[4];

        NAryMatIterator it(arrays, ptrs);
        size_t total = it.size, blocksize = total;

        if( haveMask || cvtsrc1 || cvtsrc2 || cvtdst )
            blocksize = std::min(blocksize, blocksize0);

        _buf.allocate(bufesz*blocksize + 64);
        buf = _buf;
        if( cvtsrc1 )
            buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
        if( cvtsrc2 )
            buf2 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
        wbuf = maskbuf = buf;
        if( cvtdst )
            buf = alignPtr(buf + blocksize*wsz, 16);
        if( haveMask )
            maskbuf = buf;

        for( size_t i = 0; i < it.nplanes; i++, ++it )
        {
            for( size_t j = 0; j < total; j += blocksize )
            {
                int bsz = (int)MIN(total - j, blocksize);
                Size bszn(bsz*cn, 1);
                const uchar *sptr1 = ptrs[0], *sptr2 = ptrs[1];
                uchar* dptr = ptrs[2];
                if( cvtsrc1 )
                {
                    cvtsrc1( sptr1, 0, 0, 0, buf1, 0, bszn, 0 );
                    sptr1 = buf1;
                }
                if( ptrs[0] == ptrs[1] )
                    sptr2 = sptr1;
                else if( cvtsrc2 )
                {
                    cvtsrc2( sptr2, 0, 0, 0, buf2, 0, bszn, 0 );
                    sptr2 = buf2;
                }

                if( !haveMask && !cvtdst )
                    func( sptr1, 0, sptr2, 0, dptr, 0, bszn, usrdata );
                else
                {
                    func( sptr1, 0, sptr2, 0, wbuf, 0, bszn, usrdata );
                    if( !haveMask )
                        cvtdst( wbuf, 0, 0, 0, dptr, 0, bszn, 0 );
                    else if( !cvtdst )
                    {
                        copymask( wbuf, 0, ptrs[3], 0, dptr, 0, Size(bsz, 1), &dsz );
                        ptrs[3] += bsz;
                    }
                    else
                    {
                        cvtdst( wbuf, 0, 0, 0, maskbuf, 0, bszn, 0 );
                        copymask( maskbuf, 0, ptrs[3], 0, dptr, 0, Size(bsz, 1), &dsz );
                        ptrs[3] += bsz;
                    }
                }
                ptrs[0] += bsz*esz1; ptrs[1] += bsz*esz2; ptrs[2] += bsz*dsz;
            }
        }
    }
    else
    {
        const Mat* arrays[] = { &src1, &dst, &mask, 0 };
        uchar* ptrs[3];

        NAryMatIterator it(arrays, ptrs);
        size_t total = it.size, blocksize = std::min(total, blocksize0);

        _buf.allocate(bufesz*blocksize + 64);
        buf = _buf;
        if( cvtsrc1 )
            buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
        buf2 = buf; buf = alignPtr(buf + blocksize*wsz, 16);
        wbuf = maskbuf = buf;
        if( cvtdst )
            buf = alignPtr(buf + blocksize*wsz, 16);
        if( haveMask )
            maskbuf = buf;

        convertAndUnrollScalar( src2, wtype, buf2, blocksize);

        for( size_t i = 0; i < it.nplanes; i++, ++it )
        {
            for( size_t j = 0; j < total; j += blocksize )
            {
                int bsz = (int)MIN(total - j, blocksize);
                Size bszn(bsz*cn, 1);
                const uchar *sptr1 = ptrs[0];
                const uchar* sptr2 = buf2;
                uchar* dptr = ptrs[1];

                if( cvtsrc1 )
                {
                    cvtsrc1( sptr1, 0, 0, 0, buf1, 0, bszn, 0 );
                    sptr1 = buf1;
                }

                if( swapped12 )
                    std::swap(sptr1, sptr2);

                if( !haveMask && !cvtdst )
                    func( sptr1, 0, sptr2, 0, dptr, 0, bszn, usrdata );
                else
                {
                    func( sptr1, 0, sptr2, 0, wbuf, 0, bszn, usrdata );
                    if( !haveMask )
                        cvtdst( wbuf, 0, 0, 0, dptr, 0, bszn, 0 );
                    else if( !cvtdst )
                    {
                        copymask( wbuf, 0, ptrs[2], 0, dptr, 0, Size(bsz, 1), &dsz );
                        ptrs[2] += bsz;
                    }
                    else
                    {
                        cvtdst( wbuf, 0, 0, 0, maskbuf, 0, bszn, 0 );
                        copymask( maskbuf, 0, ptrs[2], 0, dptr, 0, Size(bsz, 1), &dsz );
                        ptrs[2] += bsz;
                    }
                }
                ptrs[0] += bsz*esz1; ptrs[1] += bsz*dsz;
            }
        }
    }
}
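/*  Worked example of the wtype (working type) selection in arithm_op:
    add(a, b, c) on two CV_8U arrays takes the early exit at the top and
    runs the 8-bit kernel directly. subtract(a, b, c, noArray(), CV_16S)
    on the same inputs does not: dtype = CV_16S is integer and the inputs
    are integer, so wtype is forced to CV_32S, cvtsrc1/cvtsrc2 widen each
    block of input to CV_32S, sub32s does the arithmetic, and cvtdst
    saturates the CV_32S intermediate down to CV_16S.
*/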
static BinaryFunc* getAddTab()
{
    static BinaryFunc addTab[] =
    {
        (BinaryFunc)GET_OPTIMIZED(add8u), (BinaryFunc)GET_OPTIMIZED(add8s),
        (BinaryFunc)GET_OPTIMIZED(add16u), (BinaryFunc)GET_OPTIMIZED(add16s),
        (BinaryFunc)GET_OPTIMIZED(add32s),
        (BinaryFunc)GET_OPTIMIZED(add32f), (BinaryFunc)add64f,
        0
    };

    return addTab;
}

static BinaryFunc* getSubTab()
{
    static BinaryFunc subTab[] =
    {
        (BinaryFunc)GET_OPTIMIZED(sub8u), (BinaryFunc)GET_OPTIMIZED(sub8s),
        (BinaryFunc)GET_OPTIMIZED(sub16u), (BinaryFunc)GET_OPTIMIZED(sub16s),
        (BinaryFunc)GET_OPTIMIZED(sub32s),
        (BinaryFunc)GET_OPTIMIZED(sub32f), (BinaryFunc)sub64f,
        0
    };

    return subTab;
}

static BinaryFunc* getAbsDiffTab()
{
    static BinaryFunc absDiffTab[] =
    {
        (BinaryFunc)GET_OPTIMIZED(absdiff8u), (BinaryFunc)GET_OPTIMIZED(absdiff8s),
        (BinaryFunc)GET_OPTIMIZED(absdiff16u), (BinaryFunc)GET_OPTIMIZED(absdiff16s),
        (BinaryFunc)GET_OPTIMIZED(absdiff32s),
        (BinaryFunc)GET_OPTIMIZED(absdiff32f), (BinaryFunc)absdiff64f,
        0
    };

    return absDiffTab;
}

}
void cv::add( InputArray src1, InputArray src2, OutputArray dst,
              InputArray mask, int dtype )
{
    arithm_op(src1, src2, dst, mask, dtype, getAddTab() );
}

void cv::subtract( InputArray src1, InputArray src2, OutputArray dst,
                   InputArray mask, int dtype )
{
#ifdef HAVE_TEGRA_OPTIMIZATION
    if (mask.empty() && src1.depth() == CV_8U && src2.depth() == CV_8U)
    {
        if (dtype == -1 && dst.fixedType())
            dtype = dst.depth();

        if (!dst.fixedType() || dtype == dst.depth())
        {
            if (dtype == CV_16S)
            {
                Mat _dst = dst.getMat();
                if(tegra::subtract_8u8u16s(src1.getMat(), src2.getMat(), _dst))
                    return;
            }
            else if (dtype == CV_32F)
            {
                Mat _dst = dst.getMat();
                if(tegra::subtract_8u8u32f(src1.getMat(), src2.getMat(), _dst))
                    return;
            }
            else if (dtype == CV_8S)
            {
                Mat _dst = dst.getMat();
                if(tegra::subtract_8u8u8s(src1.getMat(), src2.getMat(), _dst))
                    return;
            }
        }
    }
#endif
    arithm_op(src1, src2, dst, mask, dtype, getSubTab() );
}

void cv::absdiff( InputArray src1, InputArray src2, OutputArray dst )
{
    arithm_op(src1, src2, dst, noArray(), -1, getAbsDiffTab());
}
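/*  Example of the dtype parameter on the wrappers above, assuming two
    CV_8U inputs (standard OpenCV API):

        Mat a(1, 3, CV_8UC1, Scalar(10)), b(1, 3, CV_8UC1, Scalar(20)), c;
        subtract(a, b, c);                     // CV_8U, saturates to 0
        subtract(a, b, c, noArray(), CV_16S);  // CV_16S, every element -10
        absdiff(a, b, c);                      // CV_8U, every element 10
*/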
/****************************************************************************************\
*                                    multiply/divide                                     *
\****************************************************************************************/

namespace cv
{
template<typename T, typename WT> static void
mul_( const T* src1, size_t step1, const T* src2, size_t step2,
      T* dst, size_t step, Size size, WT scale )
{
    step1 /= sizeof(src1[0]);
    step2 /= sizeof(src2[0]);
    step /= sizeof(dst[0]);

    if( scale == (WT)1. )
    {
        for( ; size.height--; src1 += step1, src2 += step2, dst += step )
        {
            int i = 0;
            #if CV_ENABLE_UNROLLED
            for(; i <= size.width - 4; i += 4 )
            {
                T t0;
                T t1;
                t0 = saturate_cast<T>(src1[i  ] * src2[i  ]);
                t1 = saturate_cast<T>(src1[i+1] * src2[i+1]);
                dst[i  ] = t0;
                dst[i+1] = t1;

                t0 = saturate_cast<T>(src1[i+2] * src2[i+2]);
                t1 = saturate_cast<T>(src1[i+3] * src2[i+3]);
                dst[i+2] = t0;
                dst[i+3] = t1;
            }
            #endif
            for( ; i < size.width; i++ )
                dst[i] = saturate_cast<T>(src1[i] * src2[i]);
        }
    }
    else
    {
        for( ; size.height--; src1 += step1, src2 += step2, dst += step )
        {
            int i = 0;
            #if CV_ENABLE_UNROLLED
            for(; i <= size.width - 4; i += 4 )
            {
                T t0 = saturate_cast<T>(scale*(WT)src1[i]*src2[i]);
                T t1 = saturate_cast<T>(scale*(WT)src1[i+1]*src2[i+1]);
                dst[i] = t0; dst[i+1] = t1;

                t0 = saturate_cast<T>(scale*(WT)src1[i+2]*src2[i+2]);
                t1 = saturate_cast<T>(scale*(WT)src1[i+3]*src2[i+3]);
                dst[i+2] = t0; dst[i+3] = t1;
            }
            #endif
            for( ; i < size.width; i++ )
                dst[i] = saturate_cast<T>(scale*(WT)src1[i]*src2[i]);
        }
    }
}
template<typename T> static void
div_( const T* src1, size_t step1, const T* src2, size_t step2,
      T* dst, size_t step, Size size, double scale )
{
    step1 /= sizeof(src1[0]);
    step2 /= sizeof(src2[0]);
    step /= sizeof(dst[0]);

    for( ; size.height--; src1 += step1, src2 += step2, dst += step )
    {
        int i = 0;
        #if CV_ENABLE_UNROLLED
        for( ; i <= size.width - 4; i += 4 )
        {
            if( src2[i] != 0 && src2[i+1] != 0 && src2[i+2] != 0 && src2[i+3] != 0 )
            {
                double a = (double)src2[i] * src2[i+1];
                double b = (double)src2[i+2] * src2[i+3];
                double d = scale/(a * b);
                b *= d;
                a *= d;

                T z0 = saturate_cast<T>(src2[i+1] * ((double)src1[i] * b));
                T z1 = saturate_cast<T>(src2[i] * ((double)src1[i+1] * b));
                T z2 = saturate_cast<T>(src2[i+3] * ((double)src1[i+2] * a));
                T z3 = saturate_cast<T>(src2[i+2] * ((double)src1[i+3] * a));

                dst[i] = z0; dst[i+1] = z1;
                dst[i+2] = z2; dst[i+3] = z3;
            }
            else
            {
                T z0 = src2[i] != 0 ? saturate_cast<T>(src1[i]*scale/src2[i]) : 0;
                T z1 = src2[i+1] != 0 ? saturate_cast<T>(src1[i+1]*scale/src2[i+1]) : 0;
                T z2 = src2[i+2] != 0 ? saturate_cast<T>(src1[i+2]*scale/src2[i+2]) : 0;
                T z3 = src2[i+3] != 0 ? saturate_cast<T>(src1[i+3]*scale/src2[i+3]) : 0;

                dst[i] = z0; dst[i+1] = z1;
                dst[i+2] = z2; dst[i+3] = z3;
            }
        }
        #endif
        for( ; i < size.width; i++ )
            dst[i] = src2[i] != 0 ? saturate_cast<T>(src1[i]*scale/src2[i]) : 0;
    }
}
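// The unrolled branch above trades four divisions for one: with
// a = y0*y1, b = y2*y3 and d = scale/(a*b), scaling b by d gives
// b' = scale/(y0*y1), so z0 = y1*(x0*b') == x0*scale/y0, and similarly
// for the other three lanes. recip_ below applies the same factoring with
// all numerators fixed at 1. Zero divisors are checked up front and fall
// back to the guarded per-element path, which writes 0 for them.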
template<typename T> static void
recip_( const T*, size_t, const T* src2, size_t step2,
        T* dst, size_t step, Size size, double scale )
{
    step2 /= sizeof(src2[0]);
    step /= sizeof(dst[0]);

    for( ; size.height--; src2 += step2, dst += step )
    {
        int i = 0;
        #if CV_ENABLE_UNROLLED
        for( ; i <= size.width - 4; i += 4 )
        {
            if( src2[i] != 0 && src2[i+1] != 0 && src2[i+2] != 0 && src2[i+3] != 0 )
            {
                double a = (double)src2[i] * src2[i+1];
                double b = (double)src2[i+2] * src2[i+3];
                double d = scale/(a * b);
                b *= d;
                a *= d;

                T z0 = saturate_cast<T>(src2[i+1] * b);
                T z1 = saturate_cast<T>(src2[i] * b);
                T z2 = saturate_cast<T>(src2[i+3] * a);
                T z3 = saturate_cast<T>(src2[i+2] * a);

                dst[i] = z0; dst[i+1] = z1;
                dst[i+2] = z2; dst[i+3] = z3;
            }
            else
            {
                T z0 = src2[i] != 0 ? saturate_cast<T>(scale/src2[i]) : 0;
                T z1 = src2[i+1] != 0 ? saturate_cast<T>(scale/src2[i+1]) : 0;
                T z2 = src2[i+2] != 0 ? saturate_cast<T>(scale/src2[i+2]) : 0;
                T z3 = src2[i+3] != 0 ? saturate_cast<T>(scale/src2[i+3]) : 0;

                dst[i] = z0; dst[i+1] = z1;
                dst[i+2] = z2; dst[i+3] = z3;
            }
        }
        #endif
        for( ; i < size.width; i++ )
            dst[i] = src2[i] != 0 ? saturate_cast<T>(scale/src2[i]) : 0;
    }
}
static void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size sz, void* scale)
{
    mul_(src1, step1, src2, step2, dst, step, sz, (float)*(const double*)scale);
}

static void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
                   schar* dst, size_t step, Size sz, void* scale)
{
    mul_(src1, step1, src2, step2, dst, step, sz, (float)*(const double*)scale);
}

static void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
                    ushort* dst, size_t step, Size sz, void* scale)
{
    mul_(src1, step1, src2, step2, dst, step, sz, (float)*(const double*)scale);
}

static void mul16s( const short* src1, size_t step1, const short* src2, size_t step2,
                    short* dst, size_t step, Size sz, void* scale)
{
    mul_(src1, step1, src2, step2, dst, step, sz, (float)*(const double*)scale);
}

static void mul32s( const int* src1, size_t step1, const int* src2, size_t step2,
                    int* dst, size_t step, Size sz, void* scale)
{
    mul_(src1, step1, src2, step2, dst, step, sz, *(const double*)scale);
}

static void mul32f( const float* src1, size_t step1, const float* src2, size_t step2,
                    float* dst, size_t step, Size sz, void* scale)
{
    mul_(src1, step1, src2, step2, dst, step, sz, (float)*(const double*)scale);
}

static void mul64f( const double* src1, size_t step1, const double* src2, size_t step2,
                    double* dst, size_t step, Size sz, void* scale)
{
    mul_(src1, step1, src2, step2, dst, step, sz, *(const double*)scale);
}

static void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
                   uchar* dst, size_t step, Size sz, void* scale)
{
    if( src1 )
        div_(src1, step1, src2, step2, dst, step, sz, *(const double*)scale);
    else
        recip_(src1, step1, src2, step2, dst, step, sz, *(const double*)scale);
}

static void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
                   schar* dst, size_t step, Size sz, void* scale)
{
    div_(src1, step1, src2, step2, dst, step, sz, *(const double*)scale);
}

static void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
                    ushort* dst, size_t step, Size sz, void* scale)
{
    div_(src1, step1, src2, step2, dst, step, sz, *(const double*)scale);
}

static void div16s( const short* src1, size_t step1, const short* src2, size_t step2,
                    short* dst, size_t step, Size sz, void* scale)
{
    div_(src1, step1, src2, step2, dst, step, sz, *(const double*)scale);
}

static void div32s( const int* src1, size_t step1, const int* src2, size_t step2,
                    int* dst, size_t step, Size sz, void* scale)
{
    div_(src1, step1, src2, step2, dst, step, sz, *(const double*)scale);
}

static void div32f( const float* src1, size_t step1, const float* src2, size_t step2,
                    float* dst, size_t step, Size sz, void* scale)
{
    div_(src1, step1, src2, step2, dst, step, sz, *(const double*)scale);
}

static void div64f( const double* src1, size_t step1, const double* src2, size_t step2,
                    double* dst, size_t step, Size sz, void* scale)
{
    div_(src1, step1, src2, step2, dst, step, sz, *(const double*)scale);
}

static void recip8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
                     uchar* dst, size_t step, Size sz, void* scale)
{
    recip_(src1, step1, src2, step2, dst, step, sz, *(const double*)scale);
}

static void recip8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
                     schar* dst, size_t step, Size sz, void* scale)
{
    recip_(src1, step1, src2, step2, dst, step, sz, *(const double*)scale);
}

static void recip16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
                      ushort* dst, size_t step, Size sz, void* scale)
{
    recip_(src1, step1, src2, step2, dst, step, sz, *(const double*)scale);
}

static void recip16s( const short* src1, size_t step1, const short* src2, size_t step2,
                      short* dst, size_t step, Size sz, void* scale)
{
    recip_(src1, step1, src2, step2, dst, step, sz, *(const double*)scale);
}

static void recip32s( const int* src1, size_t step1, const int* src2, size_t step2,
                      int* dst, size_t step, Size sz, void* scale)
{
    recip_(src1, step1, src2, step2, dst, step, sz, *(const double*)scale);
}

static void recip32f( const float* src1, size_t step1, const float* src2, size_t step2,
                      float* dst, size_t step, Size sz, void* scale)
{
    recip_(src1, step1, src2, step2, dst, step, sz, *(const double*)scale);
}

static void recip64f( const double* src1, size_t step1, const double* src2, size_t step2,
                      double* dst, size_t step, Size sz, void* scale)
{
    recip_(src1, step1, src2, step2, dst, step, sz, *(const double*)scale);
}
static BinaryFunc* getMulTab()
{
    static BinaryFunc mulTab[] =
    {
        (BinaryFunc)mul8u, (BinaryFunc)mul8s, (BinaryFunc)mul16u,
        (BinaryFunc)mul16s, (BinaryFunc)mul32s, (BinaryFunc)mul32f,
        (BinaryFunc)mul64f, 0
    };

    return mulTab;
}

static BinaryFunc* getDivTab()
{
    static BinaryFunc divTab[] =
    {
        (BinaryFunc)div8u, (BinaryFunc)div8s, (BinaryFunc)div16u,
        (BinaryFunc)div16s, (BinaryFunc)div32s, (BinaryFunc)div32f,
        (BinaryFunc)div64f, 0
    };

    return divTab;
}

static BinaryFunc* getRecipTab()
{
    static BinaryFunc recipTab[] =
    {
        (BinaryFunc)recip8u, (BinaryFunc)recip8s, (BinaryFunc)recip16u,
        (BinaryFunc)recip16s, (BinaryFunc)recip32s, (BinaryFunc)recip32f,
        (BinaryFunc)recip64f, 0
    };

    return recipTab;
}

}

void cv::multiply(InputArray src1, InputArray src2,
                  OutputArray dst, double scale, int dtype)
{
    arithm_op(src1, src2, dst, noArray(), dtype, getMulTab(), true, &scale);
}

void cv::divide(InputArray src1, InputArray src2,
                OutputArray dst, double scale, int dtype)
{
    arithm_op(src1, src2, dst, noArray(), dtype, getDivTab(), true, &scale);
}

void cv::divide(double scale, InputArray src2,
                OutputArray dst, int dtype)
{
    arithm_op(src2, src2, dst, noArray(), dtype, getRecipTab(), true, &scale);
}
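/*  Example of the scale parameter and the scalar-numerator overload
    (standard OpenCV API):

        Mat a(1, 2, CV_32FC1, Scalar(4)), b(1, 2, CV_32FC1, Scalar(2)), c;
        multiply(a, b, c);      // every element is 8
        divide(a, b, c, 0.5);   // a*0.5/b: every element is 1
        divide(1., b, c);       // reciprocal via getRecipTab(): 0.5
*/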
/****************************************************************************************\
*                                      addWeighted                                       *
\****************************************************************************************/

namespace cv
{
template<typename T, typename WT> static void
addWeighted_( const T* src1, size_t step1, const T* src2, size_t step2,
              T* dst, size_t step, Size size, void* _scalars )
{
    const double* scalars = (const double*)_scalars;
    WT alpha = (WT)scalars[0], beta = (WT)scalars[1], gamma = (WT)scalars[2];
    step1 /= sizeof(src1[0]);
    step2 /= sizeof(src2[0]);
    step /= sizeof(dst[0]);

    for( ; size.height--; src1 += step1, src2 += step2, dst += step )
    {
        int x = 0;
        #if CV_ENABLE_UNROLLED
        for( ; x <= size.width - 4; x += 4 )
        {
            T t0 = saturate_cast<T>(src1[x]*alpha + src2[x]*beta + gamma);
            T t1 = saturate_cast<T>(src1[x+1]*alpha + src2[x+1]*beta + gamma);
            dst[x] = t0; dst[x+1] = t1;

            t0 = saturate_cast<T>(src1[x+2]*alpha + src2[x+2]*beta + gamma);
            t1 = saturate_cast<T>(src1[x+3]*alpha + src2[x+3]*beta + gamma);
            dst[x+2] = t0; dst[x+3] = t1;
        }
        #endif
        for( ; x < size.width; x++ )
            dst[x] = saturate_cast<T>(src1[x]*alpha + src2[x]*beta + gamma);
    }
}
static void
addWeighted8u( const uchar* src1, size_t step1,
               const uchar* src2, size_t step2,
               uchar* dst, size_t step, Size size,
               void* _scalars )
{
    const double* scalars = (const double*)_scalars;
    float alpha = (float)scalars[0], beta = (float)scalars[1], gamma = (float)scalars[2];

    for( ; size.height--; src1 += step1, src2 += step2, dst += step )
    {
        int x = 0;
#if CV_SSE2
        if( USE_SSE2 )
        {
            __m128 a4 = _mm_set1_ps(alpha), b4 = _mm_set1_ps(beta), g4 = _mm_set1_ps(gamma);
            __m128i z = _mm_setzero_si128();

            for( ; x <= size.width - 8; x += 8 )
            {
                // load 8 bytes from each source and zero-extend to 16-bit lanes
                __m128i u = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src1 + x)), z);
                __m128i v = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src2 + x)), z);

                // widen to 32-bit and convert to single precision
                __m128 u0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(u, z));
                __m128 u1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(u, z));
                __m128 v0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v, z));
                __m128 v1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v, z));

                // src1*alpha + src2*beta + gamma
                u0 = _mm_add_ps(_mm_mul_ps(u0, a4), _mm_mul_ps(v0, b4));
                u1 = _mm_add_ps(_mm_mul_ps(u1, a4), _mm_mul_ps(v1, b4));
                u0 = _mm_add_ps(u0, g4); u1 = _mm_add_ps(u1, g4);

                // round, then narrow twice (32->16 signed, 16->8 unsigned) with saturation
                u = _mm_packs_epi32(_mm_cvtps_epi32(u0), _mm_cvtps_epi32(u1));
                u = _mm_packus_epi16(u, u);

                _mm_storel_epi64((__m128i*)(dst + x), u);
            }
        }
#endif
#if CV_ENABLE_UNROLLED
        for( ; x <= size.width - 4; x += 4 )
        {
            float t0, t1;
            t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma;
            t1 = CV_8TO32F(src1[x+1])*alpha + CV_8TO32F(src2[x+1])*beta + gamma;

            dst[x] = saturate_cast<uchar>(t0);
            dst[x+1] = saturate_cast<uchar>(t1);

            t0 = CV_8TO32F(src1[x+2])*alpha + CV_8TO32F(src2[x+2])*beta + gamma;
            t1 = CV_8TO32F(src1[x+3])*alpha + CV_8TO32F(src2[x+3])*beta + gamma;

            dst[x+2] = saturate_cast<uchar>(t0);
            dst[x+3] = saturate_cast<uchar>(t1);
        }
#endif
        for( ; x < size.width; x++ )
        {
            float t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma;
            dst[x] = saturate_cast<uchar>(t0);
        }
    }
}
static void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
                           schar* dst, size_t step, Size sz, void* scalars )
{
    addWeighted_<schar, float>(src1, step1, src2, step2, dst, step, sz, scalars);
}

static void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
                            ushort* dst, size_t step, Size sz, void* scalars )
{
    addWeighted_<ushort, float>(src1, step1, src2, step2, dst, step, sz, scalars);
}

static void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2,
                            short* dst, size_t step, Size sz, void* scalars )
{
    addWeighted_<short, float>(src1, step1, src2, step2, dst, step, sz, scalars);
}

static void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2,
                            int* dst, size_t step, Size sz, void* scalars )
{
    addWeighted_<int, double>(src1, step1, src2, step2, dst, step, sz, scalars);
}

static void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2,
                            float* dst, size_t step, Size sz, void* scalars )
{
    addWeighted_<float, double>(src1, step1, src2, step2, dst, step, sz, scalars);
}

static void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2,
                            double* dst, size_t step, Size sz, void* scalars )
{
    addWeighted_<double, double>(src1, step1, src2, step2, dst, step, sz, scalars);
}
static BinaryFunc* getAddWeightedTab()
{
    static BinaryFunc addWeightedTab[] =
    {
        (BinaryFunc)GET_OPTIMIZED(addWeighted8u), (BinaryFunc)GET_OPTIMIZED(addWeighted8s), (BinaryFunc)GET_OPTIMIZED(addWeighted16u),
        (BinaryFunc)GET_OPTIMIZED(addWeighted16s), (BinaryFunc)GET_OPTIMIZED(addWeighted32s), (BinaryFunc)addWeighted32f,
        (BinaryFunc)addWeighted64f, 0
    };
    return addWeightedTab;
}

}
void cv::addWeighted( InputArray src1, double alpha, InputArray src2,
                      double beta, double gamma, OutputArray dst, int dtype )
{
    double scalars[] = {alpha, beta, gamma};
    arithm_op(src1, src2, dst, noArray(), dtype, getAddWeightedTab(), true, scalars);
}
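
/* Usage sketch (illustrative only, not part of the library): a 70/30 cross-fade
   of two same-size 8-bit images; the file names are hypothetical.

       cv::Mat img1 = cv::imread("a.png"), img2 = cv::imread("b.png");
       cv::Mat blend;
       cv::addWeighted(img1, 0.7, img2, 0.3, 0.0, blend);
       // blend(i) = saturate_cast<uchar>(0.7*img1(i) + 0.3*img2(i) + 0.0)
*/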

/****************************************************************************************\
*                                        compare                                         *
\****************************************************************************************/

namespace cv
{

template<typename T> static void
cmp_(const T* src1, size_t step1, const T* src2, size_t step2,
     uchar* dst, size_t step, Size size, int code)
{
    step1 /= sizeof(src1[0]);
    step2 /= sizeof(src2[0]);
    // canonicalize: only >, <=, == and != are handled directly;
    // >= and < are obtained by swapping the operands
    if( code == CMP_GE || code == CMP_LT )
    {
        std::swap(src1, src2);
        std::swap(step1, step2);
        code = code == CMP_GE ? CMP_LE : CMP_GT;
    }

    if( code == CMP_GT || code == CMP_LE )
    {
        // -(a > b) is 0 or 255; XOR with m == 255 inverts the mask, turning > into <=
        int m = code == CMP_GT ? 0 : 255;
        for( ; size.height--; src1 += step1, src2 += step2, dst += step )
        {
            int x = 0;
#if CV_ENABLE_UNROLLED
            for( ; x <= size.width - 4; x += 4 )
            {
                int t0, t1;
                t0 = -(src1[x] > src2[x]) ^ m;
                t1 = -(src1[x+1] > src2[x+1]) ^ m;
                dst[x] = (uchar)t0; dst[x+1] = (uchar)t1;
                t0 = -(src1[x+2] > src2[x+2]) ^ m;
                t1 = -(src1[x+3] > src2[x+3]) ^ m;
                dst[x+2] = (uchar)t0; dst[x+3] = (uchar)t1;
            }
#endif
            for( ; x < size.width; x++ )
                dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
        }
    }
    else if( code == CMP_EQ || code == CMP_NE )
    {
        int m = code == CMP_EQ ? 0 : 255;
        for( ; size.height--; src1 += step1, src2 += step2, dst += step )
        {
            int x = 0;
#if CV_ENABLE_UNROLLED
            for( ; x <= size.width - 4; x += 4 )
            {
                int t0, t1;
                t0 = -(src1[x] == src2[x]) ^ m;
                t1 = -(src1[x+1] == src2[x+1]) ^ m;
                dst[x] = (uchar)t0; dst[x+1] = (uchar)t1;
                t0 = -(src1[x+2] == src2[x+2]) ^ m;
                t1 = -(src1[x+3] == src2[x+3]) ^ m;
                dst[x+2] = (uchar)t0; dst[x+3] = (uchar)t1;
            }
#endif
            for( ; x < size.width; x++ )
                dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
        }
    }
}
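
/* Worked example of the mask arithmetic above (illustrative): a comparison
   yields 0 or 1, so -(a > b), taken as a byte, is 0x00 or 0xFF; XOR-ing with
   m = 255 turns the CMP_GT mask into its CMP_LE complement:

       a = 7, b = 3:  -(7 > 3) ^ 0   -> 0xFF ^ 0x00 = 255   (CMP_GT holds)
       a = 7, b = 3:  -(7 > 3) ^ 255 -> 0xFF ^ 0xFF = 0     (CMP_LE fails)
       a = 2, b = 3:  -(2 > 3) ^ 255 -> 0x00 ^ 0xFF = 255   (CMP_LE holds)
*/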
#if ARITHM_USE_IPP
inline static IppCmpOp convert_cmp(int _cmpop)
{
    return _cmpop == CMP_EQ ? ippCmpEq :
        _cmpop == CMP_GT ? ippCmpGreater :
        _cmpop == CMP_GE ? ippCmpGreaterEq :
        _cmpop == CMP_LT ? ippCmpLess :
        _cmpop == CMP_LE ? ippCmpLessEq :
        (IppCmpOp)-1;
}
#endif
static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
                  uchar* dst, size_t step, Size size, void* _cmpop)
{
#if ARITHM_USE_IPP
    IppCmpOp op = convert_cmp(*(int*)_cmpop);
    if( op >= 0 )
    {
        fixSteps(size, sizeof(dst[0]), step1, step2, step);
        if( ippiCompare_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 )
            return;
    }
#endif
    //vz optimized cmp_(src1, step1, src2, step2, dst, step, size, *(int*)_cmpop);
    int code = *(int*)_cmpop;
    step1 /= sizeof(src1[0]);
    step2 /= sizeof(src2[0]);
    if( code == CMP_GE || code == CMP_LT )
    {
        std::swap(src1, src2);
        std::swap(step1, step2);
        code = code == CMP_GE ? CMP_LE : CMP_GT;
    }

    if( code == CMP_GT || code == CMP_LE )
    {
        int m = code == CMP_GT ? 0 : 255;
        for( ; size.height--; src1 += step1, src2 += step2, dst += step )
        {
            int x = 0;
#if CV_SSE2
            if( USE_SSE2 )
            {
                __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi8(-1);
                __m128i c128 = _mm_set1_epi8(-128);
                for( ; x <= size.width - 16; x += 16 )
                {
                    __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
                    __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
                    // SSE2 has no unsigned 8-bit compare, so bias both operands
                    // by 128 and use the signed compare instead
                    r00 = _mm_sub_epi8(r00, c128);
                    r10 = _mm_sub_epi8(r10, c128);
                    r00 = _mm_xor_si128(_mm_cmpgt_epi8(r00, r10), m128);
                    _mm_storeu_si128((__m128i*)(dst + x), r00);
                }
            }
#endif
            for( ; x < size.width; x++ )
                dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
        }
    }
    else if( code == CMP_EQ || code == CMP_NE )
    {
        int m = code == CMP_EQ ? 0 : 255;
        for( ; size.height--; src1 += step1, src2 += step2, dst += step )
        {
            int x = 0;
#if CV_SSE2
            if( USE_SSE2 )
            {
                __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi8(-1);
                for( ; x <= size.width - 16; x += 16 )
                {
                    __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
                    __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
                    r00 = _mm_xor_si128(_mm_cmpeq_epi8(r00, r10), m128);
                    _mm_storeu_si128((__m128i*)(dst + x), r00);
                }
            }
#endif
            for( ; x < size.width; x++ )
                dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
        }
    }
}
static void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2,
                  uchar* dst, size_t step, Size size, void* _cmpop)
{
    cmp_(src1, step1, src2, step2, dst, step, size, *(int*)_cmpop);
}

static void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
                   uchar* dst, size_t step, Size size, void* _cmpop)
{
#if ARITHM_USE_IPP
    IppCmpOp op = convert_cmp(*(int*)_cmpop);
    if( op >= 0 )
    {
        fixSteps(size, sizeof(dst[0]), step1, step2, step);
        if( ippiCompare_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 )
            return;
    }
#endif
    cmp_(src1, step1, src2, step2, dst, step, size, *(int*)_cmpop);
}

static void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2,
                   uchar* dst, size_t step, Size size, void* _cmpop)
{
#if ARITHM_USE_IPP
    IppCmpOp op = convert_cmp(*(int*)_cmpop);
    if( op >= 0 )
    {
        fixSteps(size, sizeof(dst[0]), step1, step2, step);
        if( ippiCompare_16s_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 )
            return;
    }
#endif
    //vz optimized cmp_(src1, step1, src2, step2, dst, step, size, *(int*)_cmpop);
    int code = *(int*)_cmpop;
    step1 /= sizeof(src1[0]);
    step2 /= sizeof(src2[0]);
    if( code == CMP_GE || code == CMP_LT )
    {
        std::swap(src1, src2);
        std::swap(step1, step2);
        code = code == CMP_GE ? CMP_LE : CMP_GT;
    }

    if( code == CMP_GT || code == CMP_LE )
    {
        int m = code == CMP_GT ? 0 : 255;
        for( ; size.height--; src1 += step1, src2 += step2, dst += step )
        {
            int x = 0;
#if CV_SSE2
            if( USE_SSE2 )
            {
                __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi16(-1);
                for( ; x <= size.width - 16; x += 16 )
                {
                    // compare two batches of 8 shorts, then pack the two word
                    // masks into 16 byte masks
                    __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
                    __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
                    r00 = _mm_xor_si128(_mm_cmpgt_epi16(r00, r10), m128);
                    __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8));
                    __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8));
                    r01 = _mm_xor_si128(_mm_cmpgt_epi16(r01, r11), m128);
                    r11 = _mm_packs_epi16(r00, r01);
                    _mm_storeu_si128((__m128i*)(dst + x), r11);
                }
                if( x <= size.width - 8 )
                {
                    __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
                    __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
                    r00 = _mm_xor_si128(_mm_cmpgt_epi16(r00, r10), m128);
                    r10 = _mm_packs_epi16(r00, r00);
                    _mm_storel_epi64((__m128i*)(dst + x), r10);
                    x += 8;
                }
            }
#endif
            for( ; x < size.width; x++ )
                dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
        }
    }
    else if( code == CMP_EQ || code == CMP_NE )
    {
        int m = code == CMP_EQ ? 0 : 255;
        for( ; size.height--; src1 += step1, src2 += step2, dst += step )
        {
            int x = 0;
#if CV_SSE2
            if( USE_SSE2 )
            {
                __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi16(-1);
                for( ; x <= size.width - 16; x += 16 )
                {
                    __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
                    __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
                    r00 = _mm_xor_si128(_mm_cmpeq_epi16(r00, r10), m128);
                    __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8));
                    __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8));
                    r01 = _mm_xor_si128(_mm_cmpeq_epi16(r01, r11), m128);
                    r11 = _mm_packs_epi16(r00, r01);
                    _mm_storeu_si128((__m128i*)(dst + x), r11);
                }
                if( x <= size.width - 8 )
                {
                    __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
                    __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
                    r00 = _mm_xor_si128(_mm_cmpeq_epi16(r00, r10), m128);
                    r10 = _mm_packs_epi16(r00, r00);
                    _mm_storel_epi64((__m128i*)(dst + x), r10);
                    x += 8;
                }
            }
#endif
            for( ; x < size.width; x++ )
                dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
        }
    }
}
static void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2,
                   uchar* dst, size_t step, Size size, void* _cmpop)
{
    cmp_(src1, step1, src2, step2, dst, step, size, *(int*)_cmpop);
}

static void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2,
                   uchar* dst, size_t step, Size size, void* _cmpop)
{
#if ARITHM_USE_IPP
    IppCmpOp op = convert_cmp(*(int*)_cmpop);
    if( op >= 0 )
    {
        fixSteps(size, sizeof(dst[0]), step1, step2, step);
        if( ippiCompare_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 )
            return;
    }
#endif
    cmp_(src1, step1, src2, step2, dst, step, size, *(int*)_cmpop);
}

static void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2,
                   uchar* dst, size_t step, Size size, void* _cmpop)
{
    cmp_(src1, step1, src2, step2, dst, step, size, *(int*)_cmpop);
}
static BinaryFunc getCmpFunc(int depth)
{
    static BinaryFunc cmpTab[] =
    {
        (BinaryFunc)GET_OPTIMIZED(cmp8u), (BinaryFunc)GET_OPTIMIZED(cmp8s),
        (BinaryFunc)GET_OPTIMIZED(cmp16u), (BinaryFunc)GET_OPTIMIZED(cmp16s),
        (BinaryFunc)GET_OPTIMIZED(cmp32s),
        (BinaryFunc)GET_OPTIMIZED(cmp32f), (BinaryFunc)cmp64f,
        0
    };
    return cmpTab[depth];
}

static double getMinVal(int depth)
{
    static const double tab[] = {0, -128, 0, -32768, INT_MIN, -FLT_MAX, -DBL_MAX, 0};
    return tab[depth];
}

static double getMaxVal(int depth)
{
    static const double tab[] = {255, 127, 65535, 32767, INT_MAX, FLT_MAX, DBL_MAX, 0};
    return tab[depth];
}

}

void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op)
{
    CV_Assert( op == CMP_LT || op == CMP_LE || op == CMP_EQ ||
               op == CMP_NE || op == CMP_GE || op == CMP_GT );

    int kind1 = _src1.kind(), kind2 = _src2.kind();
    Mat src1 = _src1.getMat(), src2 = _src2.getMat();

    if( kind1 == kind2 && src1.dims <= 2 && src2.dims <= 2 && src1.size() == src2.size() && src1.type() == src2.type() )
    {
        // fast path: both operands are 2D arrays of the same size and type
        int cn = src1.channels();
        _dst.create(src1.size(), CV_8UC(cn));
        Mat dst = _dst.getMat();
        Size sz = getContinuousSize(src1, src2, dst, src1.channels());
        getCmpFunc(src1.depth())(src1.data, src1.step, src2.data, src2.step, dst.data, dst.step, sz, &op);
        return;
    }

    bool haveScalar = false;

    if( (kind1 == _InputArray::MATX) + (kind2 == _InputArray::MATX) == 1 ||
        src1.size != src2.size || src1.type() != src2.type() )
    {
        if( checkScalar(src1, src2.type(), kind1, kind2) )
        {
            // src1 is a scalar; swap it with src2 and mirror the comparison
            std::swap(src1, src2);
            op = op == CMP_LT ? CMP_GT : op == CMP_LE ? CMP_GE :
                op == CMP_GE ? CMP_LE : op == CMP_GT ? CMP_LT : op;
        }
        else if( !checkScalar(src2, src1.type(), kind2, kind1) )
            CV_Error( CV_StsUnmatchedSizes,
                      "The operation is neither 'array op array' (where arrays have the same size and the same type), "
                      "nor 'array op scalar', nor 'scalar op array'" );
        haveScalar = true;
    }

    int cn = src1.channels(), depth1 = src1.depth(), depth2 = src2.depth();

    _dst.create(src1.dims, src1.size, CV_8UC(cn));
    src1 = src1.reshape(1); src2 = src2.reshape(1);
    Mat dst = _dst.getMat().reshape(1);

    size_t esz = src1.elemSize();
    size_t blocksize0 = (size_t)(BLOCK_SIZE + esz-1)/esz;
    BinaryFunc func = getCmpFunc(depth1);

    if( !haveScalar )
    {
        const Mat* arrays[] = { &src1, &src2, &dst, 0 };
        uchar* ptrs[3];

        NAryMatIterator it(arrays, ptrs);
        size_t total = it.size;

        for( size_t i = 0; i < it.nplanes; i++, ++it )
            func( ptrs[0], 0, ptrs[1], 0, ptrs[2], 0, Size((int)total, 1), &op );
    }
    else
    {
        const Mat* arrays[] = { &src1, &dst, 0 };
        uchar* ptrs[2];

        NAryMatIterator it(arrays, ptrs);
        size_t total = it.size, blocksize = std::min(total, blocksize0);

        AutoBuffer<uchar> _buf(blocksize*esz);
        uchar *buf = _buf;

        if( depth1 > CV_32S )
            convertAndUnrollScalar( src2, depth1, buf, blocksize );
        else
        {
            double fval = 0;
            getConvertFunc(depth2, CV_64F)(src2.data, 0, 0, 0, (uchar*)&fval, 0, Size(1,1), 0);
            if( fval < getMinVal(depth1) )
            {
                // the scalar is below the range of depth1: the answer is known in advance
                dst = Scalar::all(op == CMP_GT || op == CMP_GE || op == CMP_NE ? 255 : 0);
                return;
            }

            if( fval > getMaxVal(depth1) )
            {
                dst = Scalar::all(op == CMP_LT || op == CMP_LE || op == CMP_NE ? 255 : 0);
                return;
            }

            int ival = cvRound(fval);
            if( fval != ival )
            {
                // fractional scalar vs. integer array: round towards the predicate
                if( op == CMP_LT || op == CMP_GE )
                    ival = cvCeil(fval);
                else if( op == CMP_LE || op == CMP_GT )
                    ival = cvFloor(fval);
                else
                {
                    dst = Scalar::all(op == CMP_NE ? 255 : 0);
                    return;
                }
            }
            convertAndUnrollScalar(Mat(1, 1, CV_32S, &ival), depth1, buf, blocksize);
        }

        for( size_t i = 0; i < it.nplanes; i++, ++it )
        {
            for( size_t j = 0; j < total; j += blocksize )
            {
                int bsz = (int)MIN(total - j, blocksize);
                func( ptrs[0], 0, buf, 0, ptrs[1], 0, Size(bsz, 1), &op);
                ptrs[0] += bsz*esz;
                ptrs[1] += bsz;
            }
        }
    }
}
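
/* Usage sketch (illustrative only, not part of the library): per-element
   comparison against an array and against a scalar; the result is CV_8U with
   255 where the predicate holds and 0 elsewhere.

       cv::Mat a(3, 3, CV_32F), b(3, 3, CV_32F), mask;
       cv::randu(a, cv::Scalar::all(0), cv::Scalar::all(1));
       cv::randu(b, cv::Scalar::all(0), cv::Scalar::all(1));
       cv::compare(a, b, mask, cv::CMP_GT);   // mask(i) = a(i) > b(i) ? 255 : 0
       cv::compare(a, 0.5, mask, cv::CMP_LE); // scalar path, handled block-wise above
*/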

/****************************************************************************************\
*                                        inRange                                         *
\****************************************************************************************/

namespace cv
{

template<typename T> static void
inRange_(const T* src1, size_t step1, const T* src2, size_t step2,
         const T* src3, size_t step3, uchar* dst, size_t step,
         Size size)
{
    // dst = 255 where src2 (lower bound) <= src1 <= src3 (upper bound), else 0
    step1 /= sizeof(src1[0]);
    step2 /= sizeof(src2[0]);
    step3 /= sizeof(src3[0]);

    for( ; size.height--; src1 += step1, src2 += step2, src3 += step3, dst += step )
    {
        int x = 0;
#if CV_ENABLE_UNROLLED
        for( ; x <= size.width - 4; x += 4 )
        {
            int t0, t1;
            t0 = src2[x] <= src1[x] && src1[x] <= src3[x];
            t1 = src2[x+1] <= src1[x+1] && src1[x+1] <= src3[x+1];
            dst[x] = (uchar)-t0; dst[x+1] = (uchar)-t1;
            t0 = src2[x+2] <= src1[x+2] && src1[x+2] <= src3[x+2];
            t1 = src2[x+3] <= src1[x+3] && src1[x+3] <= src3[x+3];
            dst[x+2] = (uchar)-t0; dst[x+3] = (uchar)-t1;
        }
#endif
        for( ; x < size.width; x++ )
            dst[x] = (uchar)-(src2[x] <= src1[x] && src1[x] <= src3[x]);
    }
}
static void inRange8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
                      const uchar* src3, size_t step3, uchar* dst, size_t step, Size size)
{
    inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
}

static void inRange8s(const schar* src1, size_t step1, const schar* src2, size_t step2,
                      const schar* src3, size_t step3, uchar* dst, size_t step, Size size)
{
    inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
}

static void inRange16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
                       const ushort* src3, size_t step3, uchar* dst, size_t step, Size size)
{
    inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
}

static void inRange16s(const short* src1, size_t step1, const short* src2, size_t step2,
                       const short* src3, size_t step3, uchar* dst, size_t step, Size size)
{
    inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
}

static void inRange32s(const int* src1, size_t step1, const int* src2, size_t step2,
                       const int* src3, size_t step3, uchar* dst, size_t step, Size size)
{
    inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
}

static void inRange32f(const float* src1, size_t step1, const float* src2, size_t step2,
                       const float* src3, size_t step3, uchar* dst, size_t step, Size size)
{
    inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
}

static void inRange64f(const double* src1, size_t step1, const double* src2, size_t step2,
                       const double* src3, size_t step3, uchar* dst, size_t step, Size size)
{
    inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
}
static void inRangeReduce(const uchar* src, uchar* dst, size_t len, int cn)
{
    // AND the cn per-channel masks of each pixel down to a single byte
    int k = cn % 4 ? cn % 4 : 4;
    size_t i, j;
    if( k == 1 )
        for( i = j = 0; i < len; i++, j += cn )
            dst[i] = src[j];
    else if( k == 2 )
        for( i = j = 0; i < len; i++, j += cn )
            dst[i] = src[j] & src[j+1];
    else if( k == 3 )
        for( i = j = 0; i < len; i++, j += cn )
            dst[i] = src[j] & src[j+1] & src[j+2];
    else
        for( i = j = 0; i < len; i++, j += cn )
            dst[i] = src[j] & src[j+1] & src[j+2] & src[j+3];

    for( ; k < cn; k += 4 )
        for( i = 0, j = k; i < len; i++, j += cn )
            dst[i] &= src[j] & src[j+1] & src[j+2] & src[j+3];
}
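
/* Example of the reduction above (illustrative): for cn = 3, a pixel survives
   only if all three of its channel masks are 255:

       src = 255 255 255 | 255 0 255   ->   dst = 255 | 0
*/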
typedef void (*InRangeFunc)( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
                             const uchar* src3, size_t step3, uchar* dst, size_t step, Size sz );

static InRangeFunc getInRangeFunc(int depth)
{
    static InRangeFunc inRangeTab[] =
    {
        (InRangeFunc)GET_OPTIMIZED(inRange8u), (InRangeFunc)GET_OPTIMIZED(inRange8s), (InRangeFunc)GET_OPTIMIZED(inRange16u),
        (InRangeFunc)GET_OPTIMIZED(inRange16s), (InRangeFunc)GET_OPTIMIZED(inRange32s), (InRangeFunc)GET_OPTIMIZED(inRange32f),
        (InRangeFunc)inRange64f, 0
    };
    return inRangeTab[depth];
}

}
void cv::inRange(InputArray _src, InputArray _lowerb,
                 InputArray _upperb, OutputArray _dst)
{
    int skind = _src.kind(), lkind = _lowerb.kind(), ukind = _upperb.kind();
    Mat src = _src.getMat(), lb = _lowerb.getMat(), ub = _upperb.getMat();

    bool lbScalar = false, ubScalar = false;

    if( (lkind == _InputArray::MATX && skind != _InputArray::MATX) ||
        src.size != lb.size || src.type() != lb.type() )
    {
        if( !checkScalar(lb, src.type(), lkind, skind) )
            CV_Error( CV_StsUnmatchedSizes,
                      "The lower boundary is neither an array of the same size and same type as src, nor a scalar");
        lbScalar = true;
    }

    if( (ukind == _InputArray::MATX && skind != _InputArray::MATX) ||
        src.size != ub.size || src.type() != ub.type() )
    {
        if( !checkScalar(ub, src.type(), ukind, skind) )
            CV_Error( CV_StsUnmatchedSizes,
                      "The upper boundary is neither an array of the same size and same type as src, nor a scalar");
        ubScalar = true;
    }

    // both bounds must be scalars, or both must be arrays
    CV_Assert( ((int)lbScalar ^ (int)ubScalar) == 0 );

    int cn = src.channels(), depth = src.depth();

    size_t esz = src.elemSize();
    size_t blocksize0 = (size_t)(BLOCK_SIZE + esz-1)/esz;

    _dst.create(src.dims, src.size, CV_8U);
    Mat dst = _dst.getMat();
    InRangeFunc func = getInRangeFunc(depth);

    const Mat* arrays_sc[] = { &src, &dst, 0 };
    const Mat* arrays_nosc[] = { &src, &dst, &lb, &ub, 0 };
    uchar* ptrs[4];

    NAryMatIterator it(lbScalar && ubScalar ? arrays_sc : arrays_nosc, ptrs);
    size_t total = it.size, blocksize = std::min(total, blocksize0);

    AutoBuffer<uchar> _buf(blocksize*(((int)lbScalar + (int)ubScalar)*esz + cn) + 2*cn*sizeof(int) + 128);
    uchar *buf = _buf, *mbuf = buf, *lbuf = 0, *ubuf = 0;
    buf = alignPtr(buf + blocksize*cn, 16);

    if( lbScalar && ubScalar )
    {
        lbuf = buf;
        ubuf = buf = alignPtr(buf + blocksize*esz, 16);

        CV_Assert( lb.type() == ub.type() );
        int scdepth = lb.depth();

        if( scdepth != depth && depth < CV_32S )
        {
            int* ilbuf = (int*)alignPtr(buf + blocksize*esz, 16);
            int* iubuf = ilbuf + cn;

            BinaryFunc sccvtfunc = getConvertFunc(scdepth, CV_32S);
            sccvtfunc(lb.data, 0, 0, 0, (uchar*)ilbuf, 0, Size(cn, 1), 0);
            sccvtfunc(ub.data, 0, 0, 0, (uchar*)iubuf, 0, Size(cn, 1), 0);
            int minval = cvRound(getMinVal(depth)), maxval = cvRound(getMaxVal(depth));

            for( int k = 0; k < cn; k++ )
            {
                // an empty or out-of-range interval is replaced by an always-false one
                if( ilbuf[k] > iubuf[k] || ilbuf[k] > maxval || iubuf[k] < minval )
                    ilbuf[k] = minval+1, iubuf[k] = minval;
            }
            lb = Mat(cn, 1, CV_32S, ilbuf);
            ub = Mat(cn, 1, CV_32S, iubuf);
        }

        convertAndUnrollScalar( lb, src.type(), lbuf, blocksize );
        convertAndUnrollScalar( ub, src.type(), ubuf, blocksize );
    }

    for( size_t i = 0; i < it.nplanes; i++, ++it )
    {
        for( size_t j = 0; j < total; j += blocksize )
        {
            int bsz = (int)MIN(total - j, blocksize);
            size_t delta = bsz*esz;
            uchar *lptr = lbuf, *uptr = ubuf;
            if( !lbScalar )
            {
                lptr = ptrs[2];
                ptrs[2] += delta;
            }
            if( !ubScalar )
            {
                int idx = !lbScalar ? 3 : 2;
                uptr = ptrs[idx];
                ptrs[idx] += delta;
            }
            func( ptrs[0], 0, lptr, 0, uptr, 0, cn == 1 ? ptrs[1] : mbuf, 0, Size(bsz*cn, 1));
            if( cn > 1 )
                inRangeReduce(mbuf, ptrs[1], bsz, cn);
            ptrs[0] += delta;
            ptrs[1] += bsz;
        }
    }
}
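
/* Usage sketch (illustrative only, not part of the library): mask the pixels
   of a BGR image that fall inside a box; file name and bounds are hypothetical.

       cv::Mat img = cv::imread("scene.png"); // CV_8UC3
       cv::Mat mask;
       cv::inRange(img, cv::Scalar(0, 100, 100), cv::Scalar(60, 255, 255), mask);
       // mask is CV_8U: 255 only where all three channels lie inside their range
*/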

/****************************************************************************************\
*                                Earlier API: cvAdd etc.                                 *
\****************************************************************************************/

CV_IMPL void
cvNot( const CvArr* srcarr, CvArr* dstarr )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
    CV_Assert( src.size == dst.size && src.type() == dst.type() );
    cv::bitwise_not( src, dst );
}

CV_IMPL void
cvAnd( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
        dst = cv::cvarrToMat(dstarr), mask;
    CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
    if( maskarr )
        mask = cv::cvarrToMat(maskarr);
    cv::bitwise_and( src1, src2, dst, mask );
}

CV_IMPL void
cvOr( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
        dst = cv::cvarrToMat(dstarr), mask;
    CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
    if( maskarr )
        mask = cv::cvarrToMat(maskarr);
    cv::bitwise_or( src1, src2, dst, mask );
}

CV_IMPL void
cvXor( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
        dst = cv::cvarrToMat(dstarr), mask;
    CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
    if( maskarr )
        mask = cv::cvarrToMat(maskarr);
    cv::bitwise_xor( src1, src2, dst, mask );
}

CV_IMPL void
cvAndS( const CvArr* srcarr, CvScalar s, CvArr* dstarr, const CvArr* maskarr )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), mask;
    CV_Assert( src.size == dst.size && src.type() == dst.type() );
    if( maskarr )
        mask = cv::cvarrToMat(maskarr);
    cv::bitwise_and( src, (const cv::Scalar&)s, dst, mask );
}

CV_IMPL void
cvOrS( const CvArr* srcarr, CvScalar s, CvArr* dstarr, const CvArr* maskarr )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), mask;
    CV_Assert( src.size == dst.size && src.type() == dst.type() );
    if( maskarr )
        mask = cv::cvarrToMat(maskarr);
    cv::bitwise_or( src, (const cv::Scalar&)s, dst, mask );
}

CV_IMPL void
cvXorS( const CvArr* srcarr, CvScalar s, CvArr* dstarr, const CvArr* maskarr )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), mask;
    CV_Assert( src.size == dst.size && src.type() == dst.type() );
    if( maskarr )
        mask = cv::cvarrToMat(maskarr);
    cv::bitwise_xor( src, (const cv::Scalar&)s, dst, mask );
}
CV_IMPL void cvAdd( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
        dst = cv::cvarrToMat(dstarr), mask;
    CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
    if( maskarr )
        mask = cv::cvarrToMat(maskarr);
    cv::add( src1, src2, dst, mask, dst.type() );
}

CV_IMPL void cvSub( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
        dst = cv::cvarrToMat(dstarr), mask;
    CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
    if( maskarr )
        mask = cv::cvarrToMat(maskarr);
    cv::subtract( src1, src2, dst, mask, dst.type() );
}

CV_IMPL void cvAddS( const CvArr* srcarr1, CvScalar value, CvArr* dstarr, const CvArr* maskarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1),
        dst = cv::cvarrToMat(dstarr), mask;
    CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
    if( maskarr )
        mask = cv::cvarrToMat(maskarr);
    cv::add( src1, (const cv::Scalar&)value, dst, mask, dst.type() );
}

CV_IMPL void cvSubRS( const CvArr* srcarr1, CvScalar value, CvArr* dstarr, const CvArr* maskarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1),
        dst = cv::cvarrToMat(dstarr), mask;
    CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
    if( maskarr )
        mask = cv::cvarrToMat(maskarr);
    cv::subtract( (const cv::Scalar&)value, src1, dst, mask, dst.type() );
}
CV_IMPL void cvMul( const CvArr* srcarr1, const CvArr* srcarr2,
                    CvArr* dstarr, double scale )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
        dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
    cv::multiply( src1, src2, dst, scale, dst.type() );
}

CV_IMPL void cvDiv( const CvArr* srcarr1, const CvArr* srcarr2,
                    CvArr* dstarr, double scale )
{
    cv::Mat src2 = cv::cvarrToMat(srcarr2),
        dst = cv::cvarrToMat(dstarr), mask;
    CV_Assert( src2.size == dst.size && src2.channels() == dst.channels() );

    if( srcarr1 )
        cv::divide( cv::cvarrToMat(srcarr1), src2, dst, scale, dst.type() );
    else
        cv::divide( scale, src2, dst, dst.type() );
}
CV_IMPL void
cvAddWeighted( const CvArr* srcarr1, double alpha,
               const CvArr* srcarr2, double beta,
               double gamma, CvArr* dstarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
        dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
    cv::addWeighted( src1, alpha, src2, beta, gamma, dst, dst.type() );
}

CV_IMPL void
cvAbsDiff( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
    cv::absdiff( src1, cv::cvarrToMat(srcarr2), dst );
}

CV_IMPL void
cvAbsDiffS( const CvArr* srcarr1, CvArr* dstarr, CvScalar scalar )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
    cv::absdiff( src1, (const cv::Scalar&)scalar, dst );
}

CV_IMPL void
cvInRange( const void* srcarr1, const void* srcarr2,
           const void* srcarr3, void* dstarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && dst.type() == CV_8U );
    cv::inRange( src1, cv::cvarrToMat(srcarr2), cv::cvarrToMat(srcarr3), dst );
}

CV_IMPL void
cvInRangeS( const void* srcarr1, CvScalar lowerb, CvScalar upperb, void* dstarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && dst.type() == CV_8U );
    cv::inRange( src1, (const cv::Scalar&)lowerb, (const cv::Scalar&)upperb, dst );
}

CV_IMPL void
cvCmp( const void* srcarr1, const void* srcarr2, void* dstarr, int cmp_op )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && dst.type() == CV_8U );
    cv::compare( src1, cv::cvarrToMat(srcarr2), dst, cmp_op );
}

CV_IMPL void
cvCmpS( const void* srcarr1, double value, void* dstarr, int cmp_op )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && dst.type() == CV_8U );
    cv::compare( src1, value, dst, cmp_op );
}

CV_IMPL void
cvMin( const void* srcarr1, const void* srcarr2, void* dstarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
    cv::min( src1, cv::cvarrToMat(srcarr2), dst );
}

CV_IMPL void
cvMax( const void* srcarr1, const void* srcarr2, void* dstarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
    cv::max( src1, cv::cvarrToMat(srcarr2), dst );
}

CV_IMPL void
cvMinS( const void* srcarr1, double value, void* dstarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
    cv::min( src1, value, dst );
}

CV_IMPL void
cvMaxS( const void* srcarr1, double value, void* dstarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
    cv::max( src1, value, dst );
}
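
/* Usage sketch (illustrative only): the legacy C wrappers above just convert
   their CvArr arguments with cvarrToMat and forward to the cv:: functions; the
   matrices below are hypothetical.

       CvMat* a = cvCreateMat(3, 3, CV_8UC1);
       CvMat* b = cvCreateMat(3, 3, CV_8UC1);
       CvMat* c = cvCreateMat(3, 3, CV_8UC1);
       cvSet(a, cvScalar(200)); cvSet(b, cvScalar(100));
       cvAdd(a, b, c, 0);              // c(i) = saturate_cast<uchar>(200 + 100) = 255
       cvCmpS(a, 150, c, CV_CMP_GT);   // c(i) = 255, since 200 > 150
       cvReleaseMat(&a); cvReleaseMat(&b); cvReleaseMat(&c);
*/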