1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
16 // Third party copyrights are property of their respective owners.
18 // Redistribution and use in source and binary forms, with or without modification,
19 // are permitted provided that the following conditions are met:
//   * Redistributions of source code must retain the above copyright notice,
22 // this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
25 // this list of conditions and the following disclaimer in the documentation
26 // and/or other materials provided with the distribution.
28 // * The name of the copyright holders may not be used to endorse or promote products
29 // derived from this software without specific prior written permission.
31 // This software is provided by the copyright holders and contributors "as is" and
32 // any express or implied warranties, including, but not limited to, the implied
33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
34 // In no event shall the Intel Corporation or contributors be liable for any direct,
35 // indirect, incidental, special, exemplary, or consequential damages
36 // (including, but not limited to, procurement of substitute goods or services;
37 // loss of use, data, or profits; or business interruption) however caused
38 // and on any theory of liability, whether in contract, strict liability,
39 // or tort (including negligence or otherwise) arising in any way out of
40 // the use of this software, even if advised of the possibility of such damage.
44 /* ////////////////////////////////////////////////////////////////////
46 // Arithmetic and logical operations: +, -, *, /, &, |, ^, ~, abs ...
50 #include "precomp.hpp"
51 #include "opencl_kernels_core.hpp"
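
/*
   Illustrative usage sketch (not part of the library source): the public functions
   implemented in this file are typically called as in the fragment below, assuming
   the standard OpenCV headers. Shown here purely as documentation.

       #include <opencv2/core.hpp>
       using namespace cv;

       Mat a(4, 4, CV_8UC1, Scalar(100)), b(4, 4, CV_8UC1, Scalar(200));
       Mat sum, conj, mask;
       add(a, b, sum, noArray(), CV_16U);   // widen the result to avoid saturation
       bitwise_and(a, b, conj);             // per-element bitwise AND
       compare(a, b, mask, CMP_LT);         // 8-bit mask: 255 where a < b, 0 elsewhere
*/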
56 /****************************************************************************************\
57 * logical operations *
58 \****************************************************************************************/
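
// Converts the scalar "sc" to "buftype" and replicates the converted value over
// "scbuf" so that the buffer holds "blocksize" unrolled copies of the
// (channel-extended) scalar. Used by the 'array op scalar' code paths below.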
void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize )
{
    int scn = (int)sc.total(), cn = CV_MAT_CN(buftype);
    size_t esz = CV_ELEM_SIZE(buftype);
    getConvertFunc(sc.depth(), buftype)(sc.ptr(), 1, 0, 1, scbuf, 1, Size(std::min(cn, scn), 1), 0);

    // unroll the scalar: first across the channels, then across the whole block
    if( scn < cn )
    {
        CV_Assert( scn == 1 );
        size_t esz1 = CV_ELEM_SIZE1(buftype);
        for( size_t i = esz1; i < esz; i++ )
            scbuf[i] = scbuf[i - esz1];
    }
    for( size_t i = esz; i < blocksize*esz; i++ )
        scbuf[i] = scbuf[i - esz];
}
78 enum { OCL_OP_ADD=0, OCL_OP_SUB=1, OCL_OP_RSUB=2, OCL_OP_ABSDIFF=3, OCL_OP_MUL=4,
79 OCL_OP_MUL_SCALE=5, OCL_OP_DIV_SCALE=6, OCL_OP_RECIP_SCALE=7, OCL_OP_ADDW=8,
80 OCL_OP_AND=9, OCL_OP_OR=10, OCL_OP_XOR=11, OCL_OP_NOT=12, OCL_OP_MIN=13, OCL_OP_MAX=14,
81 OCL_OP_RDIV_SCALE=15 };
85 static const char* oclop2str[] = { "OP_ADD", "OP_SUB", "OP_RSUB", "OP_ABSDIFF",
86 "OP_MUL", "OP_MUL_SCALE", "OP_DIV_SCALE", "OP_RECIP_SCALE",
87 "OP_ADDW", "OP_AND", "OP_OR", "OP_XOR", "OP_NOT", "OP_MIN", "OP_MAX", "OP_RDIV_SCALE", 0 };
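
// Note: the order of the strings in oclop2str must match the OCL_OP_* enumeration above.

// Builds and runs the elementwise "KF" kernel from ocl::core::arithm_oclsrc for the
// bitwise/min/max family of operations (optionally masked, optionally with a scalar
// second operand). Returns false when the kernel cannot be built or run, in which case
// the caller falls back to the CPU implementation.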
89 static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
90 InputArray _mask, bool bitwise, int oclop, bool haveScalar )
92 bool haveMask = !_mask.empty();
93 int srctype = _src1.type();
94 int srcdepth = CV_MAT_DEPTH(srctype);
95 int cn = CV_MAT_CN(srctype);
97 const ocl::Device d = ocl::Device::getDefault();
98 bool doubleSupport = d.doubleFPConfig() > 0;
99 if( oclop < 0 || ((haveMask || haveScalar) && cn > 4) ||
100 (!doubleSupport && srcdepth == CV_64F && !bitwise))
104 int kercn = haveMask || haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
105 int scalarcn = kercn == 3 ? 4 : kercn;
106 int rowsPerWI = d.isIntel() ? 4 : 1;
108 sprintf(opts, "-D %s%s -D %s -D dstT=%s%s -D dstT_C1=%s -D workST=%s -D cn=%d -D rowsPerWI=%d",
109 haveMask ? "MASK_" : "", haveScalar ? "UNARY_OP" : "BINARY_OP", oclop2str[oclop],
110 bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, kercn)) :
111 ocl::typeToStr(CV_MAKETYPE(srcdepth, kercn)), doubleSupport ? " -D DOUBLE_SUPPORT" : "",
112 bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, 1)) :
113 ocl::typeToStr(CV_MAKETYPE(srcdepth, 1)),
114 bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, scalarcn)) :
115 ocl::typeToStr(CV_MAKETYPE(srcdepth, scalarcn)),
118 ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
122 UMat src1 = _src1.getUMat(), src2;
123 UMat dst = _dst.getUMat(), mask = _mask.getUMat();
125 ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1, cn, kercn);
126 ocl::KernelArg dstarg = haveMask ? ocl::KernelArg::ReadWrite(dst, cn, kercn) :
127 ocl::KernelArg::WriteOnly(dst, cn, kercn);
128 ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask, 1);
132 size_t esz = CV_ELEM_SIZE1(srctype)*scalarcn;
133 double buf[4] = {0,0,0,0};
135 if( oclop != OCL_OP_NOT )
137 Mat src2sc = _src2.getMat();
138 convertAndUnrollScalar(src2sc, srctype, (uchar*)buf, 1);
141 ocl::KernelArg scalararg = ocl::KernelArg(ocl::KernelArg::CONSTANT, 0, 0, 0, buf, esz);
144 k.args(src1arg, dstarg, scalararg);
146 k.args(src1arg, maskarg, dstarg, scalararg);
150 src2 = _src2.getUMat();
151 ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cn, kercn);
154 k.args(src1arg, src2arg, dstarg);
156 k.args(src1arg, src2arg, maskarg, dstarg);
159 size_t globalsize[] = { (size_t)src1.cols * cn / kercn, ((size_t)src1.rows + rowsPerWI - 1) / rowsPerWI };
160 return k.run(2, globalsize, 0, false);
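
// Common dispatcher for the bitwise and min/max operations: validates the operands,
// distinguishes 'array op array' from 'array op scalar', tries the OpenCL path, and
// otherwise processes the data block-wise with the per-depth function from "tab",
// applying the mask (if any) through a temporary buffer.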
165 static void binary_op( InputArray _src1, InputArray _src2, OutputArray _dst,
166 InputArray _mask, const BinaryFuncC* tab,
167 bool bitwise, int oclop )
169 const _InputArray *psrc1 = &_src1, *psrc2 = &_src2;
170 int kind1 = psrc1->kind(), kind2 = psrc2->kind();
171 int type1 = psrc1->type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
172 int type2 = psrc2->type(), depth2 = CV_MAT_DEPTH(type2), cn2 = CV_MAT_CN(type2);
173 int dims1 = psrc1->dims(), dims2 = psrc2->dims();
174 Size sz1 = dims1 <= 2 ? psrc1->size() : Size();
175 Size sz2 = dims2 <= 2 ? psrc2->size() : Size();
177 bool use_opencl = (kind1 == _InputArray::UMAT || kind2 == _InputArray::UMAT) &&
178 dims1 <= 2 && dims2 <= 2;
180 bool haveMask = !_mask.empty(), haveScalar = false;
183 if( dims1 <= 2 && dims2 <= 2 && kind1 == kind2 && sz1 == sz2 && type1 == type2 && !haveMask )
185 _dst.create(sz1, type1);
186 CV_OCL_RUN(use_opencl,
187 ocl_binary_op(*psrc1, *psrc2, _dst, _mask, bitwise, oclop, false))
192 cn = (int)CV_ELEM_SIZE(type1);
197 Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat();
198 Size sz = getContinuousSize(src1, src2, dst);
199 size_t len = sz.width*(size_t)cn;
200 if( len == (size_t)(int)len )
203 func(src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, 0);
    if( oclop == OCL_OP_NOT )
        haveScalar = true;
210 else if( (kind1 == _InputArray::MATX) + (kind2 == _InputArray::MATX) == 1 ||
211 !psrc1->sameSize(*psrc2) || type1 != type2 )
213 if( checkScalar(*psrc1, type2, kind1, kind2) )
215 // src1 is a scalar; swap it with src2
218 swap(depth1, depth2);
222 else if( !checkScalar(*psrc2, type1, kind2, kind1) )
223 CV_Error( CV_StsUnmatchedSizes,
224 "The operation is neither 'array op array' (where arrays have the same size and type), "
225 "nor 'array op scalar', nor 'scalar op array'" );
230 CV_Assert( psrc1->sameSize(*psrc2) && type1 == type2 );
233 size_t esz = CV_ELEM_SIZE(type1);
234 size_t blocksize0 = (BLOCK_SIZE + esz-1)/esz;
235 BinaryFunc copymask = 0;
236 bool reallocate = false;
240 int mtype = _mask.type();
241 CV_Assert( (mtype == CV_8U || mtype == CV_8S) && _mask.sameSize(*psrc1));
242 copymask = getCopyMaskFunc(esz);
243 reallocate = !_dst.sameSize(*psrc1) || _dst.type() != type1;
246 AutoBuffer<uchar> _buf;
247 uchar *scbuf = 0, *maskbuf = 0;
249 _dst.createSameSize(*psrc1, type1);
    // if this is a mask operation and dst has been reallocated,
    // we have to clear the destination
    if( haveMask && reallocate )
        _dst.setTo(0.);
255 CV_OCL_RUN(use_opencl,
256 ocl_binary_op(*psrc1, *psrc2, _dst, _mask, bitwise, oclop, haveScalar))
259 Mat src1 = psrc1->getMat(), src2 = psrc2->getMat();
260 Mat dst = _dst.getMat(), mask = _mask.getMat();
272 const Mat* arrays[] = { &src1, &src2, &dst, &mask, 0 };
275 NAryMatIterator it(arrays, ptrs);
276 size_t total = it.size, blocksize = total;
278 if( blocksize*cn > INT_MAX )
279 blocksize = INT_MAX/cn;
283 blocksize = std::min(blocksize, blocksize0);
284 _buf.allocate(blocksize*esz);
288 for( size_t i = 0; i < it.nplanes; i++, ++it )
290 for( size_t j = 0; j < total; j += blocksize )
292 int bsz = (int)MIN(total - j, blocksize);
294 func( ptrs[0], 0, ptrs[1], 0, haveMask ? maskbuf : ptrs[2], 0, bsz*cn, 1, 0 );
297 copymask( maskbuf, 0, ptrs[3], 0, ptrs[2], 0, Size(bsz, 1), &esz );
302 ptrs[0] += bsz; ptrs[1] += bsz; ptrs[2] += bsz;
308 const Mat* arrays[] = { &src1, &dst, &mask, 0 };
311 NAryMatIterator it(arrays, ptrs);
312 size_t total = it.size, blocksize = std::min(total, blocksize0);
314 _buf.allocate(blocksize*(haveMask ? 2 : 1)*esz + 32);
316 maskbuf = alignPtr(scbuf + blocksize*esz, 16);
318 convertAndUnrollScalar( src2, src1.type(), scbuf, blocksize);
320 for( size_t i = 0; i < it.nplanes; i++, ++it )
322 for( size_t j = 0; j < total; j += blocksize )
324 int bsz = (int)MIN(total - j, blocksize);
326 func( ptrs[0], 0, scbuf, 0, haveMask ? maskbuf : ptrs[1], 0, bsz*cn, 1, 0 );
329 copymask( maskbuf, 0, ptrs[2], 0, ptrs[1], 0, Size(bsz, 1), &esz );
334 ptrs[0] += bsz; ptrs[1] += bsz;
340 static BinaryFuncC* getMaxTab()
342 static BinaryFuncC maxTab[] =
344 (BinaryFuncC)GET_OPTIMIZED(cv::hal::max8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::max8s),
345 (BinaryFuncC)GET_OPTIMIZED(cv::hal::max16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::max16s),
346 (BinaryFuncC)GET_OPTIMIZED(cv::hal::max32s),
347 (BinaryFuncC)GET_OPTIMIZED(cv::hal::max32f), (BinaryFuncC)cv::hal::max64f,
354 static BinaryFuncC* getMinTab()
356 static BinaryFuncC minTab[] =
358 (BinaryFuncC)GET_OPTIMIZED(cv::hal::min8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::min8s),
359 (BinaryFuncC)GET_OPTIMIZED(cv::hal::min16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::min16s),
360 (BinaryFuncC)GET_OPTIMIZED(cv::hal::min32s),
361 (BinaryFuncC)GET_OPTIMIZED(cv::hal::min32f), (BinaryFuncC)cv::hal::min64f,
370 void cv::bitwise_and(InputArray a, InputArray b, OutputArray c, InputArray mask)
372 CV_INSTRUMENT_REGION()
374 BinaryFuncC f = (BinaryFuncC)GET_OPTIMIZED(cv::hal::and8u);
375 binary_op(a, b, c, mask, &f, true, OCL_OP_AND);
378 void cv::bitwise_or(InputArray a, InputArray b, OutputArray c, InputArray mask)
380 CV_INSTRUMENT_REGION()
382 BinaryFuncC f = (BinaryFuncC)GET_OPTIMIZED(cv::hal::or8u);
383 binary_op(a, b, c, mask, &f, true, OCL_OP_OR);
386 void cv::bitwise_xor(InputArray a, InputArray b, OutputArray c, InputArray mask)
388 CV_INSTRUMENT_REGION()
390 BinaryFuncC f = (BinaryFuncC)GET_OPTIMIZED(cv::hal::xor8u);
391 binary_op(a, b, c, mask, &f, true, OCL_OP_XOR);
394 void cv::bitwise_not(InputArray a, OutputArray c, InputArray mask)
396 CV_INSTRUMENT_REGION()
398 BinaryFuncC f = (BinaryFuncC)GET_OPTIMIZED(cv::hal::not8u);
399 binary_op(a, a, c, mask, &f, true, OCL_OP_NOT);
402 void cv::max( InputArray src1, InputArray src2, OutputArray dst )
404 CV_INSTRUMENT_REGION()
406 binary_op(src1, src2, dst, noArray(), getMaxTab(), false, OCL_OP_MAX );
409 void cv::min( InputArray src1, InputArray src2, OutputArray dst )
411 CV_INSTRUMENT_REGION()
413 binary_op(src1, src2, dst, noArray(), getMinTab(), false, OCL_OP_MIN );
416 void cv::max(const Mat& src1, const Mat& src2, Mat& dst)
418 CV_INSTRUMENT_REGION()
420 OutputArray _dst(dst);
421 binary_op(src1, src2, _dst, noArray(), getMaxTab(), false, OCL_OP_MAX );
424 void cv::min(const Mat& src1, const Mat& src2, Mat& dst)
426 CV_INSTRUMENT_REGION()
428 OutputArray _dst(dst);
429 binary_op(src1, src2, _dst, noArray(), getMinTab(), false, OCL_OP_MIN );
432 void cv::max(const UMat& src1, const UMat& src2, UMat& dst)
434 CV_INSTRUMENT_REGION()
436 OutputArray _dst(dst);
437 binary_op(src1, src2, _dst, noArray(), getMaxTab(), false, OCL_OP_MAX );
440 void cv::min(const UMat& src1, const UMat& src2, UMat& dst)
442 CV_INSTRUMENT_REGION()
444 OutputArray _dst(dst);
445 binary_op(src1, src2, _dst, noArray(), getMinTab(), false, OCL_OP_MIN );
/****************************************************************************************\
*                                      add/subtract                                      *
\****************************************************************************************/
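
// Returns the smallest depth that can represent the given scalar values exactly
// (CV_8U..CV_32S for integral values, CV_64F otherwise); used to pick a cheaper
// working type for the 'array op scalar' case.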
static int actualScalarDepth(const double* data, int len)
{
    int i = 0, minval = INT_MAX, maxval = INT_MIN;
    for( ; i < len; ++i )
    {
        int ival = cvRound(data[i]);
        if( ival != data[i] )
            break;
        minval = MIN(minval, ival);
        maxval = MAX(maxval, ival);
    }
    return i < len ? CV_64F :
        minval >= 0 && maxval <= (int)UCHAR_MAX ? CV_8U :
        minval >= (int)SCHAR_MIN && maxval <= (int)SCHAR_MAX ? CV_8S :
        minval >= 0 && maxval <= (int)USHRT_MAX ? CV_16U :
        minval >= (int)SHRT_MIN && maxval <= (int)SHRT_MAX ? CV_16S :
        CV_32S;
}
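
// OpenCL path for add/subtract/multiply/divide/addWeighted/absdiff: builds the "KF"
// kernel from ocl::core::arithm_oclsrc with the requested source/working/destination
// types and passes the optional scale or weight parameters ("usrdata") as kernel
// constants. Returns false to request the CPU fallback.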
477 static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
478 InputArray _mask, int wtype,
479 void* usrdata, int oclop,
482 const ocl::Device d = ocl::Device::getDefault();
483 bool doubleSupport = d.doubleFPConfig() > 0;
484 int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
485 bool haveMask = !_mask.empty();
487 if ( (haveMask || haveScalar) && cn > 4 )
490 int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32S, CV_MAT_DEPTH(wtype));
492 wdepth = std::min(wdepth, CV_32F);
494 wtype = CV_MAKETYPE(wdepth, cn);
495 int type2 = haveScalar ? wtype : _src2.type(), depth2 = CV_MAT_DEPTH(type2);
496 if (!doubleSupport && (depth2 == CV_64F || depth1 == CV_64F))
499 int kercn = haveMask || haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
500 int scalarcn = kercn == 3 ? 4 : kercn, rowsPerWI = d.isIntel() ? 4 : 1;
502 char cvtstr[4][32], opts[1024];
503 sprintf(opts, "-D %s%s -D %s -D srcT1=%s -D srcT1_C1=%s -D srcT2=%s -D srcT2_C1=%s "
504 "-D dstT=%s -D dstT_C1=%s -D workT=%s -D workST=%s -D scaleT=%s -D wdepth=%d -D convertToWT1=%s "
505 "-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d -D rowsPerWI=%d -D convertFromU=%s",
506 (haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"),
507 oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)),
508 ocl::typeToStr(depth1), ocl::typeToStr(CV_MAKETYPE(depth2, kercn)),
509 ocl::typeToStr(depth2), ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)),
510 ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)),
511 ocl::typeToStr(CV_MAKETYPE(wdepth, scalarcn)),
512 ocl::typeToStr(wdepth), wdepth,
513 ocl::convertTypeStr(depth1, wdepth, kercn, cvtstr[0]),
514 ocl::convertTypeStr(depth2, wdepth, kercn, cvtstr[1]),
515 ocl::convertTypeStr(wdepth, ddepth, kercn, cvtstr[2]),
516 doubleSupport ? " -D DOUBLE_SUPPORT" : "", kercn, rowsPerWI,
517 oclop == OCL_OP_ABSDIFF && wdepth == CV_32S && ddepth == wdepth ?
518 ocl::convertTypeStr(CV_8U, ddepth, kercn, cvtstr[3]) : "noconvert");
520 size_t usrdata_esz = CV_ELEM_SIZE(wdepth);
521 const uchar* usrdata_p = (const uchar*)usrdata;
522 const double* usrdata_d = (const double*)usrdata;
524 int i, n = oclop == OCL_OP_MUL_SCALE || oclop == OCL_OP_DIV_SCALE ||
525 oclop == OCL_OP_RDIV_SCALE || oclop == OCL_OP_RECIP_SCALE ? 1 : oclop == OCL_OP_ADDW ? 3 : 0;
526 if( usrdata && n > 0 && wdepth == CV_32F )
528 for( i = 0; i < n; i++ )
529 usrdata_f[i] = (float)usrdata_d[i];
530 usrdata_p = (const uchar*)usrdata_f;
533 ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
537 UMat src1 = _src1.getUMat(), src2;
538 UMat dst = _dst.getUMat(), mask = _mask.getUMat();
540 ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1, cn, kercn);
541 ocl::KernelArg dstarg = haveMask ? ocl::KernelArg::ReadWrite(dst, cn, kercn) :
542 ocl::KernelArg::WriteOnly(dst, cn, kercn);
543 ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask, 1);
547 size_t esz = CV_ELEM_SIZE1(wtype)*scalarcn;
548 double buf[4]={0,0,0,0};
549 Mat src2sc = _src2.getMat();
551 if( !src2sc.empty() )
552 convertAndUnrollScalar(src2sc, wtype, (uchar*)buf, 1);
553 ocl::KernelArg scalararg = ocl::KernelArg(ocl::KernelArg::CONSTANT, 0, 0, 0, buf, esz);
558 k.args(src1arg, dstarg, scalararg);
560 k.args(src1arg, dstarg, scalararg,
561 ocl::KernelArg(ocl::KernelArg::CONSTANT, 0, 0, 0, usrdata_p, usrdata_esz));
563 CV_Error(Error::StsNotImplemented, "unsupported number of extra parameters");
566 k.args(src1arg, maskarg, dstarg, scalararg);
570 src2 = _src2.getUMat();
571 ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cn, kercn);
576 k.args(src1arg, src2arg, dstarg);
578 k.args(src1arg, src2arg, dstarg,
579 ocl::KernelArg(ocl::KernelArg::CONSTANT, 0, 0, 0, usrdata_p, usrdata_esz));
581 k.args(src1arg, src2arg, dstarg,
582 ocl::KernelArg(ocl::KernelArg::CONSTANT, 0, 0, 0, usrdata_p, usrdata_esz),
583 ocl::KernelArg(ocl::KernelArg::CONSTANT, 0, 0, 0, usrdata_p + usrdata_esz, usrdata_esz),
584 ocl::KernelArg(ocl::KernelArg::CONSTANT, 0, 0, 0, usrdata_p + usrdata_esz*2, usrdata_esz));
586 CV_Error(Error::StsNotImplemented, "unsupported number of extra parameters");
589 k.args(src1arg, src2arg, maskarg, dstarg);
592 size_t globalsize[] = { (size_t)src1.cols * cn / kercn, ((size_t)src1.rows + rowsPerWI - 1) / rowsPerWI };
593 return k.run(2, globalsize, NULL, false);
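
// Common dispatcher for the arithmetic operations (add, subtract, absdiff, multiply,
// divide, addWeighted): resolves the destination and working types, handles the
// 'array op scalar' case, tries the OpenCL path, and otherwise converts, processes
// and converts back block-wise using the per-depth function from "tab".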
598 static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
599 InputArray _mask, int dtype, BinaryFuncC* tab, bool muldiv=false,
600 void* usrdata=0, int oclop=-1 )
602 const _InputArray *psrc1 = &_src1, *psrc2 = &_src2;
603 int kind1 = psrc1->kind(), kind2 = psrc2->kind();
604 bool haveMask = !_mask.empty();
605 bool reallocate = false;
606 int type1 = psrc1->type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
607 int type2 = psrc2->type(), depth2 = CV_MAT_DEPTH(type2), cn2 = CV_MAT_CN(type2);
608 int wtype, dims1 = psrc1->dims(), dims2 = psrc2->dims();
609 Size sz1 = dims1 <= 2 ? psrc1->size() : Size();
610 Size sz2 = dims2 <= 2 ? psrc2->size() : Size();
612 bool use_opencl = OCL_PERFORMANCE_CHECK(_dst.isUMat()) && dims1 <= 2 && dims2 <= 2;
614 bool src1Scalar = checkScalar(*psrc1, type2, kind1, kind2);
615 bool src2Scalar = checkScalar(*psrc2, type1, kind2, kind1);
617 if( (kind1 == kind2 || cn == 1) && sz1 == sz2 && dims1 <= 2 && dims2 <= 2 && type1 == type2 &&
618 !haveMask && ((!_dst.fixedType() && (dtype < 0 || CV_MAT_DEPTH(dtype) == depth1)) ||
619 (_dst.fixedType() && _dst.type() == type1)) &&
620 ((src1Scalar && src2Scalar) || (!src1Scalar && !src2Scalar)) )
622 _dst.createSameSize(*psrc1, type1);
623 CV_OCL_RUN(use_opencl,
624 ocl_arithm_op(*psrc1, *psrc2, _dst, _mask,
625 (!usrdata ? type1 : std::max(depth1, CV_32F)),
626 usrdata, oclop, false))
628 Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat();
629 Size sz = getContinuousSize(src1, src2, dst, src1.channels());
630 tab[depth1](src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, usrdata);
634 bool haveScalar = false, swapped12 = false;
636 if( dims1 != dims2 || sz1 != sz2 || cn != cn2 ||
637 (kind1 == _InputArray::MATX && (sz1 == Size(1,4) || sz1 == Size(1,1))) ||
638 (kind2 == _InputArray::MATX && (sz2 == Size(1,4) || sz2 == Size(1,1))) )
640 if( checkScalar(*psrc1, type2, kind1, kind2) )
642 // src1 is a scalar; swap it with src2
646 swap(depth1, depth2);
            if( oclop == OCL_OP_SUB )
                oclop = OCL_OP_RSUB;
652 if ( oclop == OCL_OP_DIV_SCALE )
653 oclop = OCL_OP_RDIV_SCALE;
655 else if( !checkScalar(*psrc2, type1, kind2, kind1) )
656 CV_Error( CV_StsUnmatchedSizes,
657 "The operation is neither 'array op array' "
658 "(where arrays have the same size and the same number of channels), "
659 "nor 'array op scalar', nor 'scalar op array'" );
661 CV_Assert(type2 == CV_64F && (sz2.height == 1 || sz2.height == 4));
665 Mat sc = psrc2->getMat();
666 depth2 = actualScalarDepth(sc.ptr<double>(), sz2 == Size(1, 1) ? cn2 : cn);
667 if( depth2 == CV_64F && (depth1 < CV_32S || depth1 == CV_32F) )
676 if( _dst.fixedType() )
680 if( !haveScalar && type1 != type2 )
681 CV_Error(CV_StsBadArg,
682 "When the input arrays in add/subtract/multiply/divide functions have different types, "
683 "the output array type must be explicitly specified");
687 dtype = CV_MAT_DEPTH(dtype);
    if( depth1 == depth2 && dtype == depth1 )
        wtype = dtype;
    else if( !muldiv )
    {
        wtype = depth1 <= CV_8S && depth2 <= CV_8S ? CV_16S :
            depth1 <= CV_32S && depth2 <= CV_32S ? CV_32S : std::max(depth1, depth2);
        wtype = std::max(wtype, dtype);

        // when the result of the addition is to be converted to an integer type and only one
        // of the inputs is floating-point, it is cheaper to convert that input to an integer
        // type before the operation than to convert the other input to floating-point and
        // convert the result back to integers afterwards
        // (e.g. CV_8U + CV_32F with an integer dtype is computed in CV_32S, not CV_32F).
        if( dtype < CV_32F && (depth1 < CV_32F || depth2 < CV_32F) )
            wtype = CV_32S;
    }
    else
    {
        wtype = std::max(depth1, std::max(depth2, CV_32F));
        wtype = std::max(wtype, dtype);
    }
709 dtype = CV_MAKETYPE(dtype, cn);
710 wtype = CV_MAKETYPE(wtype, cn);
714 int mtype = _mask.type();
715 CV_Assert( (mtype == CV_8UC1 || mtype == CV_8SC1) && _mask.sameSize(*psrc1) );
716 reallocate = !_dst.sameSize(*psrc1) || _dst.type() != dtype;
    _dst.createSameSize(*psrc1, dtype);
    if( reallocate )
        _dst.setTo(0.);
723 CV_OCL_RUN(use_opencl,
724 ocl_arithm_op(*psrc1, *psrc2, _dst, _mask, wtype,
725 usrdata, oclop, haveScalar))
727 BinaryFunc cvtsrc1 = type1 == wtype ? 0 : getConvertFunc(type1, wtype);
728 BinaryFunc cvtsrc2 = type2 == type1 ? cvtsrc1 : type2 == wtype ? 0 : getConvertFunc(type2, wtype);
729 BinaryFunc cvtdst = dtype == wtype ? 0 : getConvertFunc(wtype, dtype);
731 size_t esz1 = CV_ELEM_SIZE(type1), esz2 = CV_ELEM_SIZE(type2);
732 size_t dsz = CV_ELEM_SIZE(dtype), wsz = CV_ELEM_SIZE(wtype);
733 size_t blocksize0 = (size_t)(BLOCK_SIZE + wsz-1)/wsz;
734 BinaryFunc copymask = getCopyMaskFunc(dsz);
735 Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat(), mask = _mask.getMat();
737 AutoBuffer<uchar> _buf;
738 uchar *buf, *maskbuf = 0, *buf1 = 0, *buf2 = 0, *wbuf = 0;
739 size_t bufesz = (cvtsrc1 ? wsz : 0) +
740 (cvtsrc2 || haveScalar ? wsz : 0) +
742 (haveMask ? dsz : 0);
743 BinaryFuncC func = tab[CV_MAT_DEPTH(wtype)];
747 const Mat* arrays[] = { &src1, &src2, &dst, &mask, 0 };
750 NAryMatIterator it(arrays, ptrs);
751 size_t total = it.size, blocksize = total;
753 if( haveMask || cvtsrc1 || cvtsrc2 || cvtdst )
754 blocksize = std::min(blocksize, blocksize0);
756 _buf.allocate(bufesz*blocksize + 64);
759 buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
761 buf2 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
762 wbuf = maskbuf = buf;
764 buf = alignPtr(buf + blocksize*wsz, 16);
768 for( size_t i = 0; i < it.nplanes; i++, ++it )
770 for( size_t j = 0; j < total; j += blocksize )
772 int bsz = (int)MIN(total - j, blocksize);
773 Size bszn(bsz*cn, 1);
774 const uchar *sptr1 = ptrs[0], *sptr2 = ptrs[1];
775 uchar* dptr = ptrs[2];
778 cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 );
781 if( ptrs[0] == ptrs[1] )
785 cvtsrc2( sptr2, 1, 0, 1, buf2, 1, bszn, 0 );
789 if( !haveMask && !cvtdst )
790 func( sptr1, 1, sptr2, 1, dptr, 1, bszn.width, bszn.height, usrdata );
793 func( sptr1, 1, sptr2, 1, wbuf, 0, bszn.width, bszn.height, usrdata );
795 cvtdst( wbuf, 1, 0, 1, dptr, 1, bszn, 0 );
798 copymask( wbuf, 1, ptrs[3], 1, dptr, 1, Size(bsz, 1), &dsz );
803 cvtdst( wbuf, 1, 0, 1, maskbuf, 1, bszn, 0 );
804 copymask( maskbuf, 1, ptrs[3], 1, dptr, 1, Size(bsz, 1), &dsz );
808 ptrs[0] += bsz*esz1; ptrs[1] += bsz*esz2; ptrs[2] += bsz*dsz;
814 const Mat* arrays[] = { &src1, &dst, &mask, 0 };
817 NAryMatIterator it(arrays, ptrs);
818 size_t total = it.size, blocksize = std::min(total, blocksize0);
820 _buf.allocate(bufesz*blocksize + 64);
823 buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
824 buf2 = buf; buf = alignPtr(buf + blocksize*wsz, 16);
825 wbuf = maskbuf = buf;
827 buf = alignPtr(buf + blocksize*wsz, 16);
831 convertAndUnrollScalar( src2, wtype, buf2, blocksize);
833 for( size_t i = 0; i < it.nplanes; i++, ++it )
835 for( size_t j = 0; j < total; j += blocksize )
837 int bsz = (int)MIN(total - j, blocksize);
838 Size bszn(bsz*cn, 1);
839 const uchar *sptr1 = ptrs[0];
840 const uchar* sptr2 = buf2;
841 uchar* dptr = ptrs[1];
845 cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 );
850 std::swap(sptr1, sptr2);
852 if( !haveMask && !cvtdst )
853 func( sptr1, 1, sptr2, 1, dptr, 1, bszn.width, bszn.height, usrdata );
856 func( sptr1, 1, sptr2, 1, wbuf, 1, bszn.width, bszn.height, usrdata );
858 cvtdst( wbuf, 1, 0, 1, dptr, 1, bszn, 0 );
861 copymask( wbuf, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz );
866 cvtdst( wbuf, 1, 0, 1, maskbuf, 1, bszn, 0 );
867 copymask( maskbuf, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz );
871 ptrs[0] += bsz*esz1; ptrs[1] += bsz*dsz;
877 static BinaryFuncC* getAddTab()
879 static BinaryFuncC addTab[] =
881 (BinaryFuncC)GET_OPTIMIZED(cv::hal::add8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::add8s),
882 (BinaryFuncC)GET_OPTIMIZED(cv::hal::add16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::add16s),
883 (BinaryFuncC)GET_OPTIMIZED(cv::hal::add32s),
884 (BinaryFuncC)GET_OPTIMIZED(cv::hal::add32f), (BinaryFuncC)cv::hal::add64f,
891 static BinaryFuncC* getSubTab()
893 static BinaryFuncC subTab[] =
895 (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub8s),
896 (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub16s),
897 (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub32s),
898 (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub32f), (BinaryFuncC)cv::hal::sub64f,
905 static BinaryFuncC* getAbsDiffTab()
907 static BinaryFuncC absDiffTab[] =
909 (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff8s),
910 (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff16s),
911 (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff32s),
912 (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff32f), (BinaryFuncC)cv::hal::absdiff64f,
921 void cv::add( InputArray src1, InputArray src2, OutputArray dst,
922 InputArray mask, int dtype )
924 CV_INSTRUMENT_REGION()
926 arithm_op(src1, src2, dst, mask, dtype, getAddTab(), false, 0, OCL_OP_ADD );
929 void cv::subtract( InputArray _src1, InputArray _src2, OutputArray _dst,
930 InputArray mask, int dtype )
932 CV_INSTRUMENT_REGION()
934 arithm_op(_src1, _src2, _dst, mask, dtype, getSubTab(), false, 0, OCL_OP_SUB );
937 void cv::absdiff( InputArray src1, InputArray src2, OutputArray dst )
939 CV_INSTRUMENT_REGION()
941 arithm_op(src1, src2, dst, noArray(), -1, getAbsDiffTab(), false, 0, OCL_OP_ABSDIFF);
/****************************************************************************************\
*                                    multiply/divide                                     *
\****************************************************************************************/
951 static BinaryFuncC* getMulTab()
953 static BinaryFuncC mulTab[] =
955 (BinaryFuncC)cv::hal::mul8u, (BinaryFuncC)cv::hal::mul8s, (BinaryFuncC)cv::hal::mul16u,
956 (BinaryFuncC)cv::hal::mul16s, (BinaryFuncC)cv::hal::mul32s, (BinaryFuncC)cv::hal::mul32f,
957 (BinaryFuncC)cv::hal::mul64f, 0
963 static BinaryFuncC* getDivTab()
965 static BinaryFuncC divTab[] =
967 (BinaryFuncC)cv::hal::div8u, (BinaryFuncC)cv::hal::div8s, (BinaryFuncC)cv::hal::div16u,
968 (BinaryFuncC)cv::hal::div16s, (BinaryFuncC)cv::hal::div32s, (BinaryFuncC)cv::hal::div32f,
969 (BinaryFuncC)cv::hal::div64f, 0
975 static BinaryFuncC* getRecipTab()
977 static BinaryFuncC recipTab[] =
979 (BinaryFuncC)cv::hal::recip8u, (BinaryFuncC)cv::hal::recip8s, (BinaryFuncC)cv::hal::recip16u,
980 (BinaryFuncC)cv::hal::recip16s, (BinaryFuncC)cv::hal::recip32s, (BinaryFuncC)cv::hal::recip32f,
981 (BinaryFuncC)cv::hal::recip64f, 0
989 void cv::multiply(InputArray src1, InputArray src2,
990 OutputArray dst, double scale, int dtype)
992 CV_INSTRUMENT_REGION()
994 arithm_op(src1, src2, dst, noArray(), dtype, getMulTab(),
995 true, &scale, std::abs(scale - 1.0) < DBL_EPSILON ? OCL_OP_MUL : OCL_OP_MUL_SCALE);
998 void cv::divide(InputArray src1, InputArray src2,
999 OutputArray dst, double scale, int dtype)
1001 CV_INSTRUMENT_REGION()
1003 arithm_op(src1, src2, dst, noArray(), dtype, getDivTab(), true, &scale, OCL_OP_DIV_SCALE);
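
// divide(scale, src2, dst[, dtype]) computes the scaled per-element reciprocal:
// dst(i) = scale / src2(i).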
1006 void cv::divide(double scale, InputArray src2,
1007 OutputArray dst, int dtype)
1009 CV_INSTRUMENT_REGION()
1011 arithm_op(src2, src2, dst, noArray(), dtype, getRecipTab(), true, &scale, OCL_OP_RECIP_SCALE);
/****************************************************************************************\
*                                       addWeighted                                      *
\****************************************************************************************/
1021 static BinaryFuncC* getAddWeightedTab()
1023 static BinaryFuncC addWeightedTab[] =
1025 (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted8s), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted16u),
1026 (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted16s), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted32s), (BinaryFuncC)cv::hal::addWeighted32f,
1027 (BinaryFuncC)cv::hal::addWeighted64f, 0
1030 return addWeightedTab;
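
// addWeighted computes the saturated per-element weighted sum:
// dst(i) = saturate(src1(i)*alpha + src2(i)*beta + gamma).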
1035 void cv::addWeighted( InputArray src1, double alpha, InputArray src2,
1036 double beta, double gamma, OutputArray dst, int dtype )
1038 CV_INSTRUMENT_REGION()
1040 double scalars[] = {alpha, beta, gamma};
1041 arithm_op(src1, src2, dst, noArray(), dtype, getAddWeightedTab(), true, scalars, OCL_OP_ADDW);
/****************************************************************************************\
*                                         compare                                        *
\****************************************************************************************/
1052 static BinaryFuncC getCmpFunc(int depth)
1054 static BinaryFuncC cmpTab[] =
1056 (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp8s),
1057 (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp16s),
1058 (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp32s),
1059 (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp32f), (BinaryFuncC)cv::hal::cmp64f,
1063 return cmpTab[depth];
1066 static double getMinVal(int depth)
1068 static const double tab[] = {0, -128, 0, -32768, INT_MIN, -FLT_MAX, -DBL_MAX, 0};
1072 static double getMaxVal(int depth)
1074 static const double tab[] = {255, 127, 65535, 32767, INT_MAX, FLT_MAX, DBL_MAX, 0};
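
// OpenCL path for cv::compare: builds a per-element comparison kernel that produces an
// 8-bit 0/255 mask. In the scalar case the scalar is converted to the source depth
// (and, for integer sources, rounded/clamped) before being passed to the kernel.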
1080 static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op, bool haveScalar)
1082 const ocl::Device& dev = ocl::Device::getDefault();
1083 bool doubleSupport = dev.doubleFPConfig() > 0;
1084 int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1),
1085 type2 = _src2.type(), depth2 = CV_MAT_DEPTH(type2);
1087 if (!doubleSupport && depth1 == CV_64F)
1090 if (!haveScalar && (!_src1.sameSize(_src2) || type1 != type2))
1093 int kercn = haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst), rowsPerWI = dev.isIntel() ? 4 : 1;
1094 // Workaround for bug with "?:" operator in AMD OpenCL compiler
    if (depth1 >= CV_16U)
        kercn = 1;
1098 int scalarcn = kercn == 3 ? 4 : kercn;
1099 const char * const operationMap[] = { "==", ">", ">=", "<", "<=", "!=" };
1102 String opts = format("-D %s -D srcT1=%s -D dstT=%s -D workT=srcT1 -D cn=%d"
1103 " -D convertToDT=%s -D OP_CMP -D CMP_OPERATOR=%s -D srcT1_C1=%s"
1104 " -D srcT2_C1=%s -D dstT_C1=%s -D workST=%s -D rowsPerWI=%d%s",
1105 haveScalar ? "UNARY_OP" : "BINARY_OP",
1106 ocl::typeToStr(CV_MAKE_TYPE(depth1, kercn)),
1107 ocl::typeToStr(CV_8UC(kercn)), kercn,
1108 ocl::convertTypeStr(depth1, CV_8U, kercn, cvt),
1109 operationMap[op], ocl::typeToStr(depth1),
1110 ocl::typeToStr(depth1), ocl::typeToStr(CV_8U),
1111 ocl::typeToStr(CV_MAKE_TYPE(depth1, scalarcn)), rowsPerWI,
1112 doubleSupport ? " -D DOUBLE_SUPPORT" : "");
1114 ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
1118 UMat src1 = _src1.getUMat();
1119 Size size = src1.size();
1120 _dst.create(size, CV_8UC(cn));
1121 UMat dst = _dst.getUMat();
1125 size_t esz = CV_ELEM_SIZE1(type1) * scalarcn;
1126 double buf[4] = { 0, 0, 0, 0 };
1127 Mat src2 = _src2.getMat();
1129 if( depth1 > CV_32S )
1130 convertAndUnrollScalar( src2, depth1, (uchar *)buf, kercn );
1134 getConvertFunc(depth2, CV_64F)(src2.ptr(), 1, 0, 1, (uchar *)&fval, 1, Size(1, 1), 0);
1135 if( fval < getMinVal(depth1) )
1136 return dst.setTo(Scalar::all(op == CMP_GT || op == CMP_GE || op == CMP_NE ? 255 : 0)), true;
1138 if( fval > getMaxVal(depth1) )
1139 return dst.setTo(Scalar::all(op == CMP_LT || op == CMP_LE || op == CMP_NE ? 255 : 0)), true;
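            // a non-integral scalar compared against an integer source is rounded toward the
            // side that preserves the comparison (ceil for <, >=; floor for <=, >);
            // == and != against such a value degenerate to a constant mask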
1141 int ival = cvRound(fval);
1144 if( op == CMP_LT || op == CMP_GE )
1145 ival = cvCeil(fval);
1146 else if( op == CMP_LE || op == CMP_GT )
1147 ival = cvFloor(fval);
1149 return dst.setTo(Scalar::all(op == CMP_NE ? 255 : 0)), true;
1151 convertAndUnrollScalar(Mat(1, 1, CV_32S, &ival), depth1, (uchar *)buf, kercn);
1154 ocl::KernelArg scalararg = ocl::KernelArg(ocl::KernelArg::CONSTANT, 0, 0, 0, buf, esz);
1156 k.args(ocl::KernelArg::ReadOnlyNoSize(src1, cn, kercn),
1157 ocl::KernelArg::WriteOnly(dst, cn, kercn), scalararg);
1161 UMat src2 = _src2.getUMat();
1163 k.args(ocl::KernelArg::ReadOnlyNoSize(src1),
1164 ocl::KernelArg::ReadOnlyNoSize(src2),
1165 ocl::KernelArg::WriteOnly(dst, cn, kercn));
1168 size_t globalsize[2] = { (size_t)dst.cols * cn / kercn, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI };
1169 return k.run(2, globalsize, NULL, false);
1176 void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op)
1178 CV_INSTRUMENT_REGION()
1180 CV_Assert( op == CMP_LT || op == CMP_LE || op == CMP_EQ ||
1181 op == CMP_NE || op == CMP_GE || op == CMP_GT );
1183 bool haveScalar = false;
1185 if ((_src1.isMatx() + _src2.isMatx()) == 1
1186 || !_src1.sameSize(_src2)
1187 || _src1.type() != _src2.type())
1189 bool is_src1_scalar = checkScalar(_src1, _src2.type(), _src1.kind(), _src2.kind());
1190 bool is_src2_scalar = checkScalar(_src2, _src1.type(), _src2.kind(), _src1.kind());
1192 if (is_src1_scalar && !is_src2_scalar)
1194 op = op == CMP_LT ? CMP_GT : op == CMP_LE ? CMP_GE :
1195 op == CMP_GE ? CMP_LE : op == CMP_GT ? CMP_LT : op;
1196 // src1 is a scalar; swap it with src2
1197 compare(_src2, _src1, _dst, op);
1200 else if( (is_src1_scalar && is_src2_scalar) || (!is_src1_scalar && !is_src2_scalar) )
1201 CV_Error( CV_StsUnmatchedSizes,
1202 "The operation is neither 'array op array' (where arrays have the same size and the same type), "
1203 "nor 'array op scalar', nor 'scalar op array'" );
1207 CV_OCL_RUN(_src1.dims() <= 2 && _src2.dims() <= 2 && OCL_PERFORMANCE_CHECK(_dst.isUMat()),
1208 ocl_compare(_src1, _src2, _dst, op, haveScalar))
1210 int kind1 = _src1.kind(), kind2 = _src2.kind();
1211 Mat src1 = _src1.getMat(), src2 = _src2.getMat();
1213 if( kind1 == kind2 && src1.dims <= 2 && src2.dims <= 2 && src1.size() == src2.size() && src1.type() == src2.type() )
1215 int cn = src1.channels();
1216 _dst.create(src1.size(), CV_8UC(cn));
1217 Mat dst = _dst.getMat();
1218 Size sz = getContinuousSize(src1, src2, dst, src1.channels());
1219 getCmpFunc(src1.depth())(src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, &op);
1223 int cn = src1.channels(), depth1 = src1.depth(), depth2 = src2.depth();
1225 _dst.create(src1.dims, src1.size, CV_8UC(cn));
1226 src1 = src1.reshape(1); src2 = src2.reshape(1);
1227 Mat dst = _dst.getMat().reshape(1);
1229 size_t esz = std::max(src1.elemSize(), (size_t)1);
1230 size_t blocksize0 = (size_t)(BLOCK_SIZE + esz-1)/esz;
1231 BinaryFuncC func = getCmpFunc(depth1);
1235 const Mat* arrays[] = { &src1, &src2, &dst, 0 };
1238 NAryMatIterator it(arrays, ptrs);
1239 size_t total = it.size;
1241 for( size_t i = 0; i < it.nplanes; i++, ++it )
1242 func( ptrs[0], 0, ptrs[1], 0, ptrs[2], 0, (int)total, 1, &op );
1246 const Mat* arrays[] = { &src1, &dst, 0 };
1249 NAryMatIterator it(arrays, ptrs);
1250 size_t total = it.size, blocksize = std::min(total, blocksize0);
1252 AutoBuffer<uchar> _buf(blocksize*esz);
1255 if( depth1 > CV_32S )
1256 convertAndUnrollScalar( src2, depth1, buf, blocksize );
1260 getConvertFunc(depth2, CV_64F)(src2.ptr(), 1, 0, 1, (uchar*)&fval, 1, Size(1,1), 0);
1261 if( fval < getMinVal(depth1) )
1263 dst = Scalar::all(op == CMP_GT || op == CMP_GE || op == CMP_NE ? 255 : 0);
1267 if( fval > getMaxVal(depth1) )
1269 dst = Scalar::all(op == CMP_LT || op == CMP_LE || op == CMP_NE ? 255 : 0);
1273 int ival = cvRound(fval);
1276 if( op == CMP_LT || op == CMP_GE )
1277 ival = cvCeil(fval);
1278 else if( op == CMP_LE || op == CMP_GT )
1279 ival = cvFloor(fval);
1282 dst = Scalar::all(op == CMP_NE ? 255 : 0);
1286 convertAndUnrollScalar(Mat(1, 1, CV_32S, &ival), depth1, buf, blocksize);
1289 for( size_t i = 0; i < it.nplanes; i++, ++it )
1291 for( size_t j = 0; j < total; j += blocksize )
1293 int bsz = (int)MIN(total - j, blocksize);
1294 func( ptrs[0], 0, buf, 0, ptrs[1], 0, bsz, 1, &op);
/****************************************************************************************\
*                                         inRange                                        *
\****************************************************************************************/
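
// InRange_SIMD<T> handles as many elements as possible with universal intrinsics and
// returns the number of elements processed; the scalar loop in inRange_() below
// finishes the remaining tail (the generic template processes nothing).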
1309 template <typename T>
1312 int operator () (const T *, const T *, const T *, uchar *, int) const
1321 struct InRange_SIMD<uchar>
1323 int operator () (const uchar * src1, const uchar * src2, const uchar * src3,
1324 uchar * dst, int len) const
1327 const int width = v_uint8x16::nlanes;
1329 for (; x <= len - width; x += width)
1331 v_uint8x16 values = v_load(src1 + x);
1332 v_uint8x16 low = v_load(src2 + x);
1333 v_uint8x16 high = v_load(src3 + x);
1335 v_store(dst + x, (values >= low) & (high >= values));
1342 struct InRange_SIMD<schar>
1344 int operator () (const schar * src1, const schar * src2, const schar * src3,
1345 uchar * dst, int len) const
1348 const int width = v_int8x16::nlanes;
1350 for (; x <= len - width; x += width)
1352 v_int8x16 values = v_load(src1 + x);
1353 v_int8x16 low = v_load(src2 + x);
1354 v_int8x16 high = v_load(src3 + x);
1356 v_store((schar*)(dst + x), (values >= low) & (high >= values));
1363 struct InRange_SIMD<ushort>
1365 int operator () (const ushort * src1, const ushort * src2, const ushort * src3,
1366 uchar * dst, int len) const
1369 const int width = v_uint16x8::nlanes * 2;
1371 for (; x <= len - width; x += width)
1373 v_uint16x8 values1 = v_load(src1 + x);
1374 v_uint16x8 low1 = v_load(src2 + x);
1375 v_uint16x8 high1 = v_load(src3 + x);
1377 v_uint16x8 values2 = v_load(src1 + x + v_uint16x8::nlanes);
1378 v_uint16x8 low2 = v_load(src2 + x + v_uint16x8::nlanes);
1379 v_uint16x8 high2 = v_load(src3 + x + v_uint16x8::nlanes);
1381 v_store(dst + x, v_pack((values1 >= low1) & (high1 >= values1), (values2 >= low2) & (high2 >= values2)));
1388 struct InRange_SIMD<short>
1390 int operator () (const short * src1, const short * src2, const short * src3,
1391 uchar * dst, int len) const
1394 const int width = (int)v_int16x8::nlanes * 2;
1396 for (; x <= len - width; x += width)
1398 v_int16x8 values1 = v_load(src1 + x);
1399 v_int16x8 low1 = v_load(src2 + x);
1400 v_int16x8 high1 = v_load(src3 + x);
1402 v_int16x8 values2 = v_load(src1 + x + v_int16x8::nlanes);
1403 v_int16x8 low2 = v_load(src2 + x + v_int16x8::nlanes);
1404 v_int16x8 high2 = v_load(src3 + x + v_int16x8::nlanes);
1406 v_store((schar*)(dst + x), v_pack((values1 >= low1) & (high1 >= values1), (values2 >= low2) & (high2 >= values2)));
1413 struct InRange_SIMD<int>
1415 int operator () (const int * src1, const int * src2, const int * src3,
1416 uchar * dst, int len) const
1419 const int width = (int)v_int32x4::nlanes * 2;
1421 for (; x <= len - width; x += width)
1423 v_int32x4 values1 = v_load(src1 + x);
1424 v_int32x4 low1 = v_load(src2 + x);
1425 v_int32x4 high1 = v_load(src3 + x);
1427 v_int32x4 values2 = v_load(src1 + x + v_int32x4::nlanes);
1428 v_int32x4 low2 = v_load(src2 + x + v_int32x4::nlanes);
1429 v_int32x4 high2 = v_load(src3 + x + v_int32x4::nlanes);
1431 v_pack_store(dst + x, v_reinterpret_as_u16(v_pack((values1 >= low1) & (high1 >= values1), (values2 >= low2) & (high2 >= values2))));
1438 struct InRange_SIMD<float>
1440 int operator () (const float * src1, const float * src2, const float * src3,
1441 uchar * dst, int len) const
1444 const int width = (int)v_float32x4::nlanes * 2;
1446 for (; x <= len - width; x += width)
1448 v_float32x4 values1 = v_load(src1 + x);
1449 v_float32x4 low1 = v_load(src2 + x);
1450 v_float32x4 high1 = v_load(src3 + x);
1452 v_float32x4 values2 = v_load(src1 + x + v_float32x4::nlanes);
1453 v_float32x4 low2 = v_load(src2 + x + v_float32x4::nlanes);
1454 v_float32x4 high2 = v_load(src3 + x + v_float32x4::nlanes);
1456 v_pack_store(dst + x, v_pack(v_reinterpret_as_u32((values1 >= low1) & (high1 >= values1)), v_reinterpret_as_u32((values2 >= low2) & (high2 >= values2))));
1464 template <typename T>
1465 static void inRange_(const T* src1, size_t step1, const T* src2, size_t step2,
1466 const T* src3, size_t step3, uchar* dst, size_t step,
1469 step1 /= sizeof(src1[0]);
1470 step2 /= sizeof(src2[0]);
1471 step3 /= sizeof(src3[0]);
1473 InRange_SIMD<T> vop;
1475 for( ; size.height--; src1 += step1, src2 += step2, src3 += step3, dst += step )
1477 int x = vop(src1, src2, src3, dst, size.width);
1478 #if CV_ENABLE_UNROLLED
1479 for( ; x <= size.width - 4; x += 4 )
1482 t0 = src2[x] <= src1[x] && src1[x] <= src3[x];
1483 t1 = src2[x+1] <= src1[x+1] && src1[x+1] <= src3[x+1];
1484 dst[x] = (uchar)-t0; dst[x+1] = (uchar)-t1;
1485 t0 = src2[x+2] <= src1[x+2] && src1[x+2] <= src3[x+2];
1486 t1 = src2[x+3] <= src1[x+3] && src1[x+3] <= src3[x+3];
1487 dst[x+2] = (uchar)-t0; dst[x+3] = (uchar)-t1;
1490 for( ; x < size.width; x++ )
1491 dst[x] = (uchar)-(src2[x] <= src1[x] && src1[x] <= src3[x]);
1496 static void inRange8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
1497 const uchar* src3, size_t step3, uchar* dst, size_t step, Size size)
1499 inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
1502 static void inRange8s(const schar* src1, size_t step1, const schar* src2, size_t step2,
1503 const schar* src3, size_t step3, uchar* dst, size_t step, Size size)
1505 inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
1508 static void inRange16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
1509 const ushort* src3, size_t step3, uchar* dst, size_t step, Size size)
1511 inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
1514 static void inRange16s(const short* src1, size_t step1, const short* src2, size_t step2,
1515 const short* src3, size_t step3, uchar* dst, size_t step, Size size)
1517 inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
1520 static void inRange32s(const int* src1, size_t step1, const int* src2, size_t step2,
1521 const int* src3, size_t step3, uchar* dst, size_t step, Size size)
1523 inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
1526 static void inRange32f(const float* src1, size_t step1, const float* src2, size_t step2,
1527 const float* src3, size_t step3, uchar* dst, size_t step, Size size)
1529 inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
1532 static void inRange64f(const double* src1, size_t step1, const double* src2, size_t step2,
1533 const double* src3, size_t step3, uchar* dst, size_t step, Size size)
1535 inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
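
// Folds a cn-channel in-range mask into a single-channel mask: dst[i] becomes 255 only
// if all channels of pixel i are within range (bitwise AND across the channels).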
static void inRangeReduce(const uchar* src, uchar* dst, size_t len, int cn)
{
    int k = cn % 4 ? cn % 4 : 4;
    size_t i, j;
    if( k == 1 )
        for( i = j = 0; i < len; i++, j += cn )
            dst[i] = src[j];
    else if( k == 2 )
        for( i = j = 0; i < len; i++, j += cn )
            dst[i] = src[j] & src[j+1];
    else if( k == 3 )
        for( i = j = 0; i < len; i++, j += cn )
            dst[i] = src[j] & src[j+1] & src[j+2];
    else
        for( i = j = 0; i < len; i++, j += cn )
            dst[i] = src[j] & src[j+1] & src[j+2] & src[j+3];

    for( ; k < cn; k += 4 )
        for( i = 0, j = k; i < len; i++, j += cn )
            dst[i] &= src[j] & src[j+1] & src[j+2] & src[j+3];
}
1562 typedef void (*InRangeFunc)( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
1563 const uchar* src3, size_t step3, uchar* dst, size_t step, Size sz );
1565 static InRangeFunc getInRangeFunc(int depth)
1567 static InRangeFunc inRangeTab[] =
1569 (InRangeFunc)GET_OPTIMIZED(inRange8u), (InRangeFunc)GET_OPTIMIZED(inRange8s), (InRangeFunc)GET_OPTIMIZED(inRange16u),
1570 (InRangeFunc)GET_OPTIMIZED(inRange16s), (InRangeFunc)GET_OPTIMIZED(inRange32s), (InRangeFunc)GET_OPTIMIZED(inRange32f),
1571 (InRangeFunc)inRange64f, 0
1574 return inRangeTab[depth];
1579 static bool ocl_inRange( InputArray _src, InputArray _lowerb,
1580 InputArray _upperb, OutputArray _dst )
1582 const ocl::Device & d = ocl::Device::getDefault();
1583 int skind = _src.kind(), lkind = _lowerb.kind(), ukind = _upperb.kind();
1584 Size ssize = _src.size(), lsize = _lowerb.size(), usize = _upperb.size();
1585 int stype = _src.type(), ltype = _lowerb.type(), utype = _upperb.type();
1586 int sdepth = CV_MAT_DEPTH(stype), ldepth = CV_MAT_DEPTH(ltype), udepth = CV_MAT_DEPTH(utype);
1587 int cn = CV_MAT_CN(stype), rowsPerWI = d.isIntel() ? 4 : 1;
1588 bool lbScalar = false, ubScalar = false;
1590 if( (lkind == _InputArray::MATX && skind != _InputArray::MATX) ||
1591 ssize != lsize || stype != ltype )
1593 if( !checkScalar(_lowerb, stype, lkind, skind) )
1594 CV_Error( CV_StsUnmatchedSizes,
1595 "The lower boundary is neither an array of the same size and same type as src, nor a scalar");
1599 if( (ukind == _InputArray::MATX && skind != _InputArray::MATX) ||
1600 ssize != usize || stype != utype )
1602 if( !checkScalar(_upperb, stype, ukind, skind) )
1603 CV_Error( CV_StsUnmatchedSizes,
1604 "The upper boundary is neither an array of the same size and same type as src, nor a scalar");
1608 if (lbScalar != ubScalar)
1611 bool doubleSupport = d.doubleFPConfig() > 0,
1612 haveScalar = lbScalar && ubScalar;
1614 if ( (!doubleSupport && sdepth == CV_64F) ||
1615 (!haveScalar && (sdepth != ldepth || sdepth != udepth)) )
1618 int kercn = haveScalar ? cn : std::max(std::min(ocl::predictOptimalVectorWidth(_src, _lowerb, _upperb, _dst), 4), cn);
    if (kercn % cn != 0)
        kercn = cn;
1621 int colsPerWI = kercn / cn;
1622 String opts = format("%s-D cn=%d -D srcT=%s -D srcT1=%s -D dstT=%s -D kercn=%d -D depth=%d%s -D colsPerWI=%d",
1623 haveScalar ? "-D HAVE_SCALAR " : "", cn, ocl::typeToStr(CV_MAKE_TYPE(sdepth, kercn)),
1624 ocl::typeToStr(sdepth), ocl::typeToStr(CV_8UC(colsPerWI)), kercn, sdepth,
1625 doubleSupport ? " -D DOUBLE_SUPPORT" : "", colsPerWI);
1627 ocl::Kernel ker("inrange", ocl::core::inrange_oclsrc, opts);
1631 _dst.create(ssize, CV_8UC1);
1632 UMat src = _src.getUMat(), dst = _dst.getUMat(), lscalaru, uscalaru;
1633 Mat lscalar, uscalar;
1635 if (lbScalar && ubScalar)
1637 lscalar = _lowerb.getMat();
1638 uscalar = _upperb.getMat();
1640 size_t esz = src.elemSize();
1641 size_t blocksize = 36;
1643 AutoBuffer<uchar> _buf(blocksize*(((int)lbScalar + (int)ubScalar)*esz + cn) + 2*cn*sizeof(int) + 128);
1644 uchar *buf = alignPtr(_buf + blocksize*cn, 16);
1646 if( ldepth != sdepth && sdepth < CV_32S )
1648 int* ilbuf = (int*)alignPtr(buf + blocksize*esz, 16);
1649 int* iubuf = ilbuf + cn;
1651 BinaryFunc sccvtfunc = getConvertFunc(ldepth, CV_32S);
1652 sccvtfunc(lscalar.ptr(), 1, 0, 1, (uchar*)ilbuf, 1, Size(cn, 1), 0);
1653 sccvtfunc(uscalar.ptr(), 1, 0, 1, (uchar*)iubuf, 1, Size(cn, 1), 0);
1654 int minval = cvRound(getMinVal(sdepth)), maxval = cvRound(getMaxVal(sdepth));
1656 for( int k = 0; k < cn; k++ )
1658 if( ilbuf[k] > iubuf[k] || ilbuf[k] > maxval || iubuf[k] < minval )
1659 ilbuf[k] = minval+1, iubuf[k] = minval;
1661 lscalar = Mat(cn, 1, CV_32S, ilbuf);
1662 uscalar = Mat(cn, 1, CV_32S, iubuf);
1665 lscalar.convertTo(lscalar, stype);
1666 uscalar.convertTo(uscalar, stype);
1670 lscalaru = _lowerb.getUMat();
1671 uscalaru = _upperb.getUMat();
1674 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
1675 dstarg = ocl::KernelArg::WriteOnly(dst, 1, colsPerWI);
1679 lscalar.copyTo(lscalaru);
1680 uscalar.copyTo(uscalaru);
1682 ker.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(lscalaru),
1683 ocl::KernelArg::PtrReadOnly(uscalaru), rowsPerWI);
1686 ker.args(srcarg, dstarg, ocl::KernelArg::ReadOnlyNoSize(lscalaru),
1687 ocl::KernelArg::ReadOnlyNoSize(uscalaru), rowsPerWI);
1689 size_t globalsize[2] = { (size_t)ssize.width / colsPerWI, ((size_t)ssize.height + rowsPerWI - 1) / rowsPerWI };
1690 return ker.run(2, globalsize, NULL, false);
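
// inRange builds an 8-bit mask: dst(i) = 255 if lowerb(i) <= src(i) <= upperb(i) holds
// for every channel, 0 otherwise; the bounds may be arrays of the same size and type
// as src, or scalars.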
1697 void cv::inRange(InputArray _src, InputArray _lowerb,
1698 InputArray _upperb, OutputArray _dst)
1700 CV_INSTRUMENT_REGION()
1702 CV_OCL_RUN(_src.dims() <= 2 && _lowerb.dims() <= 2 &&
1703 _upperb.dims() <= 2 && OCL_PERFORMANCE_CHECK(_dst.isUMat()),
1704 ocl_inRange(_src, _lowerb, _upperb, _dst))
1706 int skind = _src.kind(), lkind = _lowerb.kind(), ukind = _upperb.kind();
1707 Mat src = _src.getMat(), lb = _lowerb.getMat(), ub = _upperb.getMat();
1709 bool lbScalar = false, ubScalar = false;
1711 if( (lkind == _InputArray::MATX && skind != _InputArray::MATX) ||
1712 src.size != lb.size || src.type() != lb.type() )
1714 if( !checkScalar(lb, src.type(), lkind, skind) )
1715 CV_Error( CV_StsUnmatchedSizes,
1716 "The lower boundary is neither an array of the same size and same type as src, nor a scalar");
1720 if( (ukind == _InputArray::MATX && skind != _InputArray::MATX) ||
1721 src.size != ub.size || src.type() != ub.type() )
1723 if( !checkScalar(ub, src.type(), ukind, skind) )
1724 CV_Error( CV_StsUnmatchedSizes,
1725 "The upper boundary is neither an array of the same size and same type as src, nor a scalar");
1729 CV_Assert(lbScalar == ubScalar);
1731 int cn = src.channels(), depth = src.depth();
1733 size_t esz = src.elemSize();
1734 size_t blocksize0 = (size_t)(BLOCK_SIZE + esz-1)/esz;
1736 _dst.create(src.dims, src.size, CV_8UC1);
1737 Mat dst = _dst.getMat();
1738 InRangeFunc func = getInRangeFunc(depth);
1740 const Mat* arrays_sc[] = { &src, &dst, 0 };
1741 const Mat* arrays_nosc[] = { &src, &dst, &lb, &ub, 0 };
1744 NAryMatIterator it(lbScalar && ubScalar ? arrays_sc : arrays_nosc, ptrs);
1745 size_t total = it.size, blocksize = std::min(total, blocksize0);
1747 AutoBuffer<uchar> _buf(blocksize*(((int)lbScalar + (int)ubScalar)*esz + cn) + 2*cn*sizeof(int) + 128);
1748 uchar *buf = _buf, *mbuf = buf, *lbuf = 0, *ubuf = 0;
1749 buf = alignPtr(buf + blocksize*cn, 16);
1751 if( lbScalar && ubScalar )
1754 ubuf = buf = alignPtr(buf + blocksize*esz, 16);
1756 CV_Assert( lb.type() == ub.type() );
1757 int scdepth = lb.depth();
1759 if( scdepth != depth && depth < CV_32S )
1761 int* ilbuf = (int*)alignPtr(buf + blocksize*esz, 16);
1762 int* iubuf = ilbuf + cn;
1764 BinaryFunc sccvtfunc = getConvertFunc(scdepth, CV_32S);
1765 sccvtfunc(lb.ptr(), 1, 0, 1, (uchar*)ilbuf, 1, Size(cn, 1), 0);
1766 sccvtfunc(ub.ptr(), 1, 0, 1, (uchar*)iubuf, 1, Size(cn, 1), 0);
1767 int minval = cvRound(getMinVal(depth)), maxval = cvRound(getMaxVal(depth));
1769 for( int k = 0; k < cn; k++ )
1771 if( ilbuf[k] > iubuf[k] || ilbuf[k] > maxval || iubuf[k] < minval )
1772 ilbuf[k] = minval+1, iubuf[k] = minval;
1774 lb = Mat(cn, 1, CV_32S, ilbuf);
1775 ub = Mat(cn, 1, CV_32S, iubuf);
1778 convertAndUnrollScalar( lb, src.type(), lbuf, blocksize );
1779 convertAndUnrollScalar( ub, src.type(), ubuf, blocksize );
1782 for( size_t i = 0; i < it.nplanes; i++, ++it )
1784 for( size_t j = 0; j < total; j += blocksize )
1786 int bsz = (int)MIN(total - j, blocksize);
1787 size_t delta = bsz*esz;
1788 uchar *lptr = lbuf, *uptr = ubuf;
1796 int idx = !lbScalar ? 3 : 2;
1800 func( ptrs[0], 0, lptr, 0, uptr, 0, cn == 1 ? ptrs[1] : mbuf, 0, Size(bsz*cn, 1));
1802 inRangeReduce(mbuf, ptrs[1], bsz, cn);
1809 /****************************************************************************************\
1810 * Earlier API: cvAdd etc. *
1811 \****************************************************************************************/
1814 cvNot( const CvArr* srcarr, CvArr* dstarr )
1816 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
1817 CV_Assert( src.size == dst.size && src.type() == dst.type() );
1818 cv::bitwise_not( src, dst );
1823 cvAnd( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr )
1825 cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
1826 dst = cv::cvarrToMat(dstarr), mask;
1827 CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
1829 mask = cv::cvarrToMat(maskarr);
1830 cv::bitwise_and( src1, src2, dst, mask );
1835 cvOr( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr )
1837 cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
1838 dst = cv::cvarrToMat(dstarr), mask;
1839 CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
1841 mask = cv::cvarrToMat(maskarr);
1842 cv::bitwise_or( src1, src2, dst, mask );
1847 cvXor( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr )
1849 cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
1850 dst = cv::cvarrToMat(dstarr), mask;
1851 CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
1853 mask = cv::cvarrToMat(maskarr);
1854 cv::bitwise_xor( src1, src2, dst, mask );
1859 cvAndS( const CvArr* srcarr, CvScalar s, CvArr* dstarr, const CvArr* maskarr )
1861 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), mask;
1862 CV_Assert( src.size == dst.size && src.type() == dst.type() );
1864 mask = cv::cvarrToMat(maskarr);
1865 cv::bitwise_and( src, (const cv::Scalar&)s, dst, mask );
1870 cvOrS( const CvArr* srcarr, CvScalar s, CvArr* dstarr, const CvArr* maskarr )
1872 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), mask;
1873 CV_Assert( src.size == dst.size && src.type() == dst.type() );
1875 mask = cv::cvarrToMat(maskarr);
1876 cv::bitwise_or( src, (const cv::Scalar&)s, dst, mask );
1881 cvXorS( const CvArr* srcarr, CvScalar s, CvArr* dstarr, const CvArr* maskarr )
1883 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), mask;
1884 CV_Assert( src.size == dst.size && src.type() == dst.type() );
1886 mask = cv::cvarrToMat(maskarr);
1887 cv::bitwise_xor( src, (const cv::Scalar&)s, dst, mask );
1891 CV_IMPL void cvAdd( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr )
1893 cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
1894 dst = cv::cvarrToMat(dstarr), mask;
1895 CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
1897 mask = cv::cvarrToMat(maskarr);
1898 cv::add( src1, src2, dst, mask, dst.type() );
1902 CV_IMPL void cvSub( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr )
1904 cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
1905 dst = cv::cvarrToMat(dstarr), mask;
1906 CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
1908 mask = cv::cvarrToMat(maskarr);
1909 cv::subtract( src1, src2, dst, mask, dst.type() );
1913 CV_IMPL void cvAddS( const CvArr* srcarr1, CvScalar value, CvArr* dstarr, const CvArr* maskarr )
1915 cv::Mat src1 = cv::cvarrToMat(srcarr1),
1916 dst = cv::cvarrToMat(dstarr), mask;
1917 CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
1919 mask = cv::cvarrToMat(maskarr);
1920 cv::add( src1, (const cv::Scalar&)value, dst, mask, dst.type() );
1924 CV_IMPL void cvSubRS( const CvArr* srcarr1, CvScalar value, CvArr* dstarr, const CvArr* maskarr )
1926 cv::Mat src1 = cv::cvarrToMat(srcarr1),
1927 dst = cv::cvarrToMat(dstarr), mask;
1928 CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
1930 mask = cv::cvarrToMat(maskarr);
1931 cv::subtract( (const cv::Scalar&)value, src1, dst, mask, dst.type() );
CV_IMPL void cvMul( const CvArr* srcarr1, const CvArr* srcarr2,
                    CvArr* dstarr, double scale )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
        dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
    cv::multiply( src1, src2, dst, scale, dst.type() );
}


CV_IMPL void cvDiv( const CvArr* srcarr1, const CvArr* srcarr2,
                    CvArr* dstarr, double scale )
{
    cv::Mat src2 = cv::cvarrToMat(srcarr2),
        dst = cv::cvarrToMat(dstarr), mask;
    CV_Assert( src2.size == dst.size && src2.channels() == dst.channels() );

    if( srcarr1 )
        cv::divide( cv::cvarrToMat(srcarr1), src2, dst, scale, dst.type() );
    else
        cv::divide( scale, src2, dst, dst.type() );
}
CV_IMPL void
cvAddWeighted( const CvArr* srcarr1, double alpha,
               const CvArr* srcarr2, double beta,
               double gamma, CvArr* dstarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
        dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
    cv::addWeighted( src1, alpha, src2, beta, gamma, dst, dst.type() );
}


CV_IMPL void
cvAbsDiff( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && src1.type() == dst.type() );

    cv::absdiff( src1, cv::cvarrToMat(srcarr2), dst );
}


CV_IMPL void
cvAbsDiffS( const CvArr* srcarr1, CvArr* dstarr, CvScalar scalar )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && src1.type() == dst.type() );

    cv::absdiff( src1, (const cv::Scalar&)scalar, dst );
}
CV_IMPL void
cvInRange( const void* srcarr1, const void* srcarr2,
           const void* srcarr3, void* dstarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && dst.type() == CV_8U );

    cv::inRange( src1, cv::cvarrToMat(srcarr2), cv::cvarrToMat(srcarr3), dst );
}


CV_IMPL void
cvInRangeS( const void* srcarr1, CvScalar lowerb, CvScalar upperb, void* dstarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && dst.type() == CV_8U );

    cv::inRange( src1, (const cv::Scalar&)lowerb, (const cv::Scalar&)upperb, dst );
}


CV_IMPL void
cvCmp( const void* srcarr1, const void* srcarr2, void* dstarr, int cmp_op )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && dst.type() == CV_8U );

    cv::compare( src1, cv::cvarrToMat(srcarr2), dst, cmp_op );
}


CV_IMPL void
cvCmpS( const void* srcarr1, double value, void* dstarr, int cmp_op )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && dst.type() == CV_8U );

    cv::compare( src1, value, dst, cmp_op );
}
CV_IMPL void
cvMin( const void* srcarr1, const void* srcarr2, void* dstarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && src1.type() == dst.type() );

    cv::min( src1, cv::cvarrToMat(srcarr2), dst );
}


CV_IMPL void
cvMax( const void* srcarr1, const void* srcarr2, void* dstarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && src1.type() == dst.type() );

    cv::max( src1, cv::cvarrToMat(srcarr2), dst );
}


CV_IMPL void
cvMinS( const void* srcarr1, double value, void* dstarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && src1.type() == dst.type() );

    cv::min( src1, value, dst );
}


CV_IMPL void
cvMaxS( const void* srcarr1, double value, void* dstarr )
{
    cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
    CV_Assert( src1.size == dst.size && src1.type() == dst.type() );

    cv::max( src1, value, dst );
}
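/* Usage sketch (illustrative only, not part of the library): each legacy C wrapper above
   converts its CvArr arguments to cv::Mat headers with cv::cvarrToMat() and forwards to
   the corresponding cv:: function, so the two call styles below are expected to produce
   the same result. The image variables ("imgA", "imgB", "sum") are hypothetical.

       IplImage* sum = cvCreateImage(cvGetSize(imgA), IPL_DEPTH_8U, 1);
       cvAdd(imgA, imgB, sum, NULL);                       // legacy C API, saturating add

       cv::Mat a = cv::cvarrToMat(imgA), b = cv::cvarrToMat(imgB), d = cv::cvarrToMat(sum);
       cv::add(a, b, d);                                   // equivalent C++ call
*/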
namespace cv { namespace hal {

//=======================================

#if (ARITHM_USE_IPP == 1)
static inline void fixSteps(int width, int height, size_t elemSize, size_t& step1, size_t& step2, size_t& step)
{
    if( height == 1 )
        step1 = step2 = step = width*elemSize;
}

#define CALL_IPP_BIN_E_12(fun) \
    do \
    { \
        fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
        if (0 <= CV_INSTRUMENT_FUN_IPP(fun, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0)) \
        { \
            CV_IMPL_ADD(CV_IMPL_IPP); \
            return; \
        } \
        setIppErrorStatus(); \
    } \
    while(0)

#define CALL_IPP_BIN_E_21(fun) \
    do \
    { \
        fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
        if (0 <= CV_INSTRUMENT_FUN_IPP(fun, src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0)) \
        { \
            CV_IMPL_ADD(CV_IMPL_IPP); \
            return; \
        } \
        setIppErrorStatus(); \
    } \
    while(0)

#define CALL_IPP_BIN_12(fun) \
    do \
    { \
        fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
        if (0 <= CV_INSTRUMENT_FUN_IPP(fun, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height))) \
        { \
            CV_IMPL_ADD(CV_IMPL_IPP); \
            return; \
        } \
        setIppErrorStatus(); \
    } \
    while(0)

#define CALL_IPP_BIN_21(fun) \
    do \
    { \
        fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
        if (0 <= CV_INSTRUMENT_FUN_IPP(fun, src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height))) \
        { \
            CV_IMPL_ADD(CV_IMPL_IPP); \
            return; \
        } \
        setIppErrorStatus(); \
    } \
    while(0)

#else
#define CALL_IPP_BIN_E_12(fun)
#define CALL_IPP_BIN_E_21(fun)
#define CALL_IPP_BIN_12(fun)
#define CALL_IPP_BIN_21(fun)
#endif
//=======================================
// Add
//=======================================

void add8u( const uchar* src1, size_t step1,
            const uchar* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(add8u, cv_hal_add8u, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_BIN_E_12(ippiAdd_8u_C1RSfs)
    (vBinOp<uchar, cv::OpAdd<uchar>, IF_SIMD(VAdd<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
}

void add8s( const schar* src1, size_t step1,
            const schar* src2, size_t step2,
            schar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(add8s, cv_hal_add8s, src1, step1, src2, step2, dst, step, width, height)
    vBinOp<schar, cv::OpAdd<schar>, IF_SIMD(VAdd<schar>)>(src1, step1, src2, step2, dst, step, width, height);
}

void add16u( const ushort* src1, size_t step1,
             const ushort* src2, size_t step2,
             ushort* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(add16u, cv_hal_add16u, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_BIN_E_12(ippiAdd_16u_C1RSfs)
    (vBinOp<ushort, cv::OpAdd<ushort>, IF_SIMD(VAdd<ushort>)>(src1, step1, src2, step2, dst, step, width, height));
}

void add16s( const short* src1, size_t step1,
             const short* src2, size_t step2,
             short* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(add16s, cv_hal_add16s, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_BIN_E_12(ippiAdd_16s_C1RSfs)
    (vBinOp<short, cv::OpAdd<short>, IF_SIMD(VAdd<short>)>(src1, step1, src2, step2, dst, step, width, height));
}

void add32s( const int* src1, size_t step1,
             const int* src2, size_t step2,
             int* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(add32s, cv_hal_add32s, src1, step1, src2, step2, dst, step, width, height)
    vBinOp32<int, cv::OpAdd<int>, IF_SIMD(VAdd<int>)>(src1, step1, src2, step2, dst, step, width, height);
}

void add32f( const float* src1, size_t step1,
             const float* src2, size_t step2,
             float* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(add32f, cv_hal_add32f, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_BIN_12(ippiAdd_32f_C1R)
    (vBinOp32<float, cv::OpAdd<float>, IF_SIMD(VAdd<float>)>(src1, step1, src2, step2, dst, step, width, height));
}

void add64f( const double* src1, size_t step1,
             const double* src2, size_t step2,
             double* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(add64f, cv_hal_add64f, src1, step1, src2, step2, dst, step, width, height)
    vBinOp64<double, cv::OpAdd<double>, IF_SIMD(VAdd<double>)>(src1, step1, src2, step2, dst, step, width, height);
}
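/* Minimal sketch of invoking an 8-bit HAL kernel directly (applications normally reach it
   through cv::add). Buffer contents and names below are made up; steps are given in bytes
   and the trailing void* argument is unused here.

       uchar a[4] = { 250, 10, 128, 0 };
       uchar b[4] = {  10, 10, 200, 5 };
       uchar d[4];
       cv::hal::add8u(a, 4, b, 4, d, 4, 4, 1, NULL);
       // d == { 255, 20, 255, 5 }: sums above 255 saturate rather than wrap
*/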
//=======================================
// Sub
//=======================================

void sub8u( const uchar* src1, size_t step1,
            const uchar* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(sub8u, cv_hal_sub8u, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_BIN_E_21(ippiSub_8u_C1RSfs)
    (vBinOp<uchar, cv::OpSub<uchar>, IF_SIMD(VSub<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
}

void sub8s( const schar* src1, size_t step1,
            const schar* src2, size_t step2,
            schar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(sub8s, cv_hal_sub8s, src1, step1, src2, step2, dst, step, width, height)
    vBinOp<schar, cv::OpSub<schar>, IF_SIMD(VSub<schar>)>(src1, step1, src2, step2, dst, step, width, height);
}

void sub16u( const ushort* src1, size_t step1,
             const ushort* src2, size_t step2,
             ushort* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(sub16u, cv_hal_sub16u, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_BIN_E_21(ippiSub_16u_C1RSfs)
    (vBinOp<ushort, cv::OpSub<ushort>, IF_SIMD(VSub<ushort>)>(src1, step1, src2, step2, dst, step, width, height));
}

void sub16s( const short* src1, size_t step1,
             const short* src2, size_t step2,
             short* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(sub16s, cv_hal_sub16s, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_BIN_E_21(ippiSub_16s_C1RSfs)
    (vBinOp<short, cv::OpSub<short>, IF_SIMD(VSub<short>)>(src1, step1, src2, step2, dst, step, width, height));
}

void sub32s( const int* src1, size_t step1,
             const int* src2, size_t step2,
             int* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(sub32s, cv_hal_sub32s, src1, step1, src2, step2, dst, step, width, height)
    vBinOp32<int, cv::OpSub<int>, IF_SIMD(VSub<int>)>(src1, step1, src2, step2, dst, step, width, height);
}

void sub32f( const float* src1, size_t step1,
             const float* src2, size_t step2,
             float* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(sub32f, cv_hal_sub32f, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_BIN_21(ippiSub_32f_C1R)
    (vBinOp32<float, cv::OpSub<float>, IF_SIMD(VSub<float>)>(src1, step1, src2, step2, dst, step, width, height));
}

void sub64f( const double* src1, size_t step1,
             const double* src2, size_t step2,
             double* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(sub64f, cv_hal_sub64f, src1, step1, src2, step2, dst, step, width, height)
    vBinOp64<double, cv::OpSub<double>, IF_SIMD(VSub<double>)>(src1, step1, src2, step2, dst, step, width, height);
}
//=======================================

#if (ARITHM_USE_IPP == 1)
#define CALL_IPP_MIN_MAX(fun, type) \
    do \
    { \
        type* s1 = (type*)src1; \
        type* s2 = (type*)src2; \
        type* d  = dst; \
        fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
        int i = 0; \
        for(; i < height; i++) \
        { \
            if (0 > CV_INSTRUMENT_FUN_IPP(fun, s1, s2, d, width)) \
                break; \
            s1 = (type*)((uchar*)s1 + step1); \
            s2 = (type*)((uchar*)s2 + step2); \
            d  = (type*)((uchar*)d + step); \
        } \
        if (i == height) \
        { \
            CV_IMPL_ADD(CV_IMPL_IPP); \
            return; \
        } \
        setIppErrorStatus(); \
    } \
    while(0)
#else
#define CALL_IPP_MIN_MAX(fun, type)
#endif
//=======================================
// Max
//=======================================

void max8u( const uchar* src1, size_t step1,
            const uchar* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(max8u, cv_hal_max8u, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_MIN_MAX(ippsMaxEvery_8u, uchar)
    vBinOp<uchar, cv::OpMax<uchar>, IF_SIMD(VMax<uchar>)>(src1, step1, src2, step2, dst, step, width, height);
}

void max8s( const schar* src1, size_t step1,
            const schar* src2, size_t step2,
            schar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(max8s, cv_hal_max8s, src1, step1, src2, step2, dst, step, width, height)
    vBinOp<schar, cv::OpMax<schar>, IF_SIMD(VMax<schar>)>(src1, step1, src2, step2, dst, step, width, height);
}

void max16u( const ushort* src1, size_t step1,
             const ushort* src2, size_t step2,
             ushort* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(max16u, cv_hal_max16u, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_MIN_MAX(ippsMaxEvery_16u, ushort)
    vBinOp<ushort, cv::OpMax<ushort>, IF_SIMD(VMax<ushort>)>(src1, step1, src2, step2, dst, step, width, height);
}

void max16s( const short* src1, size_t step1,
             const short* src2, size_t step2,
             short* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(max16s, cv_hal_max16s, src1, step1, src2, step2, dst, step, width, height)
    vBinOp<short, cv::OpMax<short>, IF_SIMD(VMax<short>)>(src1, step1, src2, step2, dst, step, width, height);
}

void max32s( const int* src1, size_t step1,
             const int* src2, size_t step2,
             int* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(max32s, cv_hal_max32s, src1, step1, src2, step2, dst, step, width, height)
    vBinOp32<int, cv::OpMax<int>, IF_SIMD(VMax<int>)>(src1, step1, src2, step2, dst, step, width, height);
}

void max32f( const float* src1, size_t step1,
             const float* src2, size_t step2,
             float* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(max32f, cv_hal_max32f, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_MIN_MAX(ippsMaxEvery_32f, float)
    vBinOp32<float, cv::OpMax<float>, IF_SIMD(VMax<float>)>(src1, step1, src2, step2, dst, step, width, height);
}

void max64f( const double* src1, size_t step1,
             const double* src2, size_t step2,
             double* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(max64f, cv_hal_max64f, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_MIN_MAX(ippsMaxEvery_64f, double)
    vBinOp64<double, cv::OpMax<double>, IF_SIMD(VMax<double>)>(src1, step1, src2, step2, dst, step, width, height);
}
//=======================================
// Min
//=======================================

void min8u( const uchar* src1, size_t step1,
            const uchar* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(min8u, cv_hal_min8u, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_MIN_MAX(ippsMinEvery_8u, uchar)
    vBinOp<uchar, cv::OpMin<uchar>, IF_SIMD(VMin<uchar>)>(src1, step1, src2, step2, dst, step, width, height);
}

void min8s( const schar* src1, size_t step1,
            const schar* src2, size_t step2,
            schar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(min8s, cv_hal_min8s, src1, step1, src2, step2, dst, step, width, height)
    vBinOp<schar, cv::OpMin<schar>, IF_SIMD(VMin<schar>)>(src1, step1, src2, step2, dst, step, width, height);
}

void min16u( const ushort* src1, size_t step1,
             const ushort* src2, size_t step2,
             ushort* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(min16u, cv_hal_min16u, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_MIN_MAX(ippsMinEvery_16u, ushort)
    vBinOp<ushort, cv::OpMin<ushort>, IF_SIMD(VMin<ushort>)>(src1, step1, src2, step2, dst, step, width, height);
}

void min16s( const short* src1, size_t step1,
             const short* src2, size_t step2,
             short* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(min16s, cv_hal_min16s, src1, step1, src2, step2, dst, step, width, height)
    vBinOp<short, cv::OpMin<short>, IF_SIMD(VMin<short>)>(src1, step1, src2, step2, dst, step, width, height);
}

void min32s( const int* src1, size_t step1,
             const int* src2, size_t step2,
             int* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(min32s, cv_hal_min32s, src1, step1, src2, step2, dst, step, width, height)
    vBinOp32<int, cv::OpMin<int>, IF_SIMD(VMin<int>)>(src1, step1, src2, step2, dst, step, width, height);
}

void min32f( const float* src1, size_t step1,
             const float* src2, size_t step2,
             float* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(min32f, cv_hal_min32f, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_MIN_MAX(ippsMinEvery_32f, float)
    vBinOp32<float, cv::OpMin<float>, IF_SIMD(VMin<float>)>(src1, step1, src2, step2, dst, step, width, height);
}

void min64f( const double* src1, size_t step1,
             const double* src2, size_t step2,
             double* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(min64f, cv_hal_min64f, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_MIN_MAX(ippsMinEvery_64f, double)
    vBinOp64<double, cv::OpMin<double>, IF_SIMD(VMin<double>)>(src1, step1, src2, step2, dst, step, width, height);
}
//=======================================
// AbsDiff
//=======================================

void absdiff8u( const uchar* src1, size_t step1,
                const uchar* src2, size_t step2,
                uchar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(absdiff8u, cv_hal_absdiff8u, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_BIN_12(ippiAbsDiff_8u_C1R)
    (vBinOp<uchar, cv::OpAbsDiff<uchar>, IF_SIMD(VAbsDiff<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
}

void absdiff8s( const schar* src1, size_t step1,
                const schar* src2, size_t step2,
                schar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(absdiff8s, cv_hal_absdiff8s, src1, step1, src2, step2, dst, step, width, height)
    vBinOp<schar, cv::OpAbsDiff<schar>, IF_SIMD(VAbsDiff<schar>)>(src1, step1, src2, step2, dst, step, width, height);
}

void absdiff16u( const ushort* src1, size_t step1,
                 const ushort* src2, size_t step2,
                 ushort* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(absdiff16u, cv_hal_absdiff16u, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_BIN_12(ippiAbsDiff_16u_C1R)
    (vBinOp<ushort, cv::OpAbsDiff<ushort>, IF_SIMD(VAbsDiff<ushort>)>(src1, step1, src2, step2, dst, step, width, height));
}

void absdiff16s( const short* src1, size_t step1,
                 const short* src2, size_t step2,
                 short* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(absdiff16s, cv_hal_absdiff16s, src1, step1, src2, step2, dst, step, width, height)
    vBinOp<short, cv::OpAbsDiff<short>, IF_SIMD(VAbsDiff<short>)>(src1, step1, src2, step2, dst, step, width, height);
}

void absdiff32s( const int* src1, size_t step1,
                 const int* src2, size_t step2,
                 int* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(absdiff32s, cv_hal_absdiff32s, src1, step1, src2, step2, dst, step, width, height)
    vBinOp32<int, cv::OpAbsDiff<int>, IF_SIMD(VAbsDiff<int>)>(src1, step1, src2, step2, dst, step, width, height);
}

void absdiff32f( const float* src1, size_t step1,
                 const float* src2, size_t step2,
                 float* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(absdiff32f, cv_hal_absdiff32f, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_BIN_12(ippiAbsDiff_32f_C1R)
    (vBinOp32<float, cv::OpAbsDiff<float>, IF_SIMD(VAbsDiff<float>)>(src1, step1, src2, step2, dst, step, width, height));
}

void absdiff64f( const double* src1, size_t step1,
                 const double* src2, size_t step2,
                 double* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(absdiff64f, cv_hal_absdiff64f, src1, step1, src2, step2, dst, step, width, height)
    vBinOp64<double, cv::OpAbsDiff<double>, IF_SIMD(VAbsDiff<double>)>(src1, step1, src2, step2, dst, step, width, height);
}
//=======================================
// Logical
//=======================================

#if (ARITHM_USE_IPP == 1)
#define CALL_IPP_UN(fun) \
    do \
    { \
        fixSteps(width, height, sizeof(dst[0]), step1, step2, step); (void)src2; \
        if (0 <= CV_INSTRUMENT_FUN_IPP(fun, src1, (int)step1, dst, (int)step, ippiSize(width, height))) \
        { \
            CV_IMPL_ADD(CV_IMPL_IPP); \
            return; \
        } \
        setIppErrorStatus(); \
    } \
    while(0)
#else
#define CALL_IPP_UN(fun)
#endif

void and8u( const uchar* src1, size_t step1,
            const uchar* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(and8u, cv_hal_and8u, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_BIN_12(ippiAnd_8u_C1R)
    (vBinOp<uchar, cv::OpAnd<uchar>, IF_SIMD(VAnd<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
}

void or8u( const uchar* src1, size_t step1,
           const uchar* src2, size_t step2,
           uchar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(or8u, cv_hal_or8u, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_BIN_12(ippiOr_8u_C1R)
    (vBinOp<uchar, cv::OpOr<uchar>, IF_SIMD(VOr<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
}

void xor8u( const uchar* src1, size_t step1,
            const uchar* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(xor8u, cv_hal_xor8u, src1, step1, src2, step2, dst, step, width, height)
    CALL_IPP_BIN_12(ippiXor_8u_C1R)
    (vBinOp<uchar, cv::OpXor<uchar>, IF_SIMD(VXor<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
}

void not8u( const uchar* src1, size_t step1,
            const uchar* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* )
{
    CALL_HAL(not8u, cv_hal_not8u, src1, step1, dst, step, width, height)
    CALL_IPP_UN(ippiNot_8u_C1R)
    (vBinOp<uchar, cv::OpNot<uchar>, IF_SIMD(VNot<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
}
//=======================================

#if ARITHM_USE_IPP
inline static IppCmpOp convert_cmp(int _cmpop)
{
    return _cmpop == CMP_EQ ? ippCmpEq :
        _cmpop == CMP_GT ? ippCmpGreater :
        _cmpop == CMP_GE ? ippCmpGreaterEq :
        _cmpop == CMP_LT ? ippCmpLess :
        _cmpop == CMP_LE ? ippCmpLessEq :
        (IppCmpOp)-1;
}

#define CALL_IPP_CMP(fun) \
    do \
    { \
        IppCmpOp op = convert_cmp(*(int *)_cmpop); \
        if( op >= 0 ) \
        { \
            fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
            if (0 <= CV_INSTRUMENT_FUN_IPP(fun, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), op)) \
            { \
                CV_IMPL_ADD(CV_IMPL_IPP); \
                return; \
            } \
            setIppErrorStatus(); \
        } \
    } \
    while(0)
#else
#define CALL_IPP_CMP(fun)
#endif
//=======================================
// Cmp
//=======================================

void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
           uchar* dst, size_t step, int width, int height, void* _cmpop)
{
    CALL_HAL(cmp8u, cv_hal_cmp8u, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
    CALL_IPP_CMP(ippiCompare_8u_C1R)
    //vz optimized cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
    int code = *(int*)_cmpop;
    step1 /= sizeof(src1[0]);
    step2 /= sizeof(src2[0]);
    if( code == CMP_GE || code == CMP_LT )
    {
        std::swap(src1, src2);
        std::swap(step1, step2);
        code = code == CMP_GE ? CMP_LE : CMP_GT;
    }

    if( code == CMP_GT || code == CMP_LE )
    {
        int m = code == CMP_GT ? 0 : 255;
        for( ; height--; src1 += step1, src2 += step2, dst += step )
        {
            int x = 0;
#if CV_SIMD128
            if( hasSIMD128() )
            {
                v_uint8x16 mask = v_setall_u8((uchar)m);

                for( ; x <= width - v_uint8x16::nlanes; x += v_uint8x16::nlanes )
                {
                    v_store(dst + x, (v_load(src1 + x) > v_load(src2 + x)) ^ mask);
                }
            }
#endif

            for( ; x < width; x++ ){
                dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
            }
        }
    }
    else if( code == CMP_EQ || code == CMP_NE )
    {
        int m = code == CMP_EQ ? 0 : 255;
        for( ; height--; src1 += step1, src2 += step2, dst += step )
        {
            int x = 0;
#if CV_SIMD128
            if( hasSIMD128() )
            {
                v_uint8x16 mask = v_setall_u8((uchar)m);

                for( ; x <= width - v_uint8x16::nlanes; x += v_uint8x16::nlanes )
                {
                    v_store(dst+x, (v_load(src1+x) == v_load(src2+x)) ^ mask);
                }
            }
#endif

            for( ; x < width; x++ )
                dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
        }
    }
}
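/* Why "(uchar)(-(src1[x] > src2[x]) ^ m)" works: the comparison yields 0 or 1, negating it
   gives 0 or an all-ones bit pattern, and XOR with m (0 or 255) optionally inverts the
   result, so one loop serves both CMP_GT/CMP_EQ and their complements CMP_LE/CMP_NE.
   A small worked example, values chosen arbitrarily:

       src1[x] = 7, src2[x] = 3, m = 0    ->  -(7 > 3) = -1 = 0xFF..FF, XOR 0   -> dst[x] = 255  (GT holds)
       src1[x] = 7, src2[x] = 3, m = 255  ->  0xFF..FF XOR 0xFF                 -> dst[x] = 0    (LE fails)
*/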
void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2,
           uchar* dst, size_t step, int width, int height, void* _cmpop)
{
    CALL_HAL(cmp8s, cv_hal_cmp8s, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
    cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
}

void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* _cmpop)
{
    CALL_HAL(cmp16u, cv_hal_cmp16u, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
    CALL_IPP_CMP(ippiCompare_16u_C1R)
    cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
}

void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* _cmpop)
{
    CALL_HAL(cmp16s, cv_hal_cmp16s, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
    CALL_IPP_CMP(ippiCompare_16s_C1R)
    //vz optimized cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);

    int code = *(int*)_cmpop;
    step1 /= sizeof(src1[0]);
    step2 /= sizeof(src2[0]);
    if( code == CMP_GE || code == CMP_LT )
    {
        std::swap(src1, src2);
        std::swap(step1, step2);
        code = code == CMP_GE ? CMP_LE : CMP_GT;
    }

    if( code == CMP_GT || code == CMP_LE )
    {
        int m = code == CMP_GT ? 0 : 255;
        for( ; height--; src1 += step1, src2 += step2, dst += step )
        {
            int x = 0;
#if CV_SIMD128
            if( hasSIMD128() )
            {
                v_uint8x16 mask = v_setall_u8((uchar)m);
                const int dWidth = v_uint8x16::nlanes;

                for( ; x <= width - dWidth; x += dWidth )
                {
                    v_int16x8 in1 = v_load(src1 + x);
                    v_int16x8 in2 = v_load(src2 + x);
                    v_uint16x8 t1 = v_reinterpret_as_u16(in1 > in2);

                    in1 = v_load(src1 + x + v_uint16x8::nlanes);
                    in2 = v_load(src2 + x + v_uint16x8::nlanes);
                    v_uint16x8 t2 = v_reinterpret_as_u16(in1 > in2);

                    v_store(dst+x, (v_pack(t1, t2)) ^ mask);
                }
            }
#endif

            for( ; x < width; x++ ){
                dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
            }
        }
    }
    else if( code == CMP_EQ || code == CMP_NE )
    {
        int m = code == CMP_EQ ? 0 : 255;
        for( ; height--; src1 += step1, src2 += step2, dst += step )
        {
            int x = 0;
#if CV_SIMD128
            if( hasSIMD128() )
            {
                v_uint8x16 mask = v_setall_u8((uchar)m);
                const int dWidth = v_uint8x16::nlanes;

                for( ; x <= width - dWidth; x += dWidth )
                {
                    v_int16x8 in1 = v_load(src1 + x);
                    v_int16x8 in2 = v_load(src2 + x);
                    v_uint16x8 t1 = v_reinterpret_as_u16(in1 == in2);

                    in1 = v_load(src1 + x + 8);
                    in2 = v_load(src2 + x + 8);
                    v_uint16x8 t2 = v_reinterpret_as_u16(in1 == in2);

                    v_store(dst+x, (v_pack(t1, t2) ^ mask));
                }
            }
#endif

            for( ; x < width; x++ )
                dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
        }
    }
}
void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* _cmpop)
{
    CALL_HAL(cmp32s, cv_hal_cmp32s, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
    cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
}

void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* _cmpop)
{
    CALL_HAL(cmp32f, cv_hal_cmp32f, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
    CALL_IPP_CMP(ippiCompare_32f_C1R)
    cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
}

void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* _cmpop)
{
    CALL_HAL(cmp64f, cv_hal_cmp64f, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
    cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
}
//=======================================

#if defined HAVE_IPP
#define CALL_IPP_MUL(fun) \
    do \
    { \
        if (std::fabs(fscale - 1) <= FLT_EPSILON) \
        { \
            if (CV_INSTRUMENT_FUN_IPP(fun, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0) >= 0) \
            { \
                CV_IMPL_ADD(CV_IMPL_IPP); \
                return; \
            } \
            setIppErrorStatus(); \
        } \
    } \
    while(0)

#define CALL_IPP_MUL_2(fun) \
    do \
    { \
        if (std::fabs(fscale - 1) <= FLT_EPSILON) \
        { \
            if (CV_INSTRUMENT_FUN_IPP(fun, src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height)) >= 0) \
            { \
                CV_IMPL_ADD(CV_IMPL_IPP); \
                return; \
            } \
            setIppErrorStatus(); \
        } \
    } \
    while(0)

#else
#define CALL_IPP_MUL(fun)
#define CALL_IPP_MUL_2(fun)
#endif
//=======================================
// Mul
//=======================================

void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(mul8u, cv_hal_mul8u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    float fscale = (float)*(const double*)scale;
    CALL_IPP_MUL(ippiMul_8u_C1RSfs)
    mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
}

void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
            schar* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(mul8s, cv_hal_mul8s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    mul_(src1, step1, src2, step2, dst, step, width, height, (float)*(const double*)scale);
}

void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
             ushort* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(mul16u, cv_hal_mul16u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    float fscale = (float)*(const double*)scale;
    CALL_IPP_MUL(ippiMul_16u_C1RSfs)
    mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
}

void mul16s( const short* src1, size_t step1, const short* src2, size_t step2,
             short* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(mul16s, cv_hal_mul16s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    float fscale = (float)*(const double*)scale;
    CALL_IPP_MUL(ippiMul_16s_C1RSfs)
    mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
}

void mul32s( const int* src1, size_t step1, const int* src2, size_t step2,
             int* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(mul32s, cv_hal_mul32s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    mul_(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void mul32f( const float* src1, size_t step1, const float* src2, size_t step2,
             float* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(mul32f, cv_hal_mul32f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    float fscale = (float)*(const double*)scale;
    CALL_IPP_MUL_2(ippiMul_32f_C1R)
    mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
}

void mul64f( const double* src1, size_t step1, const double* src2, size_t step2,
             double* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(mul64f, cv_hal_mul64f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    mul_(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}
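/* Sketch of the scaling semantics implemented by the mul_ kernels above: per element,
   dst = saturate_cast<T>(src1 * src2 * scale), and the IPP fast path in CALL_IPP_MUL is
   only attempted when the scale is numerically 1. Example with arbitrary 8-bit values:

       src1[x] = 100, src2[x] = 3, scale = 1.0   ->  300 saturates to dst[x] = 255
       src1[x] = 100, src2[x] = 3, scale = 0.5   ->  dst[x] = 150
*/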
//=======================================
// Div
//=======================================

void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
            uchar* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(div8u, cv_hal_div8u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    if( src1 )
        div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
    else
        recip_i(src2, step2, dst, step, width, height, *(const double*)scale);
}

void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
            schar* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(div8s, cv_hal_div8s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
             ushort* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(div16u, cv_hal_div16u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void div16s( const short* src1, size_t step1, const short* src2, size_t step2,
             short* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(div16s, cv_hal_div16s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void div32s( const int* src1, size_t step1, const int* src2, size_t step2,
             int* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(div32s, cv_hal_div32s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void div32f( const float* src1, size_t step1, const float* src2, size_t step2,
             float* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(div32f, cv_hal_div32f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    div_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void div64f( const double* src1, size_t step1, const double* src2, size_t step2,
             double* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(div64f, cv_hal_div64f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    div_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}
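/* Division semantics sketch (my reading of the div_i / recip_i helpers defined earlier in
   this file, stated as an assumption rather than a spec): the integer kernels compute
   dst = saturate_cast<T>(src1 * scale / src2) and write 0 wherever src2 is 0; when div8u
   is reached with a NULL numerator (e.g. via cvDiv(NULL, src, dst, scale)), the reciprocal
   kernel computes dst = saturate_cast<T>(scale / src2) instead. Example, arbitrary values:

       src1[x] = 10, src2[x] = 4, scale = 2.0   ->  dst[x] = 5
       src2[x] = 0                              ->  dst[x] = 0
*/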
//=======================================
// Recip
//=======================================

void recip8u( const uchar*, size_t, const uchar* src2, size_t step2,
              uchar* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(recip8u, cv_hal_recip8u, src2, step2, dst, step, width, height, *(const double*)scale)
    recip_i(src2, step2, dst, step, width, height, *(const double*)scale);
}

void recip8s( const schar*, size_t, const schar* src2, size_t step2,
              schar* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(recip8s, cv_hal_recip8s, src2, step2, dst, step, width, height, *(const double*)scale)
    recip_i(src2, step2, dst, step, width, height, *(const double*)scale);
}

void recip16u( const ushort*, size_t, const ushort* src2, size_t step2,
               ushort* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(recip16u, cv_hal_recip16u, src2, step2, dst, step, width, height, *(const double*)scale)
    recip_i(src2, step2, dst, step, width, height, *(const double*)scale);
}

void recip16s( const short*, size_t, const short* src2, size_t step2,
               short* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(recip16s, cv_hal_recip16s, src2, step2, dst, step, width, height, *(const double*)scale)
    recip_i(src2, step2, dst, step, width, height, *(const double*)scale);
}

void recip32s( const int*, size_t, const int* src2, size_t step2,
               int* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(recip32s, cv_hal_recip32s, src2, step2, dst, step, width, height, *(const double*)scale)
    recip_i(src2, step2, dst, step, width, height, *(const double*)scale);
}

void recip32f( const float*, size_t, const float* src2, size_t step2,
               float* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(recip32f, cv_hal_recip32f, src2, step2, dst, step, width, height, *(const double*)scale)
    recip_f(src2, step2, dst, step, width, height, *(const double*)scale);
}

void recip64f( const double*, size_t, const double* src2, size_t step2,
               double* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(recip64f, cv_hal_recip64f, src2, step2, dst, step, width, height, *(const double*)scale)
    recip_f(src2, step2, dst, step, width, height, *(const double*)scale);
}
//=======================================
// AddWeighted
//=======================================

void
addWeighted8u( const uchar* src1, size_t step1,
               const uchar* src2, size_t step2,
               uchar* dst, size_t step, int width, int height,
               void* scalars )
{
    CALL_HAL(addWeighted8u, cv_hal_addWeighted8u, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
    const double* scalars_ = (const double*)scalars;
    float alpha = (float)scalars_[0], beta = (float)scalars_[1], gamma = (float)scalars_[2];

    for( ; height--; src1 += step1, src2 += step2, dst += step )
    {
        int x = 0;

#if CV_SIMD128
        if( hasSIMD128() )
        {
            v_float32x4 g = v_setall_f32(gamma);
            v_float32x4 a = v_setall_f32(alpha);
            v_float32x4 b = v_setall_f32(beta);

            for( ; x <= width - v_uint16x8::nlanes; x += v_uint16x8::nlanes )
            {
                v_uint16x8 in1_16 = v_load_expand(src1 + x);
                v_int32x4 in1_32_l, in1_32_h;
                v_expand(v_reinterpret_as_s16(in1_16), in1_32_l, in1_32_h);
                v_float32x4 in1_f_l = v_cvt_f32(in1_32_l);
                v_float32x4 in1_f_h = v_cvt_f32(in1_32_h);

                v_uint16x8 in2_16 = v_load_expand(src2 + x);
                v_int32x4 in2_32_l, in2_32_h;
                v_expand(v_reinterpret_as_s16(in2_16), in2_32_l, in2_32_h);
                v_float32x4 in2_f_l = v_cvt_f32(in2_32_l);
                v_float32x4 in2_f_h = v_cvt_f32(in2_32_h);

                v_int32x4 out_l = v_round(in1_f_l * a + in2_f_l * b + g);
                v_int32x4 out_h = v_round(in1_f_h * a + in2_f_h * b + g);

                v_int16x8 out_16 = v_pack(out_l, out_h);
                v_pack_u_store(dst + x, out_16);
            }
        }
#endif
        #if CV_ENABLE_UNROLLED
        for( ; x <= width - 4; x += 4 )
        {
            float t0, t1;
            t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma;
            t1 = CV_8TO32F(src1[x+1])*alpha + CV_8TO32F(src2[x+1])*beta + gamma;

            dst[x] = saturate_cast<uchar>(t0);
            dst[x+1] = saturate_cast<uchar>(t1);

            t0 = CV_8TO32F(src1[x+2])*alpha + CV_8TO32F(src2[x+2])*beta + gamma;
            t1 = CV_8TO32F(src1[x+3])*alpha + CV_8TO32F(src2[x+3])*beta + gamma;

            dst[x+2] = saturate_cast<uchar>(t0);
            dst[x+3] = saturate_cast<uchar>(t1);
        }
        #endif

        for( ; x < width; x++ )
        {
            float t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma;
            dst[x] = saturate_cast<uchar>(t0);
        }
    }
}
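/* The scalar tail above doubles as the reference formula for this kernel:
   dst[x] = saturate_cast<uchar>(src1[x]*alpha + src2[x]*beta + gamma), accumulated in
   float. A worked example with arbitrary values:

       src1[x] = 200, src2[x] = 100, alpha = 0.7f, beta = 0.5f, gamma = 10
           200*0.7 + 100*0.5 + 10 = 200             ->  dst[x] = 200
       same inputs with alpha = 1.5f
           200*1.5 + 100*0.5 + 10 = 360             ->  dst[x] = 255 (saturated)
*/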
void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
                    schar* dst, size_t step, int width, int height, void* scalars )
{
    CALL_HAL(addWeighted8s, cv_hal_addWeighted8s, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
    addWeighted_<schar, float>(src1, step1, src2, step2, dst, step, width, height, scalars);
}

void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
                     ushort* dst, size_t step, int width, int height, void* scalars )
{
    CALL_HAL(addWeighted16u, cv_hal_addWeighted16u, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
    addWeighted_<ushort, float>(src1, step1, src2, step2, dst, step, width, height, scalars);
}

void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2,
                     short* dst, size_t step, int width, int height, void* scalars )
{
    CALL_HAL(addWeighted16s, cv_hal_addWeighted16s, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
    addWeighted_<short, float>(src1, step1, src2, step2, dst, step, width, height, scalars);
}

void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2,
                     int* dst, size_t step, int width, int height, void* scalars )
{
    CALL_HAL(addWeighted32s, cv_hal_addWeighted32s, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
    addWeighted_<int, double>(src1, step1, src2, step2, dst, step, width, height, scalars);
}

void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2,
                     float* dst, size_t step, int width, int height, void* scalars )
{
    CALL_HAL(addWeighted32f, cv_hal_addWeighted32f, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
    addWeighted_<float, double>(src1, step1, src2, step2, dst, step, width, height, scalars);
}

void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2,
                     double* dst, size_t step, int width, int height, void* scalars )
{
    CALL_HAL(addWeighted64f, cv_hal_addWeighted64f, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
    addWeighted_<double, double>(src1, step1, src2, step2, dst, step, width, height, scalars);
}