1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "precomp.hpp"
44 #include "opencl_kernels_core.hpp"
46 ///////////////////////////////// UMat implementation ///////////////////////////////
50 // it should be a prime number for the best hash function
// Fixed pool of UMAT_NLOCKS mutexes shared by all UMatData objects.
// The lock/unlock code in this file picks one entry by taking the object's
// address modulo UMAT_NLOCKS.
51 enum { UMAT_NLOCKS = 31 };
52 static Mutex umatLocks[UMAT_NLOCKS];
// Constructs a UMatData record bound to `allocator`.
// NOTE(review): this listing omits several body lines (the embedded line
// numbers skip 55, 58 and 60-67), so only the visible initialisations are
// described here.
54 UMatData::UMatData(const MatAllocator* allocator)
// The supplied allocator is recorded as both the previous and the current one.
56 prevAllocator = currAllocator = allocator;
// Both reference counters start at zero.
57 urefcount = refcount = 0;
// size and capacity both start at zero.
59 size = 0; capacity = 0;
// NOTE(review): the header of this definition is not present in this listing
// (embedded line numbers jump from 59 to 68). Presumably this is the body of
// the UMatData destructor -- confirm against the full file.
// The visible statements reset both allocator pointers, both reference
// counters, and the size/capacity fields to zero.
68 prevAllocator = currAllocator = 0;
69 urefcount = refcount = 0;
71 size = 0; capacity = 0;
// NOTE(review): the header of this definition is not present in this listing.
// Presumably this is the body of UMatData::lock(), the counterpart of
// UMatData::unlock() -- confirm against the full file.
// Locks the pool mutex selected by this object's address modulo UMAT_NLOCKS.
80 umatLocks[(size_t)(void*)this % UMAT_NLOCKS].lock();
// Unlocks the pool mutex selected by this object's address modulo UMAT_NLOCKS.
// The index expression is the same one used by the matching lock() statement,
// so both calls address the same mutex for a given object.
83 void UMatData::unlock()
85 umatLocks[(size_t)(void*)this % UMAT_NLOCKS].unlock();
// Returns the allocator UMat uses by default.
// When ocl::haveOpenCL() and ocl::useOpenCL() both return true the OpenCL
// allocator is returned; otherwise the call falls back to Mat's standard
// allocator.
89 MatAllocator* UMat::getStdAllocator()
92 if( ocl::haveOpenCL() && ocl::useOpenCL() )
93 return ocl::getOpenCLAllocator();
95 return Mat::getStdAllocator();
// Exchanges the header fields of two UMat objects member by member.
// NOTE(review): some lines of this function are not present in this listing
// (e.g. embedded lines 105, 116, 122); only the visible statements are
// described.
98 void swap( UMat& a, UMat& b )
100 std::swap(a.flags, b.flags);
101 std::swap(a.dims, b.dims);
102 std::swap(a.rows, b.rows);
103 std::swap(a.cols, b.cols);
104 std::swap(a.allocator, b.allocator);
106 std::swap(a.offset, b.offset);
// The size/step pointers and the two entries of each step.buf are swapped too.
108 std::swap(a.size.p, b.size.p);
109 std::swap(a.step.p, b.step.p);
110 std::swap(a.step.buf[0], b.step.buf[0]);
111 std::swap(a.step.buf[1], b.step.buf[1]);
// After the pointer swap a.step.p may point into b's step.buf; if so it is
// redirected to a's own step.buf (whose contents were swapped above).
113 if( a.step.p == b.step.buf )
115 a.step.p = a.step.buf;
// Same fix-up in the other direction.
119 if( b.step.p == a.step.buf )
121 b.step.p = b.step.buf;
// Sets the dimensionality, sizes and steps of header `m`.
//   _dims     - new number of dimensions; asserted to lie in [0, CV_MAX_DIM].
//   _sz       - per-dimension sizes (the lines that read it are not shown in
//               this listing).
//   _steps    - explicit steps; in the visible assignment the last dimension
//               always gets the element size and the others read _steps[i].
//   autoSteps - defaults to false; its handling is on lines not shown here.
// Raises CV_StsOutOfRange when the running total does not fit in size_t.
127 static inline void setSize( UMat& m, int _dims, const int* _sz,
128 const size_t* _steps, bool autoSteps=false )
130 CV_Assert( 0 <= _dims && _dims <= CV_MAX_DIM );
// The storage for steps/sizes is only rebuilt when the dimension count changes.
131 if( m.dims != _dims )
// A step array that is not step.buf is replaced by step.buf here.
133 if( m.step.p != m.step.buf )
136 m.step.p = m.step.buf;
// One fastMalloc block holds _dims step entries followed by _dims+1 ints.
// size.p points one int past the start of the int area, and the int just
// before it (size.p[-1]) stores the dimension count.
141 m.step.p = (size_t*)fastMalloc(_dims*sizeof(m.step.p[0]) + (_dims+1)*sizeof(m.size.p[0]));
142 m.size.p = (int*)(m.step.p + _dims) + 1;
143 m.size.p[-1] = _dims;
144 m.rows = m.cols = -1;
// `total` starts at the element size and is accumulated across dimensions.
152 size_t esz = CV_ELEM_SIZE(m.flags), total = esz;
// Dimensions are visited from the last one to the first.
154 for( i = _dims-1; i >= 0; i-- )
161 m.step.p[i] = i < _dims-1 ? _steps[i] : esz;
// The product is formed in 64 bits and rejected if casting it to size_t
// changes its value (`s` is defined on a line not shown in this listing).
165 int64 total1 = (int64)total*s;
166 if( (uint64)total1 != (size_t)total1 )
167 CV_Error( CV_StsOutOfRange, "The total matrix size does not fit to \"size_t\" type" );
168 total = (size_t)total1;
// Recomputes UMat::CONTINUOUS_FLAG in m.flags from the current sizes and steps.
// NOTE(review): loop bodies and the `else` line are not present in this
// listing; comments describe the visible statements only.
180 static void updateContinuityFlag(UMat& m)
183 for( i = 0; i < m.dims; i++ )
// Scan from the last dimension down towards dimension i, comparing each
// dimension's extent (step*size) with the step of the dimension before it.
189 for( j = m.dims-1; j > i; j-- )
191 if( m.step[j]*m.size[j] < m.step[j-1] )
// Extent of dimension 0, computed in 64 bits.
195 uint64 total = (uint64)m.step[0]*m.size[0];
// The flag is set only when the scan reached j <= i and the total survives a
// cast to size_t.
196 if( j <= i && total == (size_t)total )
197 m.flags |= UMat::CONTINUOUS_FLAG;
// Clears the flag (presumably the else branch of the test above).
199 m.flags &= ~UMat::CONTINUOUS_FLAG;
// Finishes a header after its sizes/steps were set: refreshes the continuity
// flag and, in the visible code, sets rows and cols to -1.
// NOTE(review): any condition guarding the rows/cols assignment is on a line
// not present in this listing.
203 static void finalizeHdr(UMat& m)
205 updateContinuityFlag(m);
208 m.rows = m.cols = -1;
// Produces a UMat header for this Mat.
//   accessFlags, usageFlags - forwarded to the allocator calls below.
// NOTE(review): the conditions between the visible statements (and the return)
// are not present in this listing.
211 UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
// Start from the Mat's existing UMatData pointer.
216 UMatData* temp_u = u;
219 MatAllocator *a = allocator, *a0 = getStdAllocator();
// A UMatData is allocated around the existing data pointer and steps, and its
// refcount is set to 1.
222 temp_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags, usageFlags);
223 temp_u->refcount = 1;
// UMat's standard allocator is then asked to allocate for that same UMatData.
225 UMat::getStdAllocator()->allocate(temp_u, accessFlags, usageFlags);
// The new header copies this Mat's dimensions, sizes and steps.
227 setSize(hdr, dims, size.p, step.p);
// Offset of this Mat's first element from the start of its data block.
230 hdr.offset = data - datastart;
// (Re)creates this UMat with `d` dimensions of sizes `_sizes` and type `_type`.
//   d           - number of dimensions; asserted to lie in [0, CV_MAX_DIM].
//   _sizes      - per-dimension sizes; asserted non-null.
//   _type       - element type; reduced with CV_MAT_TYPE before use.
//   _usageFlags - stored in this->usageFlags.
// NOTE(review): several control-flow lines (returns, release, try/else) are not
// present in this listing; comments describe the visible statements only.
235 void UMat::create(int d, const int* _sizes, int _type, UMatUsageFlags _usageFlags)
237 this->usageFlags = _usageFlags;
240 CV_Assert(0 <= d && d <= CV_MAX_DIM && _sizes);
241 _type = CV_MAT_TYPE(_type);
// When data already exists with the same type and a compatible dimension
// count, the requested sizes are compared with the current ones.
243 if( u && (d == dims || (d == 1 && dims <= 2)) && _type == type() )
245 if( d == 2 && rows == _sizes[0] && cols == _sizes[1] )
247 for( i = 0; i < d; i++ )
248 if( size[i] != _sizes[i] )
// i == d means every size matched; for d == 1 the second size must also be 1.
250 if( i == d && (d > 1 || size[1] == 1))
// flags are rebuilt from the type bits plus MAGIC_VAL.
257 flags = (_type & CV_MAT_TYPE_MASK) | MAGIC_VAL;
// No explicit steps are passed; autoSteps is requested instead.
258 setSize(*this, d, _sizes, 0, true);
263 MatAllocator *a = allocator, *a0 = getStdAllocator();
// Two allocation calls are visible: one through `a`, one through the standard
// allocator `a0`. What selects between them is on lines not shown here.
268 u = a->allocate(dims, size, _type, 0, step.p, 0, usageFlags);
274 u = a0->allocate(dims, size, _type, 0, step.p, 0, usageFlags);
// The last dimension's step must equal the element size.
277 CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) );
// Gives this header the same dimension count as `m`, then iterates over the
// dimensions.
// NOTE(review): the loop body is not present in this listing; presumably it
// copies per-dimension size and step values from `m` -- confirm.
284 void UMat::copySize(const UMat& m)
// setSize is called with null size and step arrays, i.e. only the dimension
// count is supplied here.
286 setSize(*this, m.dims, 0, 0);
287 for( int i = 0; i < dims; i++ )
// NOTE(review): the embedded line number (298) is eleven lines after the loop
// header of copySize, so this test most likely belongs to a later definition
// whose header is not present in this listing (presumably the UMat
// destructor) -- confirm against the full file.
// Tests whether the step array is something other than step.buf; the guarded
// statement is not shown.
298 if( step.p != step.buf )
// Passes this UMat's data block `u` to the deallocate() method of the
// allocator recorded in u->currAllocator.
302 void UMat::deallocate()
304 u->currAllocator->deallocate(u);
// Constructs a header for the sub-array of `m` selected by a row range and a
// column range.
//   m         - source array; asserted to have at least 2 dimensions.
//   _rowRange - rows to keep; Range::all() keeps every row.
//   _colRange - columns to keep; Range::all() keeps every column.
// NOTE(review): several lines (including the copy from `m` and early returns)
// are not present in this listing.
309 UMat::UMat(const UMat& m, const Range& _rowRange, const Range& _colRange)
310 : flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows)
312 CV_Assert( m.dims >= 2 );
// A per-dimension range buffer is built in which every dimension from index 2
// upwards is selected in full.
315 AutoBuffer<Range> rs(m.dims);
318 for( int i = 2; i < m.dims; i++ )
319 rs[i] = Range::all();
// Row selection: only applied when the range is not the full set of rows.
325 if( _rowRange != Range::all() && _rowRange != Range(0,rows) )
327 CV_Assert( 0 <= _rowRange.start && _rowRange.start <= _rowRange.end && _rowRange.end <= m.rows );
328 rows = _rowRange.size();
// Advance the offset by whole rows.
329 offset += step*_rowRange.start;
330 flags |= SUBMATRIX_FLAG;
// Column selection: only applied when the range is not the full set of columns.
333 if( _colRange != Range::all() && _colRange != Range(0,cols) )
335 CV_Assert( 0 <= _colRange.start && _colRange.start <= _colRange.end && _colRange.end <= m.cols );
336 cols = _colRange.size();
// Advance the offset by whole elements.
337 offset += _colRange.start*elemSize();
// Keeping fewer columns than the source clears the continuity flag; the -1
// mask leaves flags unchanged otherwise.
338 flags &= cols < m.cols ? ~CONTINUOUS_FLAG : -1;
339 flags |= SUBMATRIX_FLAG;
// Sets the continuity flag (the guarding condition is on a line not shown).
343 flags |= CONTINUOUS_FLAG;
// Empty-result check; the guarded statements are not shown.
345 if( rows <= 0 || cols <= 0 )
// Constructs a header for the rectangular region `roi` of `m`.
//   m   - source array; asserted to have at most 2 dimensions.
//   roi - region to view; asserted to lie inside m.
// The new header shares m's data block `u` and allocator.
// NOTE(review): some lines (e.g. embedded lines 362, 365) are not present in
// this listing.
353 UMat::UMat(const UMat& m, const Rect& roi)
354 : flags(m.flags), dims(2), rows(roi.height), cols(roi.width),
// The initial offset already accounts for the roi's first row.
355 allocator(m.allocator), usageFlags(m.usageFlags), u(m.u), offset(m.offset + roi.y*m.step[0]), size(&rows)
357 CV_Assert( m.dims <= 2 );
// A roi narrower than the source clears the continuity flag ...
358 flags &= roi.width < m.cols ? ~CONTINUOUS_FLAG : -1;
// ... but a single-row roi sets it.
359 flags |= roi.height == 1 ? CONTINUOUS_FLAG : 0;
361 size_t esz = CV_ELEM_SIZE(flags);
363 CV_Assert( 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols &&
364 0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows );
// Atomically increments the shared block's urefcount.
366 CV_XADD(&(u->urefcount), 1);
367 if( roi.width < m.cols || roi.height < m.rows )
368 flags |= SUBMATRIX_FLAG;
// Row step is inherited from m; column step is the element size.
370 step[0] = m.step[0]; step[1] = esz;
// Empty-result check; the guarded statements are not shown.
372 if( rows <= 0 || cols <= 0 )
// Constructs a header for the sub-array of `m` selected by one Range per
// dimension.
//   m      - source array.
//   ranges - array of ranges (the lines that index it are not shown here).
// NOTE(review): the definitions of `d`, `i` and `r`, and the copy from `m`,
// are on lines not present in this listing.
380 UMat::UMat(const UMat& m, const Range* ranges)
381 : flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows)
// First pass: every range must be Range::all() or a non-empty range inside
// the corresponding dimension of m.
386 for( i = 0; i < d; i++ )
389 CV_Assert( r == Range::all() || (0 <= r.start && r.start < r.end && r.end <= m.size[i]) );
// Second pass: apply each range that is not the full dimension.
392 for( i = 0; i < d; i++ )
395 if( r != Range::all() && r != Range(0, size.p[i]))
397 size.p[i] = r.end - r.start;
398 offset += r.start*step.p[i];
399 flags |= SUBMATRIX_FLAG;
// Continuity is recomputed from the adjusted sizes and steps.
402 updateContinuityFlag(*this);
// Returns a header viewing diagonal `d` of this array as a single column.
//   d - diagonal index; asserted usable only for arrays of at most 2 dims.
// NOTE(review): the branch conditions on `d` and the declaration of `m`/`len`
// are on lines not present in this listing. The two visible length
// computations are presumably for d >= 0 and d < 0 respectively -- confirm.
405 UMat UMat::diag(int d) const
407 CV_Assert( dims <= 2 );
409 size_t esz = elemSize();
414 len = std::min(cols - d, rows);
419 len = std::min(rows + d, cols);
// Moves the start by -d rows.
420 m.offset -= step[0]*d;
422 CV_DbgAssert( len > 0 );
// The result has len rows and one column.
424 m.size[0] = m.rows = len;
425 m.size[1] = m.cols = 1;
// With more than one element, each step to the next row also advances by one
// element, which walks along the diagonal.
426 m.step[0] += (len > 1 ? esz : 0);
// The continuity flag is cleared or set; the selecting condition is not shown.
429 m.flags &= ~CONTINUOUS_FLAG;
431 m.flags |= CONTINUOUS_FLAG;
// Unless this array is 1x1, the result is marked as a sub-matrix.
433 if( size() != Size(1,1) )
434 m.flags |= SUBMATRIX_FLAG;
// Computes the size of the whole array this header is a view into and the
// position of the view inside it.
//   wholeSize - output: size of the whole array.
//   ofs       - output: position (x, y) of this view's first element.
// Requires at most 2 dimensions and a positive row step.
// NOTE(review): embedded lines 444-448 are not present in this listing.
439 void UMat::locateROI( Size& wholeSize, Point& ofs ) const
441 CV_Assert( dims <= 2 && step[0] > 0 );
442 size_t esz = elemSize(), minstep;
// delta1: this view's offset; delta2: size of the underlying data block.
443 ptrdiff_t delta1 = (ptrdiff_t)offset, delta2 = (ptrdiff_t)u->size;
// Whole rows in the offset give y; the remainder in elements gives x.
449 ofs.y = (int)(delta1/step[0]);
450 ofs.x = (int)((delta1 - step[0]*ofs.y)/esz);
451 CV_DbgAssert( offset == (size_t)(ofs.y*step[0] + ofs.x*esz) );
// minstep: row width measured up to the end of this view's columns.
453 minstep = (ofs.x + cols)*esz;
// Height is derived from the block size and never less than what the view
// itself needs.
454 wholeSize.height = (int)((delta2 - minstep)/step[0] + 1);
455 wholeSize.height = std::max(wholeSize.height, ofs.y + rows);
// Width is what remains after all rows but the last, again never less than
// what the view itself needs.
456 wholeSize.width = (int)((delta2 - step*(wholeSize.height-1))/esz);
457 wholeSize.width = std::max(wholeSize.width, ofs.x + cols);
// Moves the borders of this view inside its whole array.
//   dtop, dbottom, dleft, dright - amounts by which each border is moved
//   outwards (the arithmetic below subtracts dtop/dleft from the start and
//   adds dbottom/dright to the end).
// The result is clamped to the whole array as reported by locateROI.
// Requires at most 2 dimensions and a positive row step.
461 UMat& UMat::adjustROI( int dtop, int dbottom, int dleft, int dright )
463 CV_Assert( dims <= 2 && step[0] > 0 );
464 Size wholeSize; Point ofs;
465 size_t esz = elemSize();
466 locateROI( wholeSize, ofs );
// New first/last row and column, clamped to [0, whole size].
467 int row1 = std::max(ofs.y - dtop, 0), row2 = std::min(ofs.y + rows + dbottom, wholeSize.height);
468 int col1 = std::max(ofs.x - dleft, 0), col2 = std::min(ofs.x + cols + dright, wholeSize.width);
// Shift the offset by the change in starting row and starting column.
469 offset += (row1 - ofs.y)*step + (col1 - ofs.x)*esz;
470 rows = row2 - row1; cols = col2 - col1;
471 size.p[0] = rows; size.p[1] = cols;
// The view is continuous when a row fills the whole step or there is one row.
472 if( esz*cols == step[0] || rows == 1 )
473 flags |= CONTINUOUS_FLAG;
// Clears the flag (presumably the else branch; the else line is not shown).
475 flags &= ~CONTINUOUS_FLAG;
// Returns a header over the same data with a different channel count and/or
// number of rows.
//   new_cn   - new channel count.
//   new_rows - new number of rows; 0 is treated specially by the tests below.
// Errors raised in the visible code: CV_BadStep (row change on a
// non-continuous array), CV_StsOutOfRange (too many rows), CV_StsBadArg
// (elements not divisible by new_rows), CV_BadNumChannels (width not
// divisible by new_cn).
// NOTE(review): declarations of `cn` and `hdr`, returns, and some conditions
// are on lines not present in this listing.
480 UMat UMat::reshape(int new_cn, int new_rows) const
// Case for more than 2 dimensions: only the channel count changes, and the
// last dimension is rescaled accordingly.
485 if( dims > 2 && new_rows == 0 && new_cn != 0 && size[dims-1]*cn % new_cn == 0 )
487 hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT);
488 hdr.step[dims-1] = CV_ELEM_SIZE(hdr.flags);
489 hdr.size[dims-1] = hdr.size[dims-1]*cn / new_cn;
493 CV_Assert( dims <= 2 );
// Row width counted in single-channel values.
498 int total_width = cols * cn;
// If new_cn cannot divide one row and no row count was requested, a row count
// is derived from the total number of values.
500 if( (new_cn > total_width || total_width % new_cn != 0) && new_rows == 0 )
501 new_rows = rows * total_width / new_cn;
// Changing the number of rows.
503 if( new_rows != 0 && new_rows != rows )
505 int total_size = total_width * rows;
506 if( !isContinuous() )
507 CV_Error( CV_BadStep,
508 "The matrix is not continuous, thus its number of rows can not be changed" );
// The unsigned comparison also rejects negative new_rows.
510 if( (unsigned)new_rows > (unsigned)total_size )
511 CV_Error( CV_StsOutOfRange, "Bad new number of rows" );
513 total_width = total_size / new_rows;
515 if( total_width * new_rows != total_size )
516 CV_Error( CV_StsBadArg, "The total number of matrix elements "
517 "is not divisible by the new number of rows" );
// New row step: width in single-channel values times the size of one value.
520 hdr.step[0] = total_width * elemSize1();
// Changing the channel count.
523 int new_width = total_width / new_cn;
525 if( new_width * new_cn != total_width )
526 CV_Error( CV_BadNumChannels,
527 "The total width is not divisible by the new number of channels" );
529 hdr.cols = new_width;
530 hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT);
531 hdr.step[1] = CV_ELEM_SIZE(hdr.flags);
// Builds a square array from the vector `d`.
//   d - must have a single row or a single column.
// The result is len x len, of d's type, created with Scalar(0), where len is
// the number of elements of d.
// NOTE(review): the lines that fill the diagonal and return are not present
// in this listing.
535 UMat UMat::diag(const UMat& d)
537 CV_Assert( d.cols == 1 || d.rows == 1 );
// One of rows/cols is 1, so rows + cols - 1 is the vector length.
538 int len = d.rows + d.cols - 1;
539 UMat m(len, len, d.type(), Scalar(0));
// Checks whether this array can be treated as a vector of elements that each
// have `_elemChannels` values.
//   _elemChannels      - number of values per element.
//   _depth             - required depth; a value <= 0 accepts any depth.
//   _requireContinuous - when true, the array must be continuous.
// Returns total()*channels()/_elemChannels when the layout matches, otherwise
// -1. Accepted layouts:
//   - 2 dims: a single row or column whose channel count is _elemChannels, or
//     single-channel data with exactly _elemChannels columns;
//   - 3 dims: single-channel data whose last dimension is _elemChannels and
//     whose first or second dimension is 1, provided it is continuous or
//     step[1] equals step[2]*size[2].
548 int UMat::checkVector(int _elemChannels, int _depth, bool _requireContinuous) const
550 return (depth() == _depth || _depth <= 0) &&
551 (isContinuous() || !_requireContinuous) &&
552 ((dims == 2 && (((rows == 1 || cols == 1) && channels() == _elemChannels) ||
553 (cols == _elemChannels && channels() == 1))) ||
554 (dims == 3 && channels() == 1 && size.p[2] == _elemChannels && (size.p[0] == 1 || size.p[1] == 1) &&
555 (isContinuous() || step.p[1] == step.p[2]*size.p[2])))
556 ? (int)(total()*channels()/_elemChannels) : -1;
// N-dimensional reshape entry point.
//   _cn       - new channel count.
//   _newndims - requested number of dimensions.
//   _newsz    - requested sizes.
// The visible code forwards to reshape(_cn, _newsz[0]) and otherwise raises
// CV_StsNotImplemented.
// NOTE(review): the conditions between the first test and the forwarding call
// are on lines not present in this listing.
559 UMat UMat::reshape(int _cn, int _newndims, const int* _newsz) const
561 if(_newndims == dims)
566 return reshape(_cn, _newsz[0]);
569 CV_Error(CV_StsNotImplemented, "");
// Returns a Mat header over this UMat's data.
//   accessFlags - requested access; ACCESS_READ is always added when mapping.
// NOTE(review): some lines (e.g. the handling of a null `u`, the assignment of
// hdr.u, and the return) are not present in this listing.
575 Mat UMat::getMat(int accessFlags) const
// Ask the current allocator to map the block; afterwards u->data must be set.
579 u->currAllocator->map(u, accessFlags | ACCESS_READ);
580 CV_Assert(u->data != 0);
// The Mat views the same sizes/steps, starting at this view's offset.
581 Mat hdr(dims, size.p, type(), u->data + offset, step.p);
// Data bounds of the Mat span the whole mapped block.
584 hdr.datastart = u->data;
585 hdr.data = u->data + offset;
586 hdr.datalimit = hdr.dataend = u->data + u->size;
// Atomically increments the block's refcount for the returned Mat.
587 CV_XADD(&hdr.u->refcount, 1);
// Prepares the data block for access through its handle.
//   accessFlags - requested access.
// NOTE(review): the null check on `u` and the return statement are on lines
// not present in this listing.
591 void* UMat::handle(int accessFlags) const
// A write request marks the host copy as obsolete.
596 if ((accessFlags & ACCESS_WRITE) != 0)
597 u->markHostCopyObsolete(true);
599 // check flags: if CPU copy is newer, copy it back to GPU.
600 if( u->deviceCopyObsolete() )
// unmap is only called when refcount is zero (asserted).
602 CV_Assert(u->refcount == 0);
603 u->currAllocator->unmap(u);
// Decomposes this view's offset into one index per dimension, written to
// `ofs` (see the formula in the comment below).
// NOTE(review): the loop body beyond reading the step is not present in this
// listing.
608 void UMat::ndoffset(size_t* ofs) const
610 // offset = step[0]*ofs[0] + step[1]*ofs[1] + step[2]*ofs[2] + ...;
612 for( int i = 0; i < dims; i++ )
614 size_t s = step.p[i];
// Copies this array into `_dst`.
// NOTE(review): returns, the empty-array check and the loop body are on lines
// not present in this listing; comments describe visible statements only.
620 void UMat::copyTo(OutputArray _dst) const
// A destination with a fixed, different type is handled by conversion; the
// channel counts must agree.
622 int dtype = _dst.type();
623 if( _dst.fixedType() && dtype != type() )
625 CV_Assert( channels() == CV_MAT_CN(dtype) );
626 convertTo( _dst, dtype );
// Per-dimension size and offset arrays for the allocator calls below.
636 size_t i, sz[CV_MAX_DIM], srcofs[CV_MAX_DIM], dstofs[CV_MAX_DIM], esz = elemSize();
637 for( i = 0; i < (size_t)dims; i++ )
// The last-dimension offset is scaled by the element size.
641 srcofs[dims-1] *= esz;
643 _dst.create( dims, size.p, type() );
646 UMat dst = _dst.getUMat();
// Source and destination refer to the same block at the same offset.
647 if( u == dst.u && dst.offset == offset )
// Same allocator on both sides: the allocator's copy() is used.
650 if (u->currAllocator == dst.u->currAllocator)
652 dst.ndoffset(dstofs);
653 dstofs[dims-1] *= esz;
654 u->currAllocator->copy(u, dst.u, dims, sz, srcofs, step.p, dstofs, dst.step.p, false);
// Otherwise the data is downloaded into a Mat view of the destination.
659 Mat dst = _dst.getMat();
660 u->currAllocator->download(u, dst.data, dims, sz, srcofs, step.p, dst.step.p);
// Copies the elements of this array selected by `_mask` into `_dst`.
//   _mask - must have depth CV_8U and either 1 channel or the same channel
//           count as this array (asserted).
// An OpenCL kernel is tried first; the Mat implementation is used otherwise.
// NOTE(review): the empty-mask shortcut, returns and some guards are on lines
// not present in this listing.
663 void UMat::copyTo(OutputArray _dst, InputArray _mask) const
671 int cn = channels(), mtype = _mask.type(), mdepth = CV_MAT_DEPTH(mtype), mcn = CV_MAT_CN(mtype);
672 CV_Assert( mdepth == CV_8U && (mcn == 1 || mcn == cn) );
// OpenCL path: only for UMat destinations with at most 2 dimensions.
674 if (ocl::useOpenCL() && _dst.isUMat() && dims <= 2)
// Remember the destination's data block before create() so a reallocation can
// be detected afterwards.
676 UMatData * prevu = _dst.getUMat().u;
677 _dst.create( dims, size, type() );
679 UMat dst = _dst.getUMat();
681 bool haveDstUninit = false;
682 if( prevu != dst.u ) // do not leave dst uninitialized
683 haveDstUninit = true;
// Kernel build options; HAVE_DST_UNINIT is added when the block changed.
685 String opts = format("-D COPY_TO_MASK -D T1=%s -D scn=%d -D mcn=%d%s",
686 ocl::memopTypeToStr(depth()), cn, mcn,
687 haveDstUninit ? " -D HAVE_DST_UNINIT" : "");
689 ocl::Kernel k("copyToMask", ocl::core::copyset_oclsrc, opts);
// dst is passed write-only when its block is new, read-write otherwise.
692 k.args(ocl::KernelArg::ReadOnlyNoSize(*this),
693 ocl::KernelArg::ReadOnlyNoSize(_mask.getUMat()),
694 haveDstUninit ? ocl::KernelArg::WriteOnly(dst) :
695 ocl::KernelArg::ReadWrite(dst));
// One work-item per element.
// NOTE(review): if cols/rows are int, this brace-initialisation of size_t is a
// narrowing conversion under C++11 list-initialisation rules; explicit
// (size_t) casts would avoid compiler diagnostics -- confirm.
697 size_t globalsize[2] = { cols, rows };
698 if (k.run(2, globalsize, NULL, false))
// Fallback: perform the masked copy through a Mat view of this array.
703 Mat src = getMat(ACCESS_READ);
704 src.copyTo(_dst, _mask);
// Converts this array to another depth with optional scaling, writing `_dst`.
//   _type - requested type; its channel count is replaced by this array's.
//   alpha - scale factor.
//   beta  - value added after scaling (per the parameter's use as the second
//           scaling argument; the arithmetic itself is in the kernel / Mat
//           implementation, not in this file).
// An OpenCL kernel is tried first; the Mat implementation is used otherwise.
// NOTE(review): returns, the `cvt` buffer declaration, the kernel-empty check
// and some guards are on lines not present in this listing.
707 void UMat::convertTo(OutputArray _dst, int _type, double alpha, double beta) const
// Scaling is treated as absent when alpha is within DBL_EPSILON of 1 and beta
// within DBL_EPSILON of 0.
709 bool noScale = std::fabs(alpha - 1) < DBL_EPSILON && std::fabs(beta) < DBL_EPSILON;
710 int stype = type(), cn = CV_MAT_CN(stype);
// Type chosen from the destination when it is fixed, else the source type
// (the guarding condition is on a line not shown).
713 _type = _dst.fixedType() ? _dst.type() : stype;
// Keep the requested depth but force the source's channel count.
715 _type = CV_MAKETYPE(CV_MAT_DEPTH(_type), cn);
717 int sdepth = CV_MAT_DEPTH(stype), ddepth = CV_MAT_DEPTH(_type);
// Same depth and no scaling (the guarded statements are not shown).
718 if( sdepth == ddepth && noScale )
// Doubles are usable only if the default device reports double support.
724 bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
725 bool needDouble = sdepth == CV_64F || ddepth == CV_64F;
726 if( dims <= 2 && cn && _dst.isUMat() && ocl::useOpenCL() &&
727 ((needDouble && doubleSupport) || !needDouble) )
// Working depth is at least CV_32F; each work-item handles 4 rows.
729 int wdepth = std::max(CV_32F, sdepth), rowsPerWI = 4;
732 ocl::Kernel k("convertTo", ocl::core::convert_oclsrc,
733 format("-D srcT=%s -D WT=%s -D dstT=%s -D convertToWT=%s -D convertToDT=%s%s",
734 ocl::typeToStr(sdepth), ocl::typeToStr(wdepth), ocl::typeToStr(ddepth),
735 ocl::convertTypeStr(sdepth, wdepth, 1, cvt[0]),
736 ocl::convertTypeStr(wdepth, ddepth, 1, cvt[1]),
737 doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
741 _dst.create( size(), _type );
742 UMat dst = _dst.getUMat();
// alpha/beta are passed as float when the working depth is CV_32F and as
// double otherwise.
744 float alphaf = (float)alpha, betaf = (float)beta;
745 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
746 dstarg = ocl::KernelArg::WriteOnly(dst, cn);
748 if (wdepth == CV_32F)
749 k.args(srcarg, dstarg, alphaf, betaf, rowsPerWI);
751 k.args(srcarg, dstarg, alpha, beta, rowsPerWI);
// Width counts individual channel values; height is rows rounded up to a
// multiple of rowsPerWI, divided by rowsPerWI.
// NOTE(review): if dst.cols/dst.rows are int, this brace-initialisation of
// size_t is a narrowing conversion under C++11 list-initialisation rules --
// confirm and consider explicit (size_t) casts.
753 size_t globalsize[2] = { dst.cols * cn, (dst.rows + rowsPerWI - 1) / rowsPerWI };
754 if (k.run(2, globalsize, NULL, false))
// Fallback: convert through a Mat view of this array.
759 Mat m = getMat(ACCESS_READ);
760 m.convertTo(_dst, _type, alpha, beta);
// Sets all elements, or those selected by `_mask`, to `_value`.
//   _value - scalar to store; validated with checkScalar on the OpenCL path.
//   _mask  - optional; on the OpenCL path it must match this array's size and
//            be CV_8UC1 (asserted).
// An OpenCL kernel is tried first; the Mat implementation is used otherwise.
// NOTE(review): returns, the kernel-empty check, the `mask` declaration and
// the if/else around the mask handling are on lines not present in this
// listing.
763 UMat& UMat::setTo(InputArray _value, InputArray _mask)
765 bool haveMask = !_mask.empty();
767 int tp = type(), cn = CV_MAT_CN(tp);
// OpenCL path: at most 2 dims, at most 4 channels, depth below CV_64F.
769 if( dims <= 2 && cn <= 4 && CV_MAT_DEPTH(tp) < CV_64F && ocl::useOpenCL() )
771 Mat value = _value.getMat();
772 CV_Assert( checkScalar(value, type(), _value.kind(), _InputArray::UMAT) );
// The scalar is converted to the array's type into a 4-double scratch buffer.
773 double buf[4] = { 0, 0, 0, 0 };
774 convertAndUnrollScalar(value, tp, (uchar *)buf, 1);
// 3-channel scalars are passed as 4 values; rows per work-item is 4 on Intel
// devices and 1 otherwise.
776 int scalarcn = cn == 3 ? 4 : cn, rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
777 String opts = format("-D dstT=%s -D rowsPerWI=%d -D dstST=%s -D dstT1=%s -D cn=%d",
778 ocl::memopTypeToStr(tp), rowsPerWI,
779 ocl::memopTypeToStr(CV_MAKETYPE(tp, scalarcn)),
780 ocl::memopTypeToStr(CV_MAT_DEPTH(tp)), cn);
// Kernel name depends on whether a mask was supplied.
782 ocl::Kernel setK(haveMask ? "setMask" : "set", ocl::core::copyset_oclsrc, opts);
// The scalar is passed by value: scalarcn values of the array's depth.
785 ocl::KernelArg scalararg(0, 0, 0, 0, buf, CV_ELEM_SIZE1(tp) * scalarcn);
// Masked variant: destination is read-write.
790 mask = _mask.getUMat();
791 CV_Assert( mask.size() == size() && mask.type() == CV_8UC1 );
792 ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
793 dstarg = ocl::KernelArg::ReadWrite(*this);
794 setK.args(maskarg, dstarg, scalararg);
// Unmasked variant: destination is write-only.
798 ocl::KernelArg dstarg = ocl::KernelArg::WriteOnly(*this);
799 setK.args(dstarg, scalararg);
// NOTE(review): if cols/rows are int, this brace-initialisation of size_t is a
// narrowing conversion under C++11 list-initialisation rules -- confirm and
// consider explicit (size_t) casts.
802 size_t globalsize[] = { cols, (rows + rowsPerWI - 1) / rowsPerWI };
803 if( setK.run(2, globalsize, NULL, false) )
// Fallback through a Mat view: read-write access with a mask, write-only
// access without one.
808 Mat m = getMat(haveMask ? ACCESS_RW : ACCESS_WRITE);
809 m.setTo(_value, _mask);
// Assignment from a Scalar.
// NOTE(review): the body of this operator is not present in this listing, so
// its behaviour is not documented here.
813 UMat& UMat::operator = (const Scalar& s)
// Inverts this array by calling invert(*this, m, method).
//   method - passed through to invert().
// NOTE(review): the declaration of `m` and the return are on lines not
// present in this listing.
826 UMat UMat::inv(int method) const
829 invert(*this, m, method);
// Multiplies this array with `m` by calling multiply(*this, m, dst, scale).
//   m     - second operand.
//   scale - passed through to multiply().
// NOTE(review): the declaration of `dst` and the return are on lines not
// present in this listing.
833 UMat UMat::mul(InputArray m, double scale) const
836 multiply(*this, m, dst, scale);
// OpenCL implementation of the dot product of two arrays.
//   _src1, _src2 - operands; both are viewed as single-channel arrays.
//   res          - output: receives the sum of the partial results.
// NOTE(review): the return statements, the `cvt` buffer declaration, the
// kernel-empty check and the body of the alignment loop are on lines not
// present in this listing.
842 static bool ocl_dot( InputArray _src1, InputArray _src2, double & res )
844 UMat src1 = _src1.getUMat().reshape(1), src2 = _src2.getUMat().reshape(1);
// kercn: vector width suggested for these two inputs.
846 int type = src1.type(), depth = CV_MAT_DEPTH(type),
847 kercn = ocl::predictOptimalVectorWidth(src1, src2);
848 bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
// CV_64F input on a device without double support (guarded statement not
// shown).
850 if ( !doubleSupport && depth == CV_64F )
// One partial result per compute unit; work-group size is the device maximum;
// accumulation depth is at least CV_32F.
853 int dbsize = ocl::Device::getDefault().maxComputeUnits();
854 size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
855 int ddepth = std::max(CV_32F, depth);
// wgs2_aligned starts at 1 and is advanced while it is below wgs.
857 int wgs2_aligned = 1;
858 while (wgs2_aligned < (int)wgs)
// The generic "reduce" kernel is built with OP_DOT and the type/size options.
863 ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
864 format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D ddepth=%d -D convertToDT=%s -D OP_DOT "
865 "-D WGS=%d -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d",
866 ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(depth),
867 ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
868 ddepth, ocl::convertTypeStr(depth, ddepth, kercn, cvt),
869 (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
870 _src1.isContinuous() ? " -D HAVE_SRC_CONT" : "",
871 _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", kercn));
// Buffer of dbsize partial results.
875 UMat db(1, dbsize, ddepth);
877 ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
878 src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
879 dbarg = ocl::KernelArg::PtrWriteOnly(db);
881 k.args(src1arg, src1.cols, (int)src1.total(), dbsize, dbarg, src2arg);
// dbsize work-groups of wgs work-items each.
883 size_t globalsize = dbsize * wgs;
884 if (k.run(1, &globalsize, &wgs, false))
// The partial results are summed on the host.
886 res = sum(db.getMat(ACCESS_READ))[0];
// Returns the dot product of this array with `m`.
//   m - must have the same size and type as this array (asserted).
// ocl_dot is tried through CV_OCL_RUN_ when there are at most 2 dimensions;
// otherwise, or if that path does not produce a result, the Mat
// implementation is used.
// NOTE(review): the declaration of `r` is on a line not present in this
// listing.
894 double UMat::dot(InputArray m) const
896 CV_Assert(m.sameSize(*this) && m.type() == type());
900 CV_OCL_RUN_(dims <= 2, ocl_dot(*this, m, r), r)
903 return getMat(ACCESS_READ).dot(m);
// Returns a rows x cols array of the given type constructed with
// Scalar::all(0).
906 UMat UMat::zeros(int rows, int cols, int type)
908 return UMat(rows, cols, type, Scalar::all(0));
// Returns an array of the given size and type constructed with
// Scalar::all(0).
911 UMat UMat::zeros(Size size, int type)
913 return UMat(size, type, Scalar::all(0));
// Returns an ndims-dimensional array with sizes `sz` and the given type,
// constructed with Scalar::all(0).
916 UMat UMat::zeros(int ndims, const int* sz, int type)
918 return UMat(ndims, sz, type, Scalar::all(0));
// Convenience overload: forwards to ones(Size, type). Note that Size takes
// (width, height), hence the (cols, rows) argument order.
921 UMat UMat::ones(int rows, int cols, int type)
923 return UMat::ones(Size(cols, rows), type);
// Returns an array of the given size and type constructed with Scalar(1).
// Scalar(1) is used here, not Scalar::all(1) as a counterpart to the zeros()
// overloads would suggest.
// NOTE(review): presumably only the first channel becomes 1 for multi-channel
// types -- confirm against the Scalar documentation.
926 UMat UMat::ones(Size size, int type)
928 return UMat(size, type, Scalar(1));
// Returns an ndims-dimensional array with sizes `sz` and the given type,
// constructed with Scalar(1).
931 UMat UMat::ones(int ndims, const int* sz, int type)
933 return UMat(ndims, sz, type, Scalar(1));
// Convenience overload: forwards to eye(Size, type). Note that Size takes
// (width, height), hence the (cols, rows) argument order.
936 UMat UMat::eye(int rows, int cols, int type)
938 return UMat::eye(Size(cols, rows), type);
941 UMat UMat::eye(Size size, int type)