modules/core/src/umatrix.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 #include "precomp.hpp"
  44 #include "opencl_kernels_core.hpp"
  45
  46 ///////////////////////////////// UMat implementation ///////////////////////////////
  47
  48 namespace cv {
  49
  50 // it should be a prime number for the best hash function
  51 enum { UMAT_NLOCKS = 31 };
  52 static Mutex umatLocks[UMAT_NLOCKS];
  53
  54 UMatData::UMatData(const MatAllocator* allocator)
  55 {
  56     prevAllocator = currAllocator = allocator;
  57     urefcount = refcount = 0;
  58     data = origdata = 0;
  59     size = 0; capacity = 0;
  60     flags = 0;
  61     handle = 0;
  62     userdata = 0;
  63     allocatorFlags_ = 0;
  64 }
  65
  66 UMatData::~UMatData()
  67 {
  68     prevAllocator = currAllocator = 0;
  69     urefcount = refcount = 0;
  70     data = origdata = 0;
  71     size = 0; capacity = 0;
  72     flags = 0;
  73     handle = 0;
  74     userdata = 0;
  75     allocatorFlags_ = 0;
  76 }
  77
  78 void UMatData::lock()
  79 {
  80     umatLocks[(size_t)(void*)this % UMAT_NLOCKS].lock();
  81 }
  82
  83 void UMatData::unlock()
  84 {
  85     umatLocks[(size_t)(void*)this % UMAT_NLOCKS].unlock();
  86 }
  87
  88
  89 MatAllocator* UMat::getStdAllocator()
  90 {
  91 #ifdef HAVE_OPENCL
  92     if( ocl::haveOpenCL() && ocl::useOpenCL() )
  93         return ocl::getOpenCLAllocator();
  94 #endif
  95     return Mat::getStdAllocator();
  96 }
  97
  98 void swap( UMat& a, UMat& b )
  99 {
 100     std::swap(a.flags, b.flags);
 101     std::swap(a.dims, b.dims);
 102     std::swap(a.rows, b.rows);
 103     std::swap(a.cols, b.cols);
 104     std::swap(a.allocator, b.allocator);
 105     std::swap(a.u, b.u);
 106     std::swap(a.offset, b.offset);
 107
 108     std::swap(a.size.p, b.size.p);
 109     std::swap(a.step.p, b.step.p);
 110     std::swap(a.step.buf[0], b.step.buf[0]);
 111     std::swap(a.step.buf[1], b.step.buf[1]);
 112
 113     if( a.step.p == b.step.buf )
 114     {
 115         a.step.p = a.step.buf;
 116         a.size.p = &a.rows;
 117     }
 118
 119     if( b.step.p == a.step.buf )
 120     {
 121         b.step.p = b.step.buf;
 122         b.size.p = &b.rows;
 123     }
 124 }
 125
 126
 127 static inline void setSize( UMat& m, int _dims, const int* _sz,
 128                             const size_t* _steps, bool autoSteps=false )
 129 {
 130     CV_Assert( 0 <= _dims && _dims <= CV_MAX_DIM );
 131     if( m.dims != _dims )
 132     {
 133         if( m.step.p != m.step.buf )
 134         {
 135             fastFree(m.step.p);
 136             m.step.p = m.step.buf;
 137             m.size.p = &m.rows;
 138         }
 139         if( _dims > 2 )
 140         {
 141             m.step.p = (size_t*)fastMalloc(_dims*sizeof(m.step.p[0]) + (_dims+1)*sizeof(m.size.p[0]));
 142             m.size.p = (int*)(m.step.p + _dims) + 1;
 143             m.size.p[-1] = _dims;
 144             m.rows = m.cols = -1;
 145         }
 146     }
 147
 148     m.dims = _dims;
 149     if( !_sz )
 150         return;
 151
 152     size_t esz = CV_ELEM_SIZE(m.flags), total = esz;
 153     int i;
 154     for( i = _dims-1; i >= 0; i-- )
 155     {
 156         int s = _sz[i];
 157         CV_Assert( s >= 0 );
 158         m.size.p[i] = s;
 159
 160         if( _steps )
 161             m.step.p[i] = i < _dims-1 ? _steps[i] : esz;
 162         else if( autoSteps )
 163         {
 164             m.step.p[i] = total;
 165             int64 total1 = (int64)total*s;
 166             if( (uint64)total1 != (size_t)total1 )
 167                 CV_Error( CV_StsOutOfRange, "The total matrix size does not fit to \"size_t\" type" );
 168             total = (size_t)total1;
 169         }
 170     }
 171
 172     if( _dims == 1 )
 173     {
 174         m.dims = 2;
 175         m.cols = 1;
 176         m.step[1] = esz;
 177     }
 178 }
 179
 180 static void updateContinuityFlag(UMat& m)
 181 {
 182     int i, j;
 183     for( i = 0; i < m.dims; i++ )
 184     {
 185         if( m.size[i] > 1 )
 186             break;
 187     }
 188
 189     for( j = m.dims-1; j > i; j-- )
 190     {
 191         if( m.step[j]*m.size[j] < m.step[j-1] )
 192             break;
 193     }
 194
 195     uint64 total = (uint64)m.step[0]*m.size[0];
 196     if( j <= i && total == (size_t)total )
 197         m.flags |= UMat::CONTINUOUS_FLAG;
 198     else
 199         m.flags &= ~UMat::CONTINUOUS_FLAG;
 200 }
 201
 202
 203 static void finalizeHdr(UMat& m)
 204 {
 205     updateContinuityFlag(m);
 206     int d = m.dims;
 207     if( d > 2 )
 208         m.rows = m.cols = -1;
 209 }
 210
 211 UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
 212 {
 213     UMat hdr;
 214     if(!data)
 215         return hdr;
 216     UMatData* temp_u = u;
 217     if(!temp_u)
 218     {
 219         MatAllocator *a = allocator, *a0 = getStdAllocator();
 220         if(!a)
 221             a = a0;
 222         temp_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags, usageFlags);
 223         temp_u->refcount = 1;
 224     }
 225     UMat::getStdAllocator()->allocate(temp_u, accessFlags, usageFlags);
 226     hdr.flags = flags;
 227     setSize(hdr, dims, size.p, step.p);
 228     finalizeHdr(hdr);
 229     hdr.u = temp_u;
 230     hdr.offset = data - datastart;
 231     hdr.addref();
 232     return hdr;
 233 }
 234
 235 void UMat::create(int d, const int* _sizes, int _type, UMatUsageFlags _usageFlags)
 236 {
 237     this->usageFlags = _usageFlags;
 238
 239     int i;
 240     CV_Assert(0 <= d && d <= CV_MAX_DIM && _sizes);
 241     _type = CV_MAT_TYPE(_type);
 242
 243     if( u && (d == dims || (d == 1 && dims <= 2)) && _type == type() )
 244     {
 245         if( d == 2 && rows == _sizes[0] && cols == _sizes[1] )
 246             return;
 247         for( i = 0; i < d; i++ )
 248             if( size[i] != _sizes[i] )
 249                 break;
 250         if( i == d && (d > 1 || size[1] == 1))
 251             return;
 252     }
 253
 254     release();
 255     if( d == 0 )
 256         return;
 257     flags = (_type & CV_MAT_TYPE_MASK) | MAGIC_VAL;
 258     setSize(*this, d, _sizes, 0, true);
 259     offset = 0;
 260
 261     if( total() > 0 )
 262     {
 263         MatAllocator *a = allocator, *a0 = getStdAllocator();
 264         if(!a)
 265             a = a0;
 266         try
 267         {
 268             u = a->allocate(dims, size, _type, 0, step.p, 0, usageFlags);
 269             CV_Assert(u != 0);
 270         }
 271         catch(...)
 272         {
 273             if(a != a0)
 274                 u = a0->allocate(dims, size, _type, 0, step.p, 0, usageFlags);
 275             CV_Assert(u != 0);
 276         }
 277         CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) );
 278     }
 279
 280     finalizeHdr(*this);
 281     addref();
 282 }
 283
 284 void UMat::copySize(const UMat& m)
 285 {
 286     setSize(*this, m.dims, 0, 0);
 287     for( int i = 0; i < dims; i++ )
 288     {
 289         size[i] = m.size[i];
 290         step[i] = m.step[i];
 291     }
 292 }
 293
 294
 295 UMat::~UMat()
 296 {
 297     release();
 298     if( step.p != step.buf )
 299         fastFree(step.p);
 300 }
 301
 302 void UMat::deallocate()
 303 {
 304     u->currAllocator->deallocate(u);
 305     u = NULL;
 306 }
 307
 308
 309 UMat::UMat(const UMat& m, const Range& _rowRange, const Range& _colRange)
 310     : flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows)
 311 {
 312     CV_Assert( m.dims >= 2 );
 313     if( m.dims > 2 )
 314     {
 315         AutoBuffer<Range> rs(m.dims);
 316         rs[0] = _rowRange;
 317         rs[1] = _colRange;
 318         for( int i = 2; i < m.dims; i++ )
 319             rs[i] = Range::all();
 320         *this = m(rs);
 321         return;
 322     }
 323
 324     *this = m;
 325     if( _rowRange != Range::all() && _rowRange != Range(0,rows) )
 326     {
 327         CV_Assert( 0 <= _rowRange.start && _rowRange.start <= _rowRange.end && _rowRange.end <= m.rows );
 328         rows = _rowRange.size();
 329         offset += step*_rowRange.start;
 330         flags |= SUBMATRIX_FLAG;
 331     }
 332
 333     if( _colRange != Range::all() && _colRange != Range(0,cols) )
 334     {
 335         CV_Assert( 0 <= _colRange.start && _colRange.start <= _colRange.end && _colRange.end <= m.cols );
 336         cols = _colRange.size();
 337         offset += _colRange.start*elemSize();
 338         flags &= cols < m.cols ? ~CONTINUOUS_FLAG : -1;
 339         flags |= SUBMATRIX_FLAG;
 340     }
 341
 342     if( rows == 1 )
 343         flags |= CONTINUOUS_FLAG;
 344
 345     if( rows <= 0 || cols <= 0 )
 346     {
 347         release();
 348         rows = cols = 0;
 349     }
 350 }
 351
 352
 353 UMat::UMat(const UMat& m, const Rect& roi)
 354     : flags(m.flags), dims(2), rows(roi.height), cols(roi.width),
 355     allocator(m.allocator), usageFlags(m.usageFlags), u(m.u), offset(m.offset + roi.y*m.step[0]), size(&rows)
 356 {
 357     CV_Assert( m.dims <= 2 );
 358     flags &= roi.width < m.cols ? ~CONTINUOUS_FLAG : -1;
 359     flags |= roi.height == 1 ? CONTINUOUS_FLAG : 0;
 360
 361     size_t esz = CV_ELEM_SIZE(flags);
 362     offset += roi.x*esz;
 363     CV_Assert( 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols &&
 364               0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows );
 365     if( u )
 366         CV_XADD(&(u->urefcount), 1);
 367     if( roi.width < m.cols || roi.height < m.rows )
 368         flags |= SUBMATRIX_FLAG;
 369
 370     step[0] = m.step[0]; step[1] = esz;
 371
 372     if( rows <= 0 || cols <= 0 )
 373     {
 374         release();
 375         rows = cols = 0;
 376     }
 377 }
 378
 379
 380 UMat::UMat(const UMat& m, const Range* ranges)
 381     : flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), usageFlags(USAGE_DEFAULT), u(0), offset(0), size(&rows)
 382 {
 383     int i, d = m.dims;
 384
 385     CV_Assert(ranges);
 386     for( i = 0; i < d; i++ )
 387     {
 388         Range r = ranges[i];
 389         CV_Assert( r == Range::all() || (0 <= r.start && r.start < r.end && r.end <= m.size[i]) );
 390     }
 391     *this = m;
 392     for( i = 0; i < d; i++ )
 393     {
 394         Range r = ranges[i];
 395         if( r != Range::all() && r != Range(0, size.p[i]))
 396         {
 397             size.p[i] = r.end - r.start;
 398             offset += r.start*step.p[i];
 399             flags |= SUBMATRIX_FLAG;
 400         }
 401     }
 402     updateContinuityFlag(*this);
 403 }
 404
 405 UMat UMat::diag(int d) const
 406 {
 407     CV_Assert( dims <= 2 );
 408     UMat m = *this;
 409     size_t esz = elemSize();
 410     int len;
 411
 412     if( d >= 0 )
 413     {
 414         len = std::min(cols - d, rows);
 415         m.offset += esz*d;
 416     }
 417     else
 418     {
 419         len = std::min(rows + d, cols);
 420         m.offset -= step[0]*d;
 421     }
 422     CV_DbgAssert( len > 0 );
 423
 424     m.size[0] = m.rows = len;
 425     m.size[1] = m.cols = 1;
 426     m.step[0] += (len > 1 ? esz : 0);
 427
 428     if( m.rows > 1 )
 429         m.flags &= ~CONTINUOUS_FLAG;
 430     else
 431         m.flags |= CONTINUOUS_FLAG;
 432
 433     if( size() != Size(1,1) )
 434         m.flags |= SUBMATRIX_FLAG;
 435
 436     return m;
 437 }
 438
 439 void UMat::locateROI( Size& wholeSize, Point& ofs ) const
 440 {
 441     CV_Assert( dims <= 2 && step[0] > 0 );
 442     size_t esz = elemSize(), minstep;
 443     ptrdiff_t delta1 = (ptrdiff_t)offset, delta2 = (ptrdiff_t)u->size;
 444
 445     if( delta1 == 0 )
 446         ofs.x = ofs.y = 0;
 447     else
 448     {
 449         ofs.y = (int)(delta1/step[0]);
 450         ofs.x = (int)((delta1 - step[0]*ofs.y)/esz);
 451         CV_DbgAssert( offset == (size_t)(ofs.y*step[0] + ofs.x*esz) );
 452     }
 453     minstep = (ofs.x + cols)*esz;
 454     wholeSize.height = (int)((delta2 - minstep)/step[0] + 1);
 455     wholeSize.height = std::max(wholeSize.height, ofs.y + rows);
 456     wholeSize.width = (int)((delta2 - step*(wholeSize.height-1))/esz);
 457     wholeSize.width = std::max(wholeSize.width, ofs.x + cols);
 458 }
 459
 460
 461 UMat& UMat::adjustROI( int dtop, int dbottom, int dleft, int dright )
 462 {
 463     CV_Assert( dims <= 2 && step[0] > 0 );
 464     Size wholeSize; Point ofs;
 465     size_t esz = elemSize();
 466     locateROI( wholeSize, ofs );
 467     int row1 = std::max(ofs.y - dtop, 0), row2 = std::min(ofs.y + rows + dbottom, wholeSize.height);
 468     int col1 = std::max(ofs.x - dleft, 0), col2 = std::min(ofs.x + cols + dright, wholeSize.width);
 469     offset += (row1 - ofs.y)*step + (col1 - ofs.x)*esz;
 470     rows = row2 - row1; cols = col2 - col1;
 471     size.p[0] = rows; size.p[1] = cols;
 472     if( esz*cols == step[0] || rows == 1 )
 473         flags |= CONTINUOUS_FLAG;
 474     else
 475         flags &= ~CONTINUOUS_FLAG;
 476     return *this;
 477 }
 478
 479
 480 UMat UMat::reshape(int new_cn, int new_rows) const
 481 {
 482     int cn = channels();
 483     UMat hdr = *this;
 484
 485     if( dims > 2 && new_rows == 0 && new_cn != 0 && size[dims-1]*cn % new_cn == 0 )
 486     {
 487         hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT);
 488         hdr.step[dims-1] = CV_ELEM_SIZE(hdr.flags);
 489         hdr.size[dims-1] = hdr.size[dims-1]*cn / new_cn;
 490         return hdr;
 491     }
 492
 493     CV_Assert( dims <= 2 );
 494
 495     if( new_cn == 0 )
 496         new_cn = cn;
 497
 498     int total_width = cols * cn;
 499
 500     if( (new_cn > total_width || total_width % new_cn != 0) && new_rows == 0 )
 501         new_rows = rows * total_width / new_cn;
 502
 503     if( new_rows != 0 && new_rows != rows )
 504     {
 505         int total_size = total_width * rows;
 506         if( !isContinuous() )
 507             CV_Error( CV_BadStep,
 508             "The matrix is not continuous, thus its number of rows can not be changed" );
 509
 510         if( (unsigned)new_rows > (unsigned)total_size )
 511             CV_Error( CV_StsOutOfRange, "Bad new number of rows" );
 512
 513         total_width = total_size / new_rows;
 514
 515         if( total_width * new_rows != total_size )
 516             CV_Error( CV_StsBadArg, "The total number of matrix elements "
 517                                     "is not divisible by the new number of rows" );
 518
 519         hdr.rows = new_rows;
 520         hdr.step[0] = total_width * elemSize1();
 521     }
 522
 523     int new_width = total_width / new_cn;
 524
 525     if( new_width * new_cn != total_width )
 526         CV_Error( CV_BadNumChannels,
 527         "The total width is not divisible by the new number of channels" );
 528
 529     hdr.cols = new_width;
 530     hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT);
 531     hdr.step[1] = CV_ELEM_SIZE(hdr.flags);
 532     return hdr;
 533 }
 534
 535 UMat UMat::diag(const UMat& d)
 536 {
 537     CV_Assert( d.cols == 1 || d.rows == 1 );
 538     int len = d.rows + d.cols - 1;
 539     UMat m(len, len, d.type(), Scalar(0));
 540     UMat md = m.diag();
 541     if( d.cols == 1 )
 542         d.copyTo(md);
 543     else
 544         transpose(d, md);
 545     return m;
 546 }
 547
 548 int UMat::checkVector(int _elemChannels, int _depth, bool _requireContinuous) const
 549 {
 550     return (depth() == _depth || _depth <= 0) &&
 551         (isContinuous() || !_requireContinuous) &&
 552         ((dims == 2 && (((rows == 1 || cols == 1) && channels() == _elemChannels) ||
 553                         (cols == _elemChannels && channels() == 1))) ||
 554         (dims == 3 && channels() == 1 && size.p[2] == _elemChannels && (size.p[0] == 1 || size.p[1] == 1) &&
 555          (isContinuous() || step.p[1] == step.p[2]*size.p[2])))
 556     ? (int)(total()*channels()/_elemChannels) : -1;
 557 }
 558
 559 UMat UMat::reshape(int _cn, int _newndims, const int* _newsz) const
 560 {
 561     if(_newndims == dims)
 562     {
 563         if(_newsz == 0)
 564             return reshape(_cn);
 565         if(_newndims == 2)
 566             return reshape(_cn, _newsz[0]);
 567     }
 568
 569     CV_Error(CV_StsNotImplemented, "");
 570     // TBD
 571     return UMat();
 572 }
 573
 574
 575 Mat UMat::getMat(int accessFlags) const
 576 {
 577     if(!u)
 578         return Mat();
 579     u->currAllocator->map(u, accessFlags | ACCESS_READ);
 580     CV_Assert(u->data != 0);
 581     Mat hdr(dims, size.p, type(), u->data + offset, step.p);
 582     hdr.flags = flags;
 583     hdr.u = u;
 584     hdr.datastart = u->data;
 585     hdr.data = u->data + offset;
 586     hdr.datalimit = hdr.dataend = u->data + u->size;
 587     CV_XADD(&hdr.u->refcount, 1);
 588     return hdr;
 589 }
 590
 591 void* UMat::handle(int accessFlags) const
 592 {
 593     if( !u )
 594         return 0;
 595
 596     if ((accessFlags & ACCESS_WRITE) != 0)
 597         u->markHostCopyObsolete(true);
 598
 599     // check flags: if CPU copy is newer, copy it back to GPU.
 600     if( u->deviceCopyObsolete() )
 601     {
 602         CV_Assert(u->refcount == 0);
 603         u->currAllocator->unmap(u);
 604     }
 605     return u->handle;
 606 }
 607
 608 void UMat::ndoffset(size_t* ofs) const
 609 {
 610     // offset = step[0]*ofs[0] + step[1]*ofs[1] + step[2]*ofs[2] + ...;
 611     size_t val = offset;
 612     for( int i = 0; i < dims; i++ )
 613     {
 614         size_t s = step.p[i];
 615         ofs[i] = val / s;
 616         val -= ofs[i]*s;
 617     }
 618 }
 619
 620 void UMat::copyTo(OutputArray _dst) const
 621 {
 622     int dtype = _dst.type();
 623     if( _dst.fixedType() && dtype != type() )
 624     {
 625         CV_Assert( channels() == CV_MAT_CN(dtype) );
 626         convertTo( _dst, dtype );
 627         return;
 628     }
 629
 630     if( empty() )
 631     {
 632         _dst.release();
 633         return;
 634     }
 635
 636     size_t i, sz[CV_MAX_DIM], srcofs[CV_MAX_DIM], dstofs[CV_MAX_DIM], esz = elemSize();
 637     for( i = 0; i < (size_t)dims; i++ )
 638         sz[i] = size.p[i];
 639     sz[dims-1] *= esz;
 640     ndoffset(srcofs);
 641     srcofs[dims-1] *= esz;
 642
 643     _dst.create( dims, size.p, type() );
 644     if( _dst.isUMat() )
 645     {
 646         UMat dst = _dst.getUMat();
 647         if( u == dst.u && dst.offset == offset )
 648             return;
 649
 650         if (u->currAllocator == dst.u->currAllocator)
 651         {
 652             dst.ndoffset(dstofs);
 653             dstofs[dims-1] *= esz;
 654             u->currAllocator->copy(u, dst.u, dims, sz, srcofs, step.p, dstofs, dst.step.p, false);
 655             return;
 656         }
 657     }
 658
 659     Mat dst = _dst.getMat();
 660     u->currAllocator->download(u, dst.data, dims, sz, srcofs, step.p, dst.step.p);
 661 }
 662
 663 void UMat::copyTo(OutputArray _dst, InputArray _mask) const
 664 {
 665     if( _mask.empty() )
 666     {
 667         copyTo(_dst);
 668         return;
 669     }
 670 #ifdef HAVE_OPENCL
 671     int cn = channels(), mtype = _mask.type(), mdepth = CV_MAT_DEPTH(mtype), mcn = CV_MAT_CN(mtype);
 672     CV_Assert( mdepth == CV_8U && (mcn == 1 || mcn == cn) );
 673
 674     if (ocl::useOpenCL() && _dst.isUMat() && dims <= 2)
 675     {
 676         UMatData * prevu = _dst.getUMat().u;
 677         _dst.create( dims, size, type() );
 678
 679         UMat dst = _dst.getUMat();
 680
 681         bool haveDstUninit = false;
 682         if( prevu != dst.u ) // do not leave dst uninitialized
 683             haveDstUninit = true;
 684
 685         String opts = format("-D COPY_TO_MASK -D T1=%s -D scn=%d -D mcn=%d%s",
 686                              ocl::memopTypeToStr(depth()), cn, mcn,
 687                              haveDstUninit ? " -D HAVE_DST_UNINIT" : "");
 688
 689         ocl::Kernel k("copyToMask", ocl::core::copyset_oclsrc, opts);
 690         if (!k.empty())
 691         {
 692             k.args(ocl::KernelArg::ReadOnlyNoSize(*this),
 693                    ocl::KernelArg::ReadOnlyNoSize(_mask.getUMat()),
 694                    haveDstUninit ? ocl::KernelArg::WriteOnly(dst) :
 695                                    ocl::KernelArg::ReadWrite(dst));
 696
 697             size_t globalsize[2] = { cols, rows };
 698             if (k.run(2, globalsize, NULL, false))
 699                 return;
 700         }
 701     }
 702 #endif
 703     Mat src = getMat(ACCESS_READ);
 704     src.copyTo(_dst, _mask);
 705 }
 706
 707 void UMat::convertTo(OutputArray _dst, int _type, double alpha, double beta) const
 708 {
 709     bool noScale = std::fabs(alpha - 1) < DBL_EPSILON && std::fabs(beta) < DBL_EPSILON;
 710     int stype = type(), cn = CV_MAT_CN(stype);
 711
 712     if( _type < 0 )
 713         _type = _dst.fixedType() ? _dst.type() : stype;
 714     else
 715         _type = CV_MAKETYPE(CV_MAT_DEPTH(_type), cn);
 716
 717     int sdepth = CV_MAT_DEPTH(stype), ddepth = CV_MAT_DEPTH(_type);
 718     if( sdepth == ddepth && noScale )
 719     {
 720         copyTo(_dst);
 721         return;
 722     }
 723 #ifdef HAVE_OPENCL
 724     bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
 725     bool needDouble = sdepth == CV_64F || ddepth == CV_64F;
 726     if( dims <= 2 && cn && _dst.isUMat() && ocl::useOpenCL() &&
 727             ((needDouble && doubleSupport) || !needDouble) )
 728     {
 729         int wdepth = std::max(CV_32F, sdepth), rowsPerWI = 4;
 730
 731         char cvt[2][40];
 732         ocl::Kernel k("convertTo", ocl::core::convert_oclsrc,
 733                       format("-D srcT=%s -D WT=%s -D dstT=%s -D convertToWT=%s -D convertToDT=%s%s",
 734                              ocl::typeToStr(sdepth), ocl::typeToStr(wdepth), ocl::typeToStr(ddepth),
 735                              ocl::convertTypeStr(sdepth, wdepth, 1, cvt[0]),
 736                              ocl::convertTypeStr(wdepth, ddepth, 1, cvt[1]),
 737                              doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
 738         if (!k.empty())
 739         {
 740             UMat src = *this;
 741             _dst.create( size(), _type );
 742             UMat dst = _dst.getUMat();
 743
 744             float alphaf = (float)alpha, betaf = (float)beta;
 745             ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
 746                     dstarg = ocl::KernelArg::WriteOnly(dst, cn);
 747
 748             if (wdepth == CV_32F)
 749                 k.args(srcarg, dstarg, alphaf, betaf, rowsPerWI);
 750             else
 751                 k.args(srcarg, dstarg, alpha, beta, rowsPerWI);
 752
 753             size_t globalsize[2] = { dst.cols * cn, (dst.rows + rowsPerWI - 1) / rowsPerWI };
 754             if (k.run(2, globalsize, NULL, false))
 755                 return;
 756         }
 757     }
 758 #endif
 759     Mat m = getMat(ACCESS_READ);
 760     m.convertTo(_dst, _type, alpha, beta);
 761 }
 762
 763 UMat& UMat::setTo(InputArray _value, InputArray _mask)
 764 {
 765     bool haveMask = !_mask.empty();
 766 #ifdef HAVE_OPENCL
 767     int tp = type(), cn = CV_MAT_CN(tp);
 768
 769     if( dims <= 2 && cn <= 4 && CV_MAT_DEPTH(tp) < CV_64F && ocl::useOpenCL() )
 770     {
 771         Mat value = _value.getMat();
 772         CV_Assert( checkScalar(value, type(), _value.kind(), _InputArray::UMAT) );
 773         double buf[4] = { 0, 0, 0, 0 };
 774         convertAndUnrollScalar(value, tp, (uchar *)buf, 1);
 775
 776         int scalarcn = cn == 3 ? 4 : cn, rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
 777         String opts = format("-D dstT=%s -D rowsPerWI=%d -D dstST=%s -D dstT1=%s -D cn=%d",
 778                              ocl::memopTypeToStr(tp), rowsPerWI,
 779                              ocl::memopTypeToStr(CV_MAKETYPE(tp, scalarcn)),
 780                              ocl::memopTypeToStr(CV_MAT_DEPTH(tp)), cn);
 781
 782         ocl::Kernel setK(haveMask ? "setMask" : "set", ocl::core::copyset_oclsrc, opts);
 783         if( !setK.empty() )
 784         {
 785             ocl::KernelArg scalararg(0, 0, 0, 0, buf, CV_ELEM_SIZE1(tp) * scalarcn);
 786             UMat mask;
 787
 788             if( haveMask )
 789             {
 790                 mask = _mask.getUMat();
 791                 CV_Assert( mask.size() == size() && mask.type() == CV_8UC1 );
 792                 ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
 793                         dstarg = ocl::KernelArg::ReadWrite(*this);
 794                 setK.args(maskarg, dstarg, scalararg);
 795             }
 796             else
 797             {
 798                 ocl::KernelArg dstarg = ocl::KernelArg::WriteOnly(*this);
 799                 setK.args(dstarg, scalararg);
 800             }
 801
 802             size_t globalsize[] = { cols, (rows + rowsPerWI - 1) / rowsPerWI };
 803             if( setK.run(2, globalsize, NULL, false) )
 804                 return *this;
 805         }
 806     }
 807 #endif
 808     Mat m = getMat(haveMask ? ACCESS_RW : ACCESS_WRITE);
 809     m.setTo(_value, _mask);
 810     return *this;
 811 }
 812
 813 UMat& UMat::operator = (const Scalar& s)
 814 {
 815     setTo(s);
 816     return *this;
 817 }
 818
 819 UMat UMat::t() const
 820 {
 821     UMat m;
 822     transpose(*this, m);
 823     return m;
 824 }
 825
 826 UMat UMat::inv(int method) const
 827 {
 828     UMat m;
 829     invert(*this, m, method);
 830     return m;
 831 }
 832
 833 UMat UMat::mul(InputArray m, double scale) const
 834 {
 835     UMat dst;
 836     multiply(*this, m, dst, scale);
 837     return dst;
 838 }
 839
 840 #ifdef HAVE_OPENCL
 841
 842 static bool ocl_dot( InputArray _src1, InputArray _src2, double & res )
 843 {
 844     UMat src1 = _src1.getUMat().reshape(1), src2 = _src2.getUMat().reshape(1);
 845
 846     int type = src1.type(), depth = CV_MAT_DEPTH(type),
 847             kercn = ocl::predictOptimalVectorWidth(src1, src2);
 848     bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
 849
 850     if ( !doubleSupport && depth == CV_64F )
 851         return false;
 852
 853     int dbsize = ocl::Device::getDefault().maxComputeUnits();
 854     size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
 855     int ddepth = std::max(CV_32F, depth);
 856
 857     int wgs2_aligned = 1;
 858     while (wgs2_aligned < (int)wgs)
 859         wgs2_aligned <<= 1;
 860     wgs2_aligned >>= 1;
 861
 862     char cvt[40];
 863     ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
 864                   format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D ddepth=%d -D convertToDT=%s -D OP_DOT "
 865                          "-D WGS=%d -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d",
 866                          ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(depth),
 867                          ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
 868                          ddepth, ocl::convertTypeStr(depth, ddepth, kercn, cvt),
 869                          (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
 870                          _src1.isContinuous() ? " -D HAVE_SRC_CONT" : "",
 871                          _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", kercn));
 872     if (k.empty())
 873         return false;
 874
 875     UMat db(1, dbsize, ddepth);
 876
 877     ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
 878             src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
 879             dbarg = ocl::KernelArg::PtrWriteOnly(db);
 880
 881     k.args(src1arg, src1.cols, (int)src1.total(), dbsize, dbarg, src2arg);
 882
 883     size_t globalsize = dbsize * wgs;
 884     if (k.run(1, &globalsize, &wgs, false))
 885     {
 886         res = sum(db.getMat(ACCESS_READ))[0];
 887         return true;
 888     }
 889     return false;
 890 }
 891
 892 #endif
 893
 894 double UMat::dot(InputArray m) const
 895 {
 896     CV_Assert(m.sameSize(*this) && m.type() == type());
 897
 898 #ifdef HAVE_OPENCL
 899     double r = 0;
 900     CV_OCL_RUN_(dims <= 2, ocl_dot(*this, m, r), r)
 901 #endif
 902
 903     return getMat(ACCESS_READ).dot(m);
 904 }
 905
 906 UMat UMat::zeros(int rows, int cols, int type)
 907 {
 908     return UMat(rows, cols, type, Scalar::all(0));
 909 }
 910
 911 UMat UMat::zeros(Size size, int type)
 912 {
 913     return UMat(size, type, Scalar::all(0));
 914 }
 915
 916 UMat UMat::zeros(int ndims, const int* sz, int type)
 917 {
 918     return UMat(ndims, sz, type, Scalar::all(0));
 919 }
 920
 921 UMat UMat::ones(int rows, int cols, int type)
 922 {
 923     return UMat::ones(Size(cols, rows), type);
 924 }
 925
 926 UMat UMat::ones(Size size, int type)
 927 {
 928     return UMat(size, type, Scalar(1));
 929 }
 930
 931 UMat UMat::ones(int ndims, const int* sz, int type)
 932 {
 933     return UMat(ndims, sz, type, Scalar(1));
 934 }
 935
 936 UMat UMat::eye(int rows, int cols, int type)
 937 {
 938     return UMat::eye(Size(cols, rows), type);
 939 }
 940
 941 UMat UMat::eye(Size size, int type)
 942 {
 943     UMat m(size, type);
 944     setIdentity(m);
 945     return m;
 946 }
 947
 948 }
 949
 950 /* End of file. */