modules/core/src/gpumat.cpp

   1 /*M///////////////////////////////////////////////////////////////////////////////////////
   2 //
   3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
   4 //
   5 //  By downloading, copying, installing or using the software you agree to this license.
   6 //  If you do not agree to this license, do not download, install,
   7 //  copy or use the software.
   8 //
   9 //
  10 //                           License Agreement
  11 //                For Open Source Computer Vision Library
  12 //
  13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
  14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
  15 // Third party copyrights are property of their respective owners.
  16 //
  17 // Redistribution and use in source and binary forms, with or without modification,
  18 // are permitted provided that the following conditions are met:
  19 //
  20 //   * Redistribution's of source code must retain the above copyright notice,
  21 //     this list of conditions and the following disclaimer.
  22 //
  23 //   * Redistribution's in binary form must reproduce the above copyright notice,
  24 //     this list of conditions and the following disclaimer in the documentation
  25 //     and/or other materials provided with the distribution.
  26 //
  27 //   * The name of the copyright holders may not be used to endorse or promote products
  28 //     derived from this software without specific prior written permission.
  29 //
  30 // This software is provided by the copyright holders and contributors "as is" and
  31 // any express or implied warranties, including, but not limited to, the implied
  32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
  33 // In no event shall the Intel Corporation or contributors be liable for any direct,
  34 // indirect, incidental, special, exemplary, or consequential damages
  35 // (including, but not limited to, procurement of substitute goods or services;
  36 // loss of use, data, or profits; or business interruption) however caused
  37 // and on any theory of liability, whether in contract, strict liability,
  38 // or tort (including negligence or otherwise) arising in any way out of
  39 // the use of this software, even if advised of the possibility of such damage.
  40 //
  41 //M*/
  42
  43 #include "precomp.hpp"
  44 #include "opencv2/core/gpumat.hpp"
  45 #include <iostream>
  46
  47 #if defined(HAVE_CUDA)
  48     #include <cuda_runtime.h>
  49     #include <npp.h>
  50
  51     #define CUDART_MINIMUM_REQUIRED_VERSION 4020
  52     #define NPP_MINIMUM_REQUIRED_VERSION 4200
  53
  54     #if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
  55         #error "Insufficient Cuda Runtime library version, please update it."
  56     #endif
  57
  58     #if (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD < NPP_MINIMUM_REQUIRED_VERSION)
  59         #error "Insufficient NPP version, please update it."
  60     #endif
  61 #endif
  62
  63 #ifdef DYNAMIC_CUDA_SUPPORT
  64 #include <dlfcn.h>
  65 #include <sys/types.h>
  66 #include <sys/stat.h>
  67 #include <dirent.h>
  68 #endif
  69
  70 #ifdef ANDROID
  71 # include <android/log.h>
  72
  73 # define LOG_TAG "OpenCV::CUDA"
  74 # define LOGE(...) ((void)__android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__))
  75 # define LOGD(...) ((void)__android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__))
  76 # define LOGI(...) ((void)__android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__))
  77 #endif
  78
  79 using namespace std;
  80 using namespace cv;
  81 using namespace cv::gpu;
  82
  83 #define throw_nogpu CV_Error(CV_GpuNotSupported, "The library is compiled without CUDA support")
  84
  85 #include "opencv2/dynamicuda/dynamicuda.hpp"
  86
  87 #ifdef DYNAMIC_CUDA_SUPPORT
  88
  89 typedef GpuFuncTable* (*GpuFactoryType)();
  90 typedef DeviceInfoFuncTable* (*DeviceInfoFactoryType)();
  91
  92 static GpuFactoryType gpuFactory = NULL;
  93 static DeviceInfoFactoryType deviceInfoFactory = NULL;
  94
  95 # if defined(__linux__) || defined(__APPLE__) || defined (ANDROID)
  96 #  ifdef ANDROID
  97 static const std::string getCudaSupportLibName()
  98 {
  99     Dl_info dl_info;
 100     if(0 != dladdr((void *)getCudaSupportLibName, &dl_info))
 101     {
 102         LOGD("Library name: %s", dl_info.dli_fname);
 103         LOGD("Library base address: %p", dl_info.dli_fbase);
 104
 105         const char* libName=dl_info.dli_fname;
 106         while( ((*libName)=='/') || ((*libName)=='.') )
 107         libName++;
 108
 109         char lineBuf[2048];
 110         FILE* file = fopen("/proc/self/smaps", "rt");
 111
 112         if(file)
 113         {
 114             while (fgets(lineBuf, sizeof lineBuf, file) != NULL)
 115             {
 116                 //verify that line ends with library name
 117                 int lineLength = strlen(lineBuf);
 118                 int libNameLength = strlen(libName);
 119
 120                 //trim end
 121                 for(int i = lineLength - 1; i >= 0 && isspace(lineBuf[i]); --i)
 122                 {
 123                     lineBuf[i] = 0;
 124                     --lineLength;
 125                 }
 126
 127                 if (0 != strncmp(lineBuf + lineLength - libNameLength, libName, libNameLength))
 128                 {
 129                 //the line does not contain the library name
 130                     continue;
 131                 }
 132
 133                 //extract path from smaps line
 134                 char* pathBegin = strchr(lineBuf, '/');
 135                 if (0 == pathBegin)
 136                 {
 137                     LOGE("Strange error: could not find path beginning in lin \"%s\"", lineBuf);
 138                     continue;
 139                 }
 140
 141                 char* pathEnd = strrchr(pathBegin, '/');
 142                 pathEnd[1] = 0;
 143
 144                 LOGD("Libraries folder found: %s", pathBegin);
 145
 146                 fclose(file);
 147                 return std::string(pathBegin) + "/libopencv_core_cuda.so";
 148             }
 149             fclose(file);
 150             LOGE("Could not find library path");
 151         }
 152         else
 153         {
 154             LOGE("Could not read /proc/self/smaps");
 155         }
 156     }
 157     else
 158     {
 159         LOGE("Could not get library name and base address");
 160     }
 161
 162     return string();
 163 }
 164
 165 #  else
 166 static const std::string getCudaSupportLibName()
 167 {
 168     return "libopencv_core_cuda.so";
 169 }
 170 #  endif
 171
 172 static bool loadCudaSupportLib()
 173 {
 174     void* handle;
 175     const std::string name = getCudaSupportLibName();
 176     handle = dlopen(name.c_str(), RTLD_LAZY);
 177     if (!handle)
 178         return false;
 179
 180     deviceInfoFactory = (DeviceInfoFactoryType)dlsym(handle, "deviceInfoFactory");
 181     if (!deviceInfoFactory)
 182     {
 183         dlclose(handle);
 184         return false;
 185     }
 186
 187     gpuFactory = (GpuFactoryType)dlsym(handle, "gpuFactory");
 188     if (!gpuFactory)
 189     {
 190         dlclose(handle);
 191         return false;
 192     }
 193
 194     return true;
 195 }
 196
 197 # else
 198 #  error "Dynamic CUDA support is not implemented for this platform!"
 199 # endif
 200
 201 #endif
 202
 203 static GpuFuncTable* gpuFuncTable()
 204 {
 205 #ifdef DYNAMIC_CUDA_SUPPORT
 206    static EmptyFuncTable stub;
 207    static GpuFuncTable* libFuncTable = loadCudaSupportLib() ? gpuFactory(): (GpuFuncTable*)&stub;
 208    static GpuFuncTable *funcTable = libFuncTable ? libFuncTable : (GpuFuncTable*)&stub;
 209 #else
 210 # ifdef USE_CUDA
 211    static CudaFuncTable impl;
 212    static GpuFuncTable* funcTable = &impl;
 213 #else
 214    static EmptyFuncTable stub;
 215    static GpuFuncTable* funcTable = &stub;
 216 #endif
 217 #endif
 218    return funcTable;
 219 }
 220
 221 static DeviceInfoFuncTable* deviceInfoFuncTable()
 222 {
 223 #ifdef DYNAMIC_CUDA_SUPPORT
 224    static EmptyDeviceInfoFuncTable stub;
 225    static DeviceInfoFuncTable* libFuncTable = loadCudaSupportLib() ? deviceInfoFactory(): (DeviceInfoFuncTable*)&stub;
 226    static DeviceInfoFuncTable* funcTable = libFuncTable ? libFuncTable : (DeviceInfoFuncTable*)&stub;
 227 #else
 228 # ifdef USE_CUDA
 229    static CudaDeviceInfoFuncTable impl;
 230    static DeviceInfoFuncTable* funcTable = &impl;
 231 #else
 232    static EmptyDeviceInfoFuncTable stub;
 233    static DeviceInfoFuncTable* funcTable = &stub;
 234 #endif
 235 #endif
 236    return funcTable;
 237 }
 238
 239
 240 //////////////////////////////// Initialization & Info ////////////////////////
 241
 242 int cv::gpu::getCudaEnabledDeviceCount() { return deviceInfoFuncTable()->getCudaEnabledDeviceCount(); }
 243
 244 void cv::gpu::setDevice(int device) { deviceInfoFuncTable()->setDevice(device); }
 245 int cv::gpu::getDevice() { return deviceInfoFuncTable()->getDevice(); }
 246
 247 void cv::gpu::resetDevice() { deviceInfoFuncTable()->resetDevice(); }
 248
 249 bool cv::gpu::deviceSupports(FeatureSet feature_set) { return deviceInfoFuncTable()->deviceSupports(feature_set); }
 250
 251 bool cv::gpu::TargetArchs::builtWith(FeatureSet feature_set) { return deviceInfoFuncTable()->builtWith(feature_set); }
 252 bool cv::gpu::TargetArchs::has(int major, int minor) { return deviceInfoFuncTable()->has(major, minor); }
 253 bool cv::gpu::TargetArchs::hasPtx(int major, int minor) {  return deviceInfoFuncTable()->hasPtx(major, minor); }
 254 bool cv::gpu::TargetArchs::hasBin(int major, int minor) { return deviceInfoFuncTable()->hasBin(major, minor);  }
 255 bool cv::gpu::TargetArchs::hasEqualOrLessPtx(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrLessPtx(major, minor); }
 256 bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrGreater(major, minor); }
 257 bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrGreaterPtx(major, minor); }
 258 bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor) { return deviceInfoFuncTable()->hasEqualOrGreaterBin(major, minor); }
 259
 260 size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { return deviceInfoFuncTable()->sharedMemPerBlock(); }
 261 void cv::gpu::DeviceInfo::queryMemory(size_t& total_memory, size_t& free_memory) const { deviceInfoFuncTable()->queryMemory(total_memory, free_memory); }
 262 size_t cv::gpu::DeviceInfo::freeMemory() const { return deviceInfoFuncTable()->freeMemory(); }
 263 size_t cv::gpu::DeviceInfo::totalMemory() const { return deviceInfoFuncTable()->totalMemory(); }
 264 bool cv::gpu::DeviceInfo::supports(FeatureSet feature_set) const { return deviceInfoFuncTable()->supports(feature_set); }
 265 bool cv::gpu::DeviceInfo::isCompatible() const { return deviceInfoFuncTable()->isCompatible(); }
 266 int cv::gpu::DeviceInfo::deviceID() const { return deviceInfoFuncTable()->deviceID(); };
 267 int cv::gpu::DeviceInfo::majorVersion() const { return deviceInfoFuncTable()->majorVersion(); }
 268 int cv::gpu::DeviceInfo::minorVersion() const { return deviceInfoFuncTable()->minorVersion(); }
 269 std::string cv::gpu::DeviceInfo::name() const { return deviceInfoFuncTable()->name(); }
 270 int cv::gpu::DeviceInfo::multiProcessorCount() const { return deviceInfoFuncTable()->multiProcessorCount(); }
 271 void cv::gpu::DeviceInfo::query() { deviceInfoFuncTable()->query(); }
 272
 273 void cv::gpu::printCudaDeviceInfo(int device) { deviceInfoFuncTable()->printCudaDeviceInfo(device); }
 274 void cv::gpu::printShortCudaDeviceInfo(int device) { deviceInfoFuncTable()->printShortCudaDeviceInfo(device); }
 275
 276 namespace cv { namespace gpu
 277 {
 278     CV_EXPORTS void copyWithMask(const cv::gpu::GpuMat&, cv::gpu::GpuMat&, const cv::gpu::GpuMat&, cudaStream_t);
 279     CV_EXPORTS void convertTo(const cv::gpu::GpuMat&, cv::gpu::GpuMat&);
 280     CV_EXPORTS void convertTo(const cv::gpu::GpuMat&, cv::gpu::GpuMat&, double, double, cudaStream_t = 0);
 281     CV_EXPORTS void setTo(cv::gpu::GpuMat&, cv::Scalar, cudaStream_t);
 282     CV_EXPORTS void setTo(cv::gpu::GpuMat&, cv::Scalar, const cv::gpu::GpuMat&, cudaStream_t);
 283     CV_EXPORTS void setTo(cv::gpu::GpuMat&, cv::Scalar);
 284     CV_EXPORTS void setTo(cv::gpu::GpuMat&, cv::Scalar, const cv::gpu::GpuMat&);
 285 }}
 286
 287 //////////////////////////////// GpuMat ///////////////////////////////
 288
 289 cv::gpu::GpuMat::GpuMat(const GpuMat& m)
 290     : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend)
 291 {
 292     if (refcount)
 293         CV_XADD(refcount, 1);
 294 }
 295
 296 cv::gpu::GpuMat::GpuMat(int rows_, int cols_, int type_, void* data_, size_t step_) :
 297     flags(Mat::MAGIC_VAL + (type_ & TYPE_MASK)), rows(rows_), cols(cols_),
 298     step(step_), data((uchar*)data_), refcount(0),
 299     datastart((uchar*)data_), dataend((uchar*)data_)
 300 {
 301     size_t minstep = cols * elemSize();
 302
 303     if (step == Mat::AUTO_STEP)
 304     {
 305         step = minstep;
 306         flags |= Mat::CONTINUOUS_FLAG;
 307     }
 308     else
 309     {
 310         if (rows == 1)
 311             step = minstep;
 312
 313         CV_DbgAssert(step >= minstep);
 314
 315         flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0;
 316     }
 317     dataend += step * (rows - 1) + minstep;
 318 }
 319
 320 cv::gpu::GpuMat::GpuMat(Size size_, int type_, void* data_, size_t step_) :
 321     flags(Mat::MAGIC_VAL + (type_ & TYPE_MASK)), rows(size_.height), cols(size_.width),
 322     step(step_), data((uchar*)data_), refcount(0),
 323     datastart((uchar*)data_), dataend((uchar*)data_)
 324 {
 325     size_t minstep = cols * elemSize();
 326
 327     if (step == Mat::AUTO_STEP)
 328     {
 329         step = minstep;
 330         flags |= Mat::CONTINUOUS_FLAG;
 331     }
 332     else
 333     {
 334         if (rows == 1)
 335             step = minstep;
 336
 337         CV_DbgAssert(step >= minstep);
 338
 339         flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0;
 340     }
 341     dataend += step * (rows - 1) + minstep;
 342 }
 343
 344 cv::gpu::GpuMat::GpuMat(const GpuMat& m, Range _rowRange, Range _colRange)
 345 {
 346     flags = m.flags;
 347     step = m.step; refcount = m.refcount;
 348     data = m.data; datastart = m.datastart; dataend = m.dataend;
 349
 350     if (_rowRange == Range::all())
 351         rows = m.rows;
 352     else
 353     {
 354         CV_Assert(0 <= _rowRange.start && _rowRange.start <= _rowRange.end && _rowRange.end <= m.rows);
 355
 356         rows = _rowRange.size();
 357         data += step*_rowRange.start;
 358     }
 359
 360     if (_colRange == Range::all())
 361         cols = m.cols;
 362     else
 363     {
 364         CV_Assert(0 <= _colRange.start && _colRange.start <= _colRange.end && _colRange.end <= m.cols);
 365
 366         cols = _colRange.size();
 367         data += _colRange.start*elemSize();
 368         flags &= cols < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
 369     }
 370
 371     if (rows == 1)
 372         flags |= Mat::CONTINUOUS_FLAG;
 373
 374     if (refcount)
 375         CV_XADD(refcount, 1);
 376
 377     if (rows <= 0 || cols <= 0)
 378         rows = cols = 0;
 379 }
 380
 381 cv::gpu::GpuMat::GpuMat(const GpuMat& m, Rect roi) :
 382     flags(m.flags), rows(roi.height), cols(roi.width),
 383     step(m.step), data(m.data + roi.y*step), refcount(m.refcount),
 384     datastart(m.datastart), dataend(m.dataend)
 385 {
 386     flags &= roi.width < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
 387     data += roi.x * elemSize();
 388
 389     CV_Assert(0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols && 0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows);
 390
 391     if (refcount)
 392         CV_XADD(refcount, 1);
 393
 394     if (rows <= 0 || cols <= 0)
 395         rows = cols = 0;
 396 }
 397
 398 cv::gpu::GpuMat::GpuMat(const Mat& m) :
 399     flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
 400 {
 401     upload(m);
 402 }
 403
 404 GpuMat& cv::gpu::GpuMat::operator = (const GpuMat& m)
 405 {
 406     if (this != &m)
 407     {
 408         GpuMat temp(m);
 409         swap(temp);
 410     }
 411
 412     return *this;
 413 }
 414
 415 void cv::gpu::GpuMat::swap(GpuMat& b)
 416 {
 417     std::swap(flags, b.flags);
 418     std::swap(rows, b.rows);
 419     std::swap(cols, b.cols);
 420     std::swap(step, b.step);
 421     std::swap(data, b.data);
 422     std::swap(datastart, b.datastart);
 423     std::swap(dataend, b.dataend);
 424     std::swap(refcount, b.refcount);
 425 }
 426
 427 void cv::gpu::GpuMat::locateROI(Size& wholeSize, Point& ofs) const
 428 {
 429     size_t esz = elemSize();
 430     ptrdiff_t delta1 = data - datastart;
 431     ptrdiff_t delta2 = dataend - datastart;
 432
 433     CV_DbgAssert(step > 0);
 434
 435     if (delta1 == 0)
 436         ofs.x = ofs.y = 0;
 437     else
 438     {
 439         ofs.y = static_cast<int>(delta1 / step);
 440         ofs.x = static_cast<int>((delta1 - step * ofs.y) / esz);
 441
 442         CV_DbgAssert(data == datastart + ofs.y * step + ofs.x * esz);
 443     }
 444
 445     size_t minstep = (ofs.x + cols) * esz;
 446
 447     wholeSize.height = std::max(static_cast<int>((delta2 - minstep) / step + 1), ofs.y + rows);
 448     wholeSize.width = std::max(static_cast<int>((delta2 - step * (wholeSize.height - 1)) / esz), ofs.x + cols);
 449 }
 450
 451 GpuMat& cv::gpu::GpuMat::adjustROI(int dtop, int dbottom, int dleft, int dright)
 452 {
 453     Size wholeSize;
 454     Point ofs;
 455     locateROI(wholeSize, ofs);
 456
 457     size_t esz = elemSize();
 458
 459     int row1 = std::max(ofs.y - dtop, 0);
 460     int row2 = std::min(ofs.y + rows + dbottom, wholeSize.height);
 461
 462     int col1 = std::max(ofs.x - dleft, 0);
 463     int col2 = std::min(ofs.x + cols + dright, wholeSize.width);
 464
 465     data += (row1 - ofs.y) * step + (col1 - ofs.x) * esz;
 466     rows = row2 - row1;
 467     cols = col2 - col1;
 468
 469     if (esz * cols == step || rows == 1)
 470         flags |= Mat::CONTINUOUS_FLAG;
 471     else
 472         flags &= ~Mat::CONTINUOUS_FLAG;
 473
 474     return *this;
 475 }
 476
 477 GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const
 478 {
 479     GpuMat hdr = *this;
 480
 481     int cn = channels();
 482     if (new_cn == 0)
 483         new_cn = cn;
 484
 485     int total_width = cols * cn;
 486
 487     if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0)
 488         new_rows = rows * total_width / new_cn;
 489
 490     if (new_rows != 0 && new_rows != rows)
 491     {
 492         int total_size = total_width * rows;
 493
 494         if (!isContinuous())
 495             CV_Error(CV_BadStep, "The matrix is not continuous, thus its number of rows can not be changed");
 496
 497         if ((unsigned)new_rows > (unsigned)total_size)
 498             CV_Error(CV_StsOutOfRange, "Bad new number of rows");
 499
 500         total_width = total_size / new_rows;
 501
 502         if (total_width * new_rows != total_size)
 503             CV_Error(CV_StsBadArg, "The total number of matrix elements is not divisible by the new number of rows");
 504
 505         hdr.rows = new_rows;
 506         hdr.step = total_width * elemSize1();
 507     }
 508
 509     int new_width = total_width / new_cn;
 510
 511     if (new_width * new_cn != total_width)
 512         CV_Error(CV_BadNumChannels, "The total width is not divisible by the new number of channels");
 513
 514     hdr.cols = new_width;
 515     hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT);
 516
 517     return hdr;
 518 }
 519
 520 cv::Mat::Mat(const GpuMat& m) : flags(0), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0), datalimit(0), allocator(0), size(&rows)
 521 {
 522     m.download(*this);
 523 }
 524
 525 void cv::gpu::createContinuous(int rows, int cols, int type, GpuMat& m)
 526 {
 527     int area = rows * cols;
 528     if (m.empty() || m.type() != type || !m.isContinuous() || m.size().area() < area)
 529         m.create(1, area, type);
 530
 531     m.cols = cols;
 532     m.rows = rows;
 533     m.step = m.elemSize() * cols;
 534     m.flags |= Mat::CONTINUOUS_FLAG;
 535 }
 536
 537 void cv::gpu::ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m)
 538 {
 539     if (m.empty() || m.type() != type || m.data != m.datastart)
 540         m.create(rows, cols, type);
 541     else
 542     {
 543         const size_t esz = m.elemSize();
 544         const ptrdiff_t delta2 = m.dataend - m.datastart;
 545
 546         const size_t minstep = m.cols * esz;
 547
 548         Size wholeSize;
 549         wholeSize.height = std::max(static_cast<int>((delta2 - minstep) / m.step + 1), m.rows);
 550         wholeSize.width = std::max(static_cast<int>((delta2 - m.step * (wholeSize.height - 1)) / esz), m.cols);
 551
 552         if (wholeSize.height < rows || wholeSize.width < cols)
 553             m.create(rows, cols, type);
 554         else
 555         {
 556             m.cols = cols;
 557             m.rows = rows;
 558         }
 559     }
 560 }
 561
 562 GpuMat cv::gpu::allocMatFromBuf(int rows, int cols, int type, GpuMat &mat)
 563 {
 564     if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
 565         return mat(Rect(0, 0, cols, rows));
 566     return mat = GpuMat(rows, cols, type);
 567 }
 568
 569 void cv::gpu::GpuMat::upload(const Mat& m)
 570 {
 571     CV_DbgAssert(!m.empty());
 572
 573     create(m.size(), m.type());
 574
 575     gpuFuncTable()->copy(m, *this);
 576 }
 577
 578 void cv::gpu::GpuMat::download(Mat& m) const
 579 {
 580     CV_DbgAssert(!empty());
 581
 582     m.create(size(), type());
 583
 584     gpuFuncTable()->copy(*this, m);
 585 }
 586
 587 void cv::gpu::GpuMat::copyTo(GpuMat& m) const
 588 {
 589     CV_DbgAssert(!empty());
 590
 591     m.create(size(), type());
 592
 593     gpuFuncTable()->copy(*this, m);
 594 }
 595
 596 void cv::gpu::GpuMat::copyTo(GpuMat& mat, const GpuMat& mask) const
 597 {
 598     if (mask.empty())
 599         copyTo(mat);
 600     else
 601     {
 602         mat.create(size(), type());
 603
 604         gpuFuncTable()->copyWithMask(*this, mat, mask);
 605     }
 606 }
 607
 608 void cv::gpu::GpuMat::convertTo(GpuMat& dst, int rtype, double alpha, double beta) const
 609 {
 610     bool noScale = fabs(alpha - 1) < numeric_limits<double>::epsilon() && fabs(beta) < numeric_limits<double>::epsilon();
 611
 612     if (rtype < 0)
 613         rtype = type();
 614     else
 615         rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());
 616
 617     int sdepth = depth();
 618     int ddepth = CV_MAT_DEPTH(rtype);
 619     if (sdepth == ddepth && noScale)
 620     {
 621         copyTo(dst);
 622         return;
 623     }
 624
 625     GpuMat temp;
 626     const GpuMat* psrc = this;
 627     if (sdepth != ddepth && psrc == &dst)
 628     {
 629         temp = *this;
 630         psrc = &temp;
 631     }
 632
 633     dst.create(size(), rtype);
 634
 635     if (noScale)
 636         cv::gpu::convertTo(*psrc, dst);
 637     else
 638         cv::gpu::convertTo(*psrc, dst, alpha, beta);
 639 }
 640
 641 GpuMat& cv::gpu::GpuMat::setTo(Scalar s, const GpuMat& mask)
 642 {
 643     CV_Assert(mask.empty() || mask.type() == CV_8UC1);
 644     CV_DbgAssert(!empty());
 645
 646     gpu::setTo(*this, s, mask);
 647
 648     return *this;
 649 }
 650
 651 void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
 652 {
 653     _type &= TYPE_MASK;
 654
 655     if (rows == _rows && cols == _cols && type() == _type && data)
 656         return;
 657
 658     if (data)
 659         release();
 660
 661     CV_DbgAssert(_rows >= 0 && _cols >= 0);
 662
 663     if (_rows > 0 && _cols > 0)
 664     {
 665         flags = Mat::MAGIC_VAL + _type;
 666         rows = _rows;
 667         cols = _cols;
 668
 669         size_t esz = elemSize();
 670
 671         void* devPtr;
 672         gpuFuncTable()->mallocPitch(&devPtr, &step, esz * cols, rows);
 673
 674         // Single row must be continuous
 675         if (rows == 1)
 676             step = esz * cols;
 677
 678         if (esz * cols == step)
 679             flags |= Mat::CONTINUOUS_FLAG;
 680
 681         int64 _nettosize = static_cast<int64>(step) * rows;
 682         size_t nettosize = static_cast<size_t>(_nettosize);
 683
 684         datastart = data = static_cast<uchar*>(devPtr);
 685         dataend = data + nettosize;
 686
 687         refcount = static_cast<int*>(fastMalloc(sizeof(*refcount)));
 688         *refcount = 1;
 689     }
 690 }
 691
 692 void cv::gpu::GpuMat::release()
 693 {
 694     if (refcount && CV_XADD(refcount, -1) == 1)
 695     {
 696         fastFree(refcount);
 697
 698         gpuFuncTable()->free(datastart);
 699     }
 700
 701     data = datastart = dataend = 0;
 702     step = rows = cols = 0;
 703     refcount = 0;
 704 }
 705
 706 namespace cv { namespace gpu
 707 {
 708     void convertTo(const GpuMat& src, GpuMat& dst)
 709     {
 710         gpuFuncTable()->convert(src, dst);
 711     }
 712
 713     void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream)
 714     {
 715         gpuFuncTable()->convert(src, dst, alpha, beta, stream);
 716     }
 717
 718     void setTo(GpuMat& src, Scalar s, cudaStream_t stream)
 719     {
 720         gpuFuncTable()->setTo(src, s, cv::gpu::GpuMat(), stream);
 721     }
 722
 723     void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
 724     {
 725         gpuFuncTable()->setTo(src, s, mask, stream);
 726     }
 727
 728     void setTo(GpuMat& src, Scalar s)
 729     {
 730         setTo(src, s, 0);
 731     }
 732
 733     void setTo(GpuMat& src, Scalar s, const GpuMat& mask)
 734     {
 735         setTo(src, s, mask, 0);
 736     }
 737 }}
 738
 739 ////////////////////////////////////////////////////////////////////////
 740 // Error handling
 741
 742 void cv::gpu::error(const char *error_string, const char *file, const int line, const char *func)
 743 {
 744     int code = CV_GpuApiCallError;
 745
 746     if (uncaught_exception())
 747     {
 748         const char* errorStr = cvErrorStr(code);
 749         const char* function = func ? func : "unknown function";
 750
 751         cerr << "OpenCV Error: " << errorStr << "(" << error_string << ") in " << function << ", file " << file << ", line " << line;
 752         cerr.flush();
 753     }
 754     else
 755         cv::error( cv::Exception(code, error_string, func, file, line) );
 756 }