Allocator* allocator;
};
+struct CV_EXPORTS_W GpuData
+{
+ explicit GpuData(size_t _size);
+ ~GpuData();
+
+ GpuData(const GpuData&) = delete;
+ GpuData& operator=(const GpuData&) = delete;
+
+ GpuData(GpuData&&) = delete;
+ GpuData& operator=(GpuData&&) = delete;
+
+ uchar* data;
+ size_t size;
+};
+
+class CV_EXPORTS_W GpuMatND
+{
+public:
+ using SizeArray = std::vector<int>;
+ using StepArray = std::vector<size_t>;
+ using IndexArray = std::vector<int>;
+
+ //! destructor
+ ~GpuMatND();
+
+ //! default constructor
+ GpuMatND();
+
+ /** @overload
+ @param size Array of integers specifying an n-dimensional array shape.
+ @param type Array type. Use CV_8UC1, ..., CV_16FC4 to create 1-4 channel matrices, or
+ CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+ */
+ GpuMatND(SizeArray size, int type);
+
+ /** @overload
+ @param size Array of integers specifying an n-dimensional array shape.
+ @param type Array type. Use CV_8UC1, ..., CV_16FC4 to create 1-4 channel matrices, or
+ CV_8UC(n), ..., CV_64FC(n) to create multi-channel (up to CV_CN_MAX channels) matrices.
+ @param data Pointer to the user data. Matrix constructors that take data and step parameters do not
+ allocate matrix data. Instead, they just initialize the matrix header that points to the specified
+ data, which means that no data is copied. This operation is very efficient and can be used to
+ process external data using OpenCV functions. The external data is not automatically deallocated, so
+ you should take care of it.
+ @param step Array of _size.size()-1 steps in case of a multi-dimensional array (the last step is always
+ set to the element size). If not specified, the matrix is assumed to be continuous.
+ */
+ GpuMatND(SizeArray size, int type, void* data, StepArray step = StepArray());
+
+ /** @brief Allocates GPU memory.
+ Suppose there is some GPU memory already allocated. In that case, this method may choose to reuse that
+ GPU memory under the specific condition: it must be of the same size and type, not externally allocated,
+ the GPU memory is continuous(i.e., isContinuous() is true), and is not a sub-matrix of another GpuMatND
+ (i.e., isSubmatrix() is false). In other words, this method guarantees that the GPU memory allocated by
+ this method is always continuous and is not a sub-region of another GpuMatND.
+ */
+ void create(SizeArray size, int type);
+
+ void release();
+
+ void swap(GpuMatND& m) noexcept;
+
+ /** @brief Creates a full copy of the array and the underlying data.
+ The method creates a full copy of the array. It mimics the behavior of Mat::clone(), i.e.
+ the original step is not taken into account. So, the array copy is a continuous array
+ occupying total()\*elemSize() bytes.
+ */
+ GpuMatND clone() const;
+
+ /** @overload
+ This overload is non-blocking, so it may return even if the copy operation is not finished.
+ */
+ GpuMatND clone(Stream& stream) const;
+
+ /** @brief Extracts a sub-matrix.
+ The operator makes a new header for the specified sub-array of \*this.
+ The operator is an O(1) operation, that is, no matrix data is copied.
+ @param ranges Array of selected ranges along each dimension.
+ */
+ GpuMatND operator()(const std::vector<Range>& ranges) const;
+
+ /** @brief Creates a GpuMat header for a 2D plane part of an n-dim matrix.
+ @note The returned GpuMat is constructed with the constructor for user-allocated data.
+ That is, It does not perform reference counting.
+ @note This function does not increment this GpuMatND's reference counter.
+ */
+ GpuMat createGpuMatHeader(IndexArray idx, Range rowRange, Range colRange) const;
+
+ /** @overload
+ Creates a GpuMat header if this GpuMatND is effectively 2D.
+ @note The returned GpuMat is constructed with the constructor for user-allocated data.
+ That is, It does not perform reference counting.
+ @note This function does not increment this GpuMatND's reference counter.
+ */
+ GpuMat createGpuMatHeader() const;
+
+ /** @brief Extracts a 2D plane part of an n-dim matrix.
+ It differs from createGpuMatHeader(IndexArray, Range, Range) in that it clones a part of this
+ GpuMatND to the returned GpuMat.
+ @note This operator does not increment this GpuMatND's reference counter;
+ */
+ GpuMat operator()(IndexArray idx, Range rowRange, Range colRange) const;
+
+ /** @brief Extracts a 2D plane part of an n-dim matrix if this GpuMatND is effectively 2D.
+ It differs from createGpuMatHeader() in that it clones a part of this GpuMatND.
+ @note This operator does not increment this GpuMatND's reference counter;
+ */
+ operator GpuMat() const;
+
+ GpuMatND(const GpuMatND&) = default;
+ GpuMatND& operator=(const GpuMatND&) = default;
+
+ GpuMatND(GpuMatND&&) noexcept = default;
+ GpuMatND& operator=(GpuMatND&&) noexcept = default;
+
+ void upload(InputArray src);
+ void upload(InputArray src, Stream& stream);
+ void download(OutputArray dst) const;
+ void download(OutputArray dst, Stream& stream) const;
+
+ //! returns true iff the GpuMatND data is continuous
+ //! (i.e. when there are no gaps between successive rows)
+ bool isContinuous() const;
+
+ //! returns true if the matrix is a sub-matrix of another matrix
+ bool isSubmatrix() const;
+
+ //! returns element size in bytes
+ size_t elemSize() const;
+
+ //! returns the size of element channel in bytes
+ size_t elemSize1() const;
+
+ //! returns true if data is null
+ bool empty() const;
+
+ //! returns true if not empty and points to external(user-allocated) gpu memory
+ bool external() const;
+
+ //! returns pointer to the first byte of the GPU memory
+ uchar* getDevicePtr() const;
+
+ //! returns the total number of array elements
+ size_t total() const;
+
+ //! returns the size of underlying memory in bytes
+ size_t totalMemSize() const;
+
+ //! returns element type
+ int type() const;
+
+private:
+ //! internal use
+ void setFields(SizeArray size, int type, StepArray step = StepArray());
+
+public:
+ /*! includes several bit-fields:
+ - the magic signature
+ - continuity flag
+ - depth
+ - number of channels
+ */
+ int flags;
+
+ //! matrix dimensionality
+ int dims;
+
+ //! shape of this array
+ SizeArray size;
+
+ /*! step values
+ Their semantics is identical to the semantics of step for Mat.
+ */
+ StepArray step;
+
+private:
+ /*! internal use
+ If this GpuMatND holds external memory, this is empty.
+ */
+ std::shared_ptr<GpuData> data_;
+
+ /*! internal use
+ If this GpuMatND manages memory with reference counting, this value is
+ always equal to data_->data. If this GpuMatND holds external memory,
+ data_ is empty and data points to the external memory.
+ */
+ uchar* data;
+
+ /*! internal use
+ If this GpuMatND is a sub-matrix of a larger matrix, this value is the
+ difference of the first byte between the sub-matrix and the whole matrix.
+ */
+ size_t offset;
+};
+
/** @brief Creates a continuous matrix.
@param rows Row count.
}
//===================================================================================
+// GpuMatND
+//===================================================================================
+
+inline
+GpuMatND::GpuMatND() :
+ flags(0), dims(0), data(nullptr), offset(0)
+{
+}
+
+inline
+GpuMatND::GpuMatND(SizeArray _size, int _type) :
+ flags(0), dims(0), data(nullptr), offset(0)
+{
+ create(std::move(_size), _type);
+}
+
+inline
+void GpuMatND::swap(GpuMatND& m) noexcept
+{
+ std::swap(*this, m);
+}
+
+inline
+bool GpuMatND::isContinuous() const
+{
+ return (flags & Mat::CONTINUOUS_FLAG) != 0;
+}
+
+inline
+bool GpuMatND::isSubmatrix() const
+{
+ return (flags & Mat::SUBMATRIX_FLAG) != 0;
+}
+
+inline
+size_t GpuMatND::elemSize() const
+{
+ return CV_ELEM_SIZE(flags);
+}
+
+inline
+size_t GpuMatND::elemSize1() const
+{
+ return CV_ELEM_SIZE1(flags);
+}
+
+inline
+bool GpuMatND::empty() const
+{
+ return data == nullptr;
+}
+
+inline
+bool GpuMatND::external() const
+{
+ return !empty() && data_.use_count() == 0;
+}
+
+inline
+uchar* GpuMatND::getDevicePtr() const
+{
+ return data + offset;
+}
+
+inline
+size_t GpuMatND::total() const
+{
+ size_t p = 1;
+ for(auto s : size)
+ p *= s;
+ return p;
+}
+
+inline
+size_t GpuMatND::totalMemSize() const
+{
+ return size[0] * step[0];
+}
+
+inline
+int GpuMatND::type() const
+{
+ return CV_MAT_TYPE(flags);
+}
+
+//===================================================================================
// HostMem
//===================================================================================
--- /dev/null
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#include "opencv2/opencv_modules.hpp"
+
+#ifndef HAVE_OPENCV_CUDEV
+
+#error "opencv_cudev is required"
+
+#else
+
+#include "opencv2/core/cuda.hpp"
+#include "opencv2/cudev.hpp"
+
+using namespace cv;
+using namespace cv::cuda;
+
+GpuData::GpuData(const size_t _size)
+ : data(nullptr), size(_size)
+{
+ CV_CUDEV_SAFE_CALL(cudaMalloc(&data, _size));
+}
+
+GpuData::~GpuData()
+{
+ CV_CUDEV_SAFE_CALL(cudaFree(data));
+}
+
+/////////////////////////////////////////////////////
+/// create
+
+void GpuMatND::create(SizeArray _size, int _type)
+{
+ {
+ auto elements_nonzero = [](SizeArray& v)
+ {
+ return std::all_of(v.begin(), v.end(),
+ [](unsigned u){ return u > 0; });
+ };
+ CV_Assert(!_size.empty());
+ CV_Assert(elements_nonzero(_size));
+ }
+
+ _type &= Mat::TYPE_MASK;
+
+ if (size == _size && type() == _type && !empty() && !external() && isContinuous() && !isSubmatrix())
+ return;
+
+ release();
+
+ setFields(std::move(_size), _type);
+
+ data_ = std::make_shared<GpuData>(totalMemSize());
+ data = data_->data;
+ offset = 0;
+}
+
+/////////////////////////////////////////////////////
+/// release
+
+void GpuMatND::release()
+{
+ data = nullptr;
+ data_.reset();
+
+ flags = dims = offset = 0;
+ size.clear();
+ step.clear();
+}
+
+/////////////////////////////////////////////////////
+/// clone
+
+static bool next(uchar*& d, const uchar*& s, std::vector<int>& idx, const int dims, const GpuMatND& dst, const GpuMatND& src)
+{
+ int inc = dims-3;
+
+ while (true)
+ {
+ if (idx[inc] == src.size[inc] - 1)
+ {
+ if (inc == 0)
+ {
+ return false;
+ }
+
+ idx[inc] = 0;
+ d -= (dst.size[inc] - 1) * dst.step[inc];
+ s -= (src.size[inc] - 1) * src.step[inc];
+ inc--;
+ }
+ else
+ {
+ idx[inc]++;
+ d += dst.step[inc];
+ s += src.step[inc];
+ break;
+ }
+ }
+
+ return true;
+}
+
+GpuMatND GpuMatND::clone() const
+{
+ CV_DbgAssert(!empty());
+
+ GpuMatND ret(size, type());
+
+ if (isContinuous())
+ {
+ CV_CUDEV_SAFE_CALL(cudaMemcpy(ret.getDevicePtr(), getDevicePtr(), ret.totalMemSize(), cudaMemcpyDeviceToDevice));
+ }
+ else
+ {
+ // 1D arrays are always continuous
+
+ if (dims == 2)
+ {
+ CV_CUDEV_SAFE_CALL(
+ cudaMemcpy2D(ret.getDevicePtr(), ret.step[0], getDevicePtr(), step[0],
+ size[1]*step[1], size[0], cudaMemcpyDeviceToDevice)
+ );
+ }
+ else
+ {
+ std::vector<int> idx(dims-2, 0);
+
+ uchar* d = ret.getDevicePtr();
+ const uchar* s = getDevicePtr();
+
+ // iterate each 2D plane
+ do
+ {
+ CV_CUDEV_SAFE_CALL(
+ cudaMemcpy2DAsync(
+ d, ret.step[dims-2], s, step[dims-2],
+ size[dims-1]*step[dims-1], size[dims-2], cudaMemcpyDeviceToDevice)
+ );
+ }
+ while (next(d, s, idx, dims, ret, *this));
+
+ CV_CUDEV_SAFE_CALL(cudaStreamSynchronize(0));
+ }
+ }
+
+ return ret;
+}
+
+GpuMatND GpuMatND::clone(Stream& stream) const
+{
+ CV_DbgAssert(!empty());
+
+ GpuMatND ret(size, type());
+
+ cudaStream_t _stream = StreamAccessor::getStream(stream);
+
+ if (isContinuous())
+ {
+ CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(ret.getDevicePtr(), getDevicePtr(), ret.totalMemSize(), cudaMemcpyDeviceToDevice, _stream));
+ }
+ else
+ {
+ // 1D arrays are always continuous
+
+ if (dims == 2)
+ {
+ CV_CUDEV_SAFE_CALL(
+ cudaMemcpy2DAsync(ret.getDevicePtr(), ret.step[0], getDevicePtr(), step[0],
+ size[1]*step[1], size[0], cudaMemcpyDeviceToDevice, _stream)
+ );
+ }
+ else
+ {
+ std::vector<int> idx(dims-2, 0);
+
+ uchar* d = ret.getDevicePtr();
+ const uchar* s = getDevicePtr();
+
+ // iterate each 2D plane
+ do
+ {
+ CV_CUDEV_SAFE_CALL(
+ cudaMemcpy2DAsync(
+ d, ret.step[dims-2], s, step[dims-2],
+ size[dims-1]*step[dims-1], size[dims-2], cudaMemcpyDeviceToDevice, _stream)
+ );
+ }
+ while (next(d, s, idx, dims, ret, *this));
+ }
+ }
+
+ return ret;
+}
+
+/////////////////////////////////////////////////////
+/// upload
+
+void GpuMatND::upload(InputArray src)
+{
+ Mat mat = src.getMat();
+
+ CV_DbgAssert(!mat.empty());
+
+ if (!mat.isContinuous())
+ mat = mat.clone();
+
+ SizeArray _size(mat.dims);
+ std::copy_n(mat.size.p, mat.dims, _size.data());
+
+ create(std::move(_size), mat.type());
+
+ CV_CUDEV_SAFE_CALL(cudaMemcpy(getDevicePtr(), mat.data, totalMemSize(), cudaMemcpyHostToDevice));
+}
+
+void GpuMatND::upload(InputArray src, Stream& stream)
+{
+ Mat mat = src.getMat();
+
+ CV_DbgAssert(!mat.empty());
+
+ if (!mat.isContinuous())
+ mat = mat.clone();
+
+ SizeArray _size(mat.dims);
+ std::copy_n(mat.size.p, mat.dims, _size.data());
+
+ create(std::move(_size), mat.type());
+
+ cudaStream_t _stream = StreamAccessor::getStream(stream);
+ CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(getDevicePtr(), mat.data, totalMemSize(), cudaMemcpyHostToDevice, _stream));
+}
+
+/////////////////////////////////////////////////////
+/// download
+
+void GpuMatND::download(OutputArray dst) const
+{
+ CV_DbgAssert(!empty());
+
+ dst.create(dims, size.data(), type());
+ Mat mat = dst.getMat();
+
+ GpuMatND gmat = *this;
+
+ if (!gmat.isContinuous())
+ gmat = gmat.clone();
+
+ CV_CUDEV_SAFE_CALL(cudaMemcpy(mat.data, gmat.getDevicePtr(), mat.total() * mat.elemSize(), cudaMemcpyDeviceToHost));
+}
+
+void GpuMatND::download(OutputArray dst, Stream& stream) const
+{
+ CV_DbgAssert(!empty());
+
+ dst.create(dims, size.data(), type());
+ Mat mat = dst.getMat();
+
+ GpuMatND gmat = *this;
+
+ if (!gmat.isContinuous())
+ gmat = gmat.clone(stream);
+
+ cudaStream_t _stream = StreamAccessor::getStream(stream);
+ CV_CUDEV_SAFE_CALL(cudaMemcpyAsync(mat.data, gmat.getDevicePtr(), mat.total() * mat.elemSize(), cudaMemcpyDeviceToHost, _stream));
+}
+
+#endif
--- /dev/null
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#include "precomp.hpp"
+
+using namespace cv;
+using namespace cv::cuda;
+
+GpuMatND::~GpuMatND() = default;
+
+GpuMatND::GpuMatND(SizeArray _size, int _type, void* _data, StepArray _step) :
+ flags(0), dims(0), data(static_cast<uchar*>(_data)), offset(0)
+{
+ CV_Assert(_step.empty() || _size.size() == _step.size() + 1);
+
+ setFields(std::move(_size), _type, std::move(_step));
+}
+
+GpuMatND GpuMatND::operator()(const std::vector<Range>& ranges) const
+{
+ CV_Assert(dims == (int)ranges.size());
+
+ for (int i = 0; i < dims; ++i)
+ {
+ Range r = ranges[i];
+ CV_Assert(r == Range::all() || (0 <= r.start && r.start < r.end && r.end <= size[i]));
+ }
+
+ GpuMatND ret = *this;
+
+ for (int i = 0; i < dims; ++i)
+ {
+ Range r = ranges[i];
+ if (r != Range::all() && r != Range(0, ret.size[i]))
+ {
+ ret.offset += r.start * ret.step[i];
+ ret.size[i] = r.size();
+ ret.flags |= Mat::SUBMATRIX_FLAG;
+ }
+ }
+
+ ret.flags = cv::updateContinuityFlag(ret.flags, dims, ret.size.data(), ret.step.data());
+
+ return ret;
+}
+
+GpuMat GpuMatND::createGpuMatHeader(IndexArray idx, Range rowRange, Range colRange) const
+{
+ CV_Assert((int)idx.size() == dims - 2);
+
+ std::vector<Range> ranges;
+ for (int i : idx)
+ ranges.emplace_back(i, i+1);
+ ranges.push_back(rowRange);
+ ranges.push_back(colRange);
+
+ return (*this)(ranges).createGpuMatHeader();
+}
+
+GpuMat GpuMatND::createGpuMatHeader() const
+{
+ auto Effectively2D = [](GpuMatND m)
+ {
+ for (int i = 0; i < m.dims - 2; ++i)
+ if (m.size[i] > 1)
+ return false;
+ return true;
+ };
+ CV_Assert(Effectively2D(*this));
+
+ return GpuMat(size[dims-2], size[dims-1], type(), getDevicePtr(), step[dims-2]);
+}
+
+GpuMat GpuMatND::operator()(IndexArray idx, Range rowRange, Range colRange) const
+{
+ return createGpuMatHeader(idx, rowRange, colRange).clone();
+}
+
+GpuMatND::operator GpuMat() const
+{
+ return createGpuMatHeader().clone();
+}
+
+void GpuMatND::setFields(SizeArray _size, int _type, StepArray _step)
+{
+ _type &= Mat::TYPE_MASK;
+
+ flags = Mat::MAGIC_VAL + _type;
+ dims = static_cast<int>(_size.size());
+ size = std::move(_size);
+
+ if (_step.empty())
+ {
+ step = StepArray(dims);
+
+ step.back() = elemSize();
+ for (int _i = dims - 2; _i >= 0; --_i)
+ {
+ const size_t i = _i;
+ step[i] = step[i+1] * size[i+1];
+ }
+
+ flags |= Mat::CONTINUOUS_FLAG;
+ }
+ else
+ {
+ step = std::move(_step);
+ step.push_back(elemSize());
+
+ flags = cv::updateContinuityFlag(flags, dims, size.data(), step.data());
+ }
+
+ CV_Assert(size.size() == step.size());
+ CV_Assert(step.back() == elemSize());
+}
+
+#ifndef HAVE_CUDA
+
+GpuData::GpuData(const size_t _size)
+ : data(nullptr), size(0)
+{
+ CV_UNUSED(_size);
+ throw_no_cuda();
+}
+
+GpuData::~GpuData()
+{
+}
+
+void GpuMatND::create(SizeArray _size, int _type)
+{
+ CV_UNUSED(_size);
+ CV_UNUSED(_type);
+ throw_no_cuda();
+}
+
+void GpuMatND::release()
+{
+ throw_no_cuda();
+}
+
+GpuMatND GpuMatND::clone() const
+{
+ throw_no_cuda();
+}
+
+GpuMatND GpuMatND::clone(Stream& stream) const
+{
+ CV_UNUSED(stream);
+ throw_no_cuda();
+}
+
+void GpuMatND::upload(InputArray src)
+{
+ CV_UNUSED(src);
+ throw_no_cuda();
+}
+
+void GpuMatND::upload(InputArray src, Stream& stream)
+{
+ CV_UNUSED(src);
+ CV_UNUSED(stream);
+ throw_no_cuda();
+}
+
+void GpuMatND::download(OutputArray dst) const
+{
+ CV_UNUSED(dst);
+ throw_no_cuda();
+}
+
+void GpuMatND::download(OutputArray dst, Stream& stream) const
+{
+ CV_UNUSED(dst);
+ CV_UNUSED(stream);
+ throw_no_cuda();
+}
+
+#endif