)
endforeach()
endif()
+find_host_package(PythonInterp 2.7)
+if(NOT PYTHONINTERP_FOUND)
find_host_package(PythonInterp "${MIN_VER_PYTHON}")
+endif()
unset(HAVE_SPHINX CACHE)
.. ocv:function:: void calcCovarMatrix( const Mat* samples, int nsamples, Mat& covar, Mat& mean, int flags, int ctype=CV_64F)
-.. ocv:function:: void calcCovarMatrix( InputArray samples, OutputArray covar, OutputArray mean, int flags, int ctype=CV_64F)
+.. ocv:function:: void calcCovarMatrix( InputArray samples, OutputArray covar, InputOutputArray mean, int flags, int ctype=CV_64F)
.. ocv:pyfunction:: cv2.calcCovarMatrix(samples, flags[, covar[, mean[, ctype]]]) -> covar, mean
//! swaps two matrices
CV_EXPORTS void swap(Mat& a, Mat& b);
+//! swaps two UMat matrices
+CV_EXPORTS void swap( UMat& a, UMat& b );
+
//! 1D interpolation function: returns coordinate of the "donor" pixel for the specified location p.
CV_EXPORTS_W int borderInterpolate(int p, int len, int borderType);
//! computes covariation matrix of a set of samples
CV_EXPORTS_W void calcCovarMatrix( InputArray samples, OutputArray covar,
- OutputArray mean, int flags, int ctype = CV_64F);
+ InputOutputArray mean, int flags, int ctype = CV_64F);
CV_EXPORTS_W void PCACompute(InputArray data, InputOutputArray mean,
OutputArray eigenvectors, int maxComponents = 0);
class CV_EXPORTS Mat;
class CV_EXPORTS MatExpr;
+class CV_EXPORTS UMat;
+class CV_EXPORTS UMatExpr;
+
class CV_EXPORTS SparseMat;
typedef Mat MatND;
inline
Mat::Mat(const cuda::GpuMat& m)
- : flags(0), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0), datalimit(0), allocator(0), size(&rows)
+ : flags(0), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows)
{
m.download(*this);
}
namespace cv
{
+enum { ACCESS_READ=1<<24, ACCESS_WRITE=1<<25,
+ ACCESS_RW=3<<24, ACCESS_MASK=ACCESS_RW, ACCESS_FAST=1<<26 };
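+// A usage sketch (assumed from declarations elsewhere in this patch): the
+// flags are passed to Mat::getUMat()/UMat::getMat() and to MatAllocator::map()
+// to declare how the caller will access the data, e.g.
+//     UMat um = m.getUMat(ACCESS_READ);  // read-only device view
+//     Mat  hm = um.getMat(ACCESS_RW);    // mapped for reading and writing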
+
//////////////////////// Input/Output Array Arguments /////////////////////////////////
/*!
KIND_SHIFT = 16,
FIXED_TYPE = 0x8000 << KIND_SHIFT,
FIXED_SIZE = 0x4000 << KIND_SHIFT,
- KIND_MASK = ~(FIXED_TYPE|FIXED_SIZE) - (1 << KIND_SHIFT) + 1,
+ KIND_MASK = 31 << KIND_SHIFT,
NONE = 0 << KIND_SHIFT,
MAT = 1 << KIND_SHIFT,
OPENGL_BUFFER = 7 << KIND_SHIFT,
CUDA_MEM = 8 << KIND_SHIFT,
GPU_MAT = 9 << KIND_SHIFT,
- OCL_MAT =10 << KIND_SHIFT
+ OCL_MAT =10 << KIND_SHIFT,
+ UMAT =11 << KIND_SHIFT,
+ STD_VECTOR_UMAT =12 << KIND_SHIFT,
+ UEXPR =13 << KIND_SHIFT
};
_InputArray();
+ _InputArray(int _flags, void* _obj);
_InputArray(const Mat& m);
_InputArray(const MatExpr& expr);
_InputArray(const std::vector<Mat>& vec);
_InputArray(const ogl::Buffer& buf);
_InputArray(const cuda::CudaMem& cuda_mem);
template<typename _Tp> _InputArray(const cudev::GpuMat_<_Tp>& m);
+ _InputArray(const UMat& um);
+ _InputArray(const std::vector<UMat>& umv);
+ _InputArray(const UMatExpr& uexpr);
- virtual Mat getMat(int i=-1) const;
+ virtual Mat getMat(int idx=-1) const;
+ virtual UMat getUMat(int idx=-1) const;
virtual void getMatVector(std::vector<Mat>& mv) const;
virtual cuda::GpuMat getGpuMat() const;
virtual ogl::Buffer getOGlBuffer() const;
+ void* getObj() const;
virtual int kind() const;
virtual Size size(int i=-1) const;
virtual ~_InputArray();
+protected:
int flags;
void* obj;
Size sz;
+
+ void init(int _flags, const void* _obj);
+ void init(int _flags, const void* _obj, Size _sz);
};
};
_OutputArray();
+ _OutputArray(int _flags, void* _obj);
_OutputArray(Mat& m);
_OutputArray(std::vector<Mat>& vec);
_OutputArray(cuda::GpuMat& d_mat);
template<typename _Tp> _OutputArray(Mat_<_Tp>& m);
template<typename _Tp> _OutputArray(_Tp* vec, int n);
template<typename _Tp, int m, int n> _OutputArray(Matx<_Tp, m, n>& matx);
+ _OutputArray(UMat& m);
+ _OutputArray(std::vector<UMat>& vec);
_OutputArray(const Mat& m);
_OutputArray(const std::vector<Mat>& vec);
template<typename _Tp> _OutputArray(const Mat_<_Tp>& m);
template<typename _Tp> _OutputArray(const _Tp* vec, int n);
template<typename _Tp, int m, int n> _OutputArray(const Matx<_Tp, m, n>& matx);
+ _OutputArray(const UMat& m);
+ _OutputArray(const std::vector<UMat>& vec);
virtual bool fixedSize() const;
virtual bool fixedType() const;
virtual void create(int dims, const int* size, int type, int i=-1, bool allowTransposed=false, int fixedDepthMask=0) const;
virtual void release() const;
virtual void clear() const;
+};
+
- virtual ~_OutputArray();
+class CV_EXPORTS _InputOutputArray : public _OutputArray
+{
+public:
+ _InputOutputArray();
+ _InputOutputArray(int _flags, void* _obj);
+ _InputOutputArray(Mat& m);
+ _InputOutputArray(std::vector<Mat>& vec);
+ _InputOutputArray(cuda::GpuMat& d_mat);
+ _InputOutputArray(ogl::Buffer& buf);
+ _InputOutputArray(cuda::CudaMem& cuda_mem);
+ template<typename _Tp> _InputOutputArray(cudev::GpuMat_<_Tp>& m);
+ template<typename _Tp> _InputOutputArray(std::vector<_Tp>& vec);
+ template<typename _Tp> _InputOutputArray(std::vector<std::vector<_Tp> >& vec);
+ template<typename _Tp> _InputOutputArray(std::vector<Mat_<_Tp> >& vec);
+ template<typename _Tp> _InputOutputArray(Mat_<_Tp>& m);
+ template<typename _Tp> _InputOutputArray(_Tp* vec, int n);
+ template<typename _Tp, int m, int n> _InputOutputArray(Matx<_Tp, m, n>& matx);
+ _InputOutputArray(UMat& m);
+ _InputOutputArray(std::vector<UMat>& vec);
+
+ _InputOutputArray(const Mat& m);
+ _InputOutputArray(const std::vector<Mat>& vec);
+ _InputOutputArray(const cuda::GpuMat& d_mat);
+ _InputOutputArray(const ogl::Buffer& buf);
+ _InputOutputArray(const cuda::CudaMem& cuda_mem);
+ template<typename _Tp> _InputOutputArray(const cudev::GpuMat_<_Tp>& m);
+ template<typename _Tp> _InputOutputArray(const std::vector<_Tp>& vec);
+ template<typename _Tp> _InputOutputArray(const std::vector<std::vector<_Tp> >& vec);
+ template<typename _Tp> _InputOutputArray(const std::vector<Mat_<_Tp> >& vec);
+ template<typename _Tp> _InputOutputArray(const Mat_<_Tp>& m);
+ template<typename _Tp> _InputOutputArray(const _Tp* vec, int n);
+ template<typename _Tp, int m, int n> _InputOutputArray(const Matx<_Tp, m, n>& matx);
+ _InputOutputArray(const UMat& m);
+ _InputOutputArray(const std::vector<UMat>& vec);
};
typedef const _InputArray& InputArray;
typedef InputArray InputArrayOfArrays;
typedef const _OutputArray& OutputArray;
typedef OutputArray OutputArrayOfArrays;
-typedef OutputArray InputOutputArray;
-typedef OutputArray InputOutputArrayOfArrays;
-
-CV_EXPORTS OutputArray noArray();
-
+typedef const _InputOutputArray& InputOutputArray;
+typedef InputOutputArray InputOutputArrayOfArrays;
+CV_EXPORTS InputOutputArray noArray();
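+
+// A sketch of how the new read-write proxy is used (assumed): the updated
+// calcCovarMatrix() above takes the mean as InputOutputArray, so it can be
+// supplied by the caller as well as computed:
+//
+//     Mat samples(10, 3, CV_64F), covar, mean;
+//     randu(samples, 0., 1.);
+//     calcCovarMatrix(samples, covar, mean, COVAR_NORMAL | COVAR_ROWS);
+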
/////////////////////////////////// MatAllocator //////////////////////////////////////
+struct CV_EXPORTS UMatData;
+
/*!
Custom array allocator
public:
MatAllocator() {}
virtual ~MatAllocator() {}
- virtual void allocate(int dims, const int* sizes, int type, int*& refcount,
- uchar*& datastart, uchar*& data, size_t* step) = 0;
- virtual void deallocate(int* refcount, uchar* datastart, uchar* data) = 0;
-};
+    // let's comment it out for now to detect and fix all the uses of allocator
+ //virtual void allocate(int dims, const int* sizes, int type, int*& refcount,
+ // uchar*& datastart, uchar*& data, size_t* step) = 0;
+ //virtual void deallocate(int* refcount, uchar* datastart, uchar* data) = 0;
+ virtual UMatData* allocate(int dims, const int* sizes,
+ int type, size_t* step) const = 0;
+ virtual bool allocate(UMatData* data, int accessflags) const = 0;
+ virtual void deallocate(UMatData* data) const = 0;
+ virtual void map(UMatData* data, int accessflags) const = 0;
+ virtual void unmap(UMatData* data) const = 0;
+ virtual void download(UMatData* data, void* dst, int dims, const size_t sz[],
+ const size_t srcofs[], const size_t srcstep[],
+ const size_t dststep[]) const = 0;
+ virtual void upload(UMatData* data, const void* src, int dims, const size_t sz[],
+ const size_t dstofs[], const size_t dststep[],
+ const size_t srcstep[]) const = 0;
+ virtual void copy(UMatData* srcdata, UMatData* dstdata, int dims, const size_t sz[],
+ const size_t srcofs[], const size_t srcstep[],
+ const size_t dstofs[], const size_t dststep[], bool sync) const = 0;
+};
//////////////////////////////// MatCommaInitializer //////////////////////////////////
};
+/////////////////////////////////////// Mat ///////////////////////////////////////////
+// Note: UMatData might be allocated together with the
+// matrix data, not as a separate object. Therefore, it
+// has no destructor; the structure should be explicitly
+// initialized using init().
+struct CV_EXPORTS UMatData
+{
+ enum { COPY_ON_MAP=1, HOST_COPY_OBSOLETE=2,
+ DEVICE_COPY_OBSOLETE=4, TEMP_UMAT=8, TEMP_COPIED_UMAT=24 };
+ UMatData(const MatAllocator* allocator);
+
+ // provide atomic access to the structure
+ void lock();
+ void unlock();
+
+ bool hostCopyObsolete() const;
+ bool deviceCopyObsolete() const;
+ bool copyOnMap() const;
+ bool tempUMat() const;
+ bool tempCopiedUMat() const;
+ void markHostCopyObsolete(bool flag);
+ void markDeviceCopyObsolete(bool flag);
+
+ const MatAllocator* prevAllocator;
+ const MatAllocator* currAllocator;
+ int urefcount;
+ int refcount;
+ uchar* data;
+ uchar* origdata;
+ size_t size;
-/////////////////////////////////////// Mat ///////////////////////////////////////////
+ int flags;
+ void* handle;
+ void* userdata;
+};
-/*!
+
+struct CV_EXPORTS UMatDataAutoLock
+{
+ UMatDataAutoLock(UMatData* u);
+ ~UMatDataAutoLock();
+ UMatData* u;
+};
+
+
+struct CV_EXPORTS MatSize
+{
+ MatSize(int* _p);
+ Size operator()() const;
+ const int& operator[](int i) const;
+ int& operator[](int i);
+ operator const int*() const;
+ bool operator == (const MatSize& sz) const;
+ bool operator != (const MatSize& sz) const;
+
+ int* p;
+};
+
+struct CV_EXPORTS MatStep
+{
+ MatStep();
+ MatStep(size_t s);
+ const size_t& operator[](int i) const;
+ size_t& operator[](int i);
+ operator size_t() const;
+ MatStep& operator = (size_t s);
+
+ size_t* p;
+ size_t buf[2];
+protected:
+ MatStep& operator = (const MatStep&);
+};
+
+ /*!
The n-dimensional matrix class.
The class represents an n-dimensional dense numerical array that can act as
//! builds matrix from comma initializer
template<typename _Tp> explicit Mat(const MatCommaInitializer_<_Tp>& commaInitializer);
- // //! converts old-style CvMat to the new matrix; the data is not copied by default
- // Mat(const CvMat* m, bool copyData=false);
- // //! converts old-style CvMatND to the new matrix; the data is not copied by default
- // Mat(const CvMatND* m, bool copyData=false);
- // //! converts old-style IplImage to the new matrix; the data is not copied by default
- // Mat(const IplImage* img, bool copyData=false);
- //Mat(const void* img, bool copyData=false);
-
//! download data from GpuMat
explicit Mat(const cuda::GpuMat& m);
Mat& operator = (const Mat& m);
Mat& operator = (const MatExpr& expr);
+ //! retrieve UMat from Mat
+ UMat getUMat(int accessFlags) const;
+
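+    // A minimal sketch (illustrative only; the file name is a placeholder):
+    //     Mat m = imread("image.png", IMREAD_GRAYSCALE);
+    //     UMat um = m.getUMat(ACCESS_READ);   // wrap the data for device use
+    //     Mat m2 = um.getMat(ACCESS_READ);    // map it back for host access
+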
//! returns a new matrix header for the specified row
Mat row(int y) const;
//! returns a new matrix header for the specified column
//! pointer to the data
uchar* data;
- //! pointer to the reference counter;
- // when matrix points to user-allocated data, the pointer is NULL
- int* refcount;
-
//! helper fields used in locateROI and adjustROI
uchar* datastart;
uchar* dataend;
//! custom allocator
MatAllocator* allocator;
+ //! and the standard allocator
+ static MatAllocator* getStdAllocator();
- struct CV_EXPORTS MSize
- {
- MSize(int* _p);
- Size operator()() const;
- const int& operator[](int i) const;
- int& operator[](int i);
- operator const int*() const;
- bool operator == (const MSize& sz) const;
- bool operator != (const MSize& sz) const;
-
- int* p;
- };
+ //! interaction with UMat
+ UMatData* u;
- struct CV_EXPORTS MStep
- {
- MStep();
- MStep(size_t s);
- const size_t& operator[](int i) const;
- size_t& operator[](int i);
- operator size_t() const;
- MStep& operator = (size_t s);
-
- size_t* p;
- size_t buf[2];
- protected:
- MStep& operator = (const MStep&);
- };
-
- MSize size;
- MStep step;
+ MatSize size;
+ MatStep step;
protected:
};
typedef Mat_<Vec4d> Mat4d;
+class CV_EXPORTS UMatExpr;
+
+class CV_EXPORTS UMat
+{
+public:
+ //! default constructor
+ UMat();
+ //! constructs 2D matrix of the specified size and type
+ // (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
+ UMat(int rows, int cols, int type);
+ UMat(Size size, int type);
+    //! constructs 2D matrix and fills it with the specified value _s.
+ UMat(int rows, int cols, int type, const Scalar& s);
+ UMat(Size size, int type, const Scalar& s);
+
+ //! constructs n-dimensional matrix
+ UMat(int ndims, const int* sizes, int type);
+ UMat(int ndims, const int* sizes, int type, const Scalar& s);
+
+ //! copy constructor
+ UMat(const UMat& m);
+
+ //! creates a matrix header for a part of the bigger matrix
+ UMat(const UMat& m, const Range& rowRange, const Range& colRange=Range::all());
+ UMat(const UMat& m, const Rect& roi);
+ UMat(const UMat& m, const Range* ranges);
+ //! builds matrix from std::vector with or without copying the data
+ template<typename _Tp> explicit UMat(const std::vector<_Tp>& vec, bool copyData=false);
+ //! builds matrix from cv::Vec; the data is copied by default
+ template<typename _Tp, int n> explicit UMat(const Vec<_Tp, n>& vec, bool copyData=true);
+ //! builds matrix from cv::Matx; the data is copied by default
+ template<typename _Tp, int m, int n> explicit UMat(const Matx<_Tp, m, n>& mtx, bool copyData=true);
+ //! builds matrix from a 2D point
+ template<typename _Tp> explicit UMat(const Point_<_Tp>& pt, bool copyData=true);
+ //! builds matrix from a 3D point
+ template<typename _Tp> explicit UMat(const Point3_<_Tp>& pt, bool copyData=true);
+ //! builds matrix from comma initializer
+ template<typename _Tp> explicit UMat(const MatCommaInitializer_<_Tp>& commaInitializer);
+
+ //! destructor - calls release()
+ ~UMat();
+ //! assignment operators
+ UMat& operator = (const UMat& m);
+ UMat& operator = (const UMatExpr& expr);
+
+ Mat getMat(int flags) const;
+
+ //! returns a new matrix header for the specified row
+ UMat row(int y) const;
+ //! returns a new matrix header for the specified column
+ UMat col(int x) const;
+ //! ... for the specified row span
+ UMat rowRange(int startrow, int endrow) const;
+ UMat rowRange(const Range& r) const;
+ //! ... for the specified column span
+ UMat colRange(int startcol, int endcol) const;
+ UMat colRange(const Range& r) const;
+ //! ... for the specified diagonal
+ // (d=0 - the main diagonal,
+ // >0 - a diagonal from the lower half,
+ // <0 - a diagonal from the upper half)
+ UMat diag(int d=0) const;
+    //! constructs a square diagonal matrix whose main diagonal is the vector "d"
+ static UMat diag(const UMat& d);
+
+ //! returns deep copy of the matrix, i.e. the data is copied
+ UMat clone() const;
+ //! copies the matrix content to "m".
+ // It calls m.create(this->size(), this->type()).
+ void copyTo( OutputArray m ) const;
+ //! copies those matrix elements to "m" that are marked with non-zero mask elements.
+ void copyTo( OutputArray m, InputArray mask ) const;
+    //! converts matrix to another data type with optional scaling. See cvConvertScale.
+ void convertTo( OutputArray m, int rtype, double alpha=1, double beta=0 ) const;
+
+ void assignTo( UMat& m, int type=-1 ) const;
+
+ //! sets every matrix element to s
+ UMat& operator = (const Scalar& s);
+ //! sets some of the matrix elements to s, according to the mask
+ UMat& setTo(InputArray value, InputArray mask=noArray());
+ //! creates alternative matrix header for the same data, with different
+ // number of channels and/or different number of rows. see cvReshape.
+ UMat reshape(int cn, int rows=0) const;
+ UMat reshape(int cn, int newndims, const int* newsz) const;
+
+ //! matrix transposition by means of matrix expressions
+ UMatExpr t() const;
+ //! matrix inversion by means of matrix expressions
+ UMatExpr inv(int method=DECOMP_LU) const;
+ //! per-element matrix multiplication by means of matrix expressions
+ UMatExpr mul(InputArray m, double scale=1) const;
+
+ //! computes cross-product of 2 3D vectors
+ UMat cross(InputArray m) const;
+ //! computes dot-product
+ double dot(InputArray m) const;
+
+ //! Matlab-style matrix initialization
+ static UMatExpr zeros(int rows, int cols, int type);
+ static UMatExpr zeros(Size size, int type);
+ static UMatExpr zeros(int ndims, const int* sz, int type);
+ static UMatExpr ones(int rows, int cols, int type);
+ static UMatExpr ones(Size size, int type);
+ static UMatExpr ones(int ndims, const int* sz, int type);
+ static UMatExpr eye(int rows, int cols, int type);
+ static UMatExpr eye(Size size, int type);
+
+ //! allocates new matrix data unless the matrix already has specified size and type.
+ // previous data is unreferenced if needed.
+ void create(int rows, int cols, int type);
+ void create(Size size, int type);
+ void create(int ndims, const int* sizes, int type);
+
+ //! increases the reference counter; use with care to avoid memleaks
+ void addref();
+ //! decreases reference counter;
+ // deallocates the data when reference counter reaches 0.
+ void release();
+
+ //! deallocates the matrix data
+ void deallocate();
+ //! internal use function; properly re-allocates _size, _step arrays
+ void copySize(const UMat& m);
+
+ //! locates matrix header within a parent matrix. See below
+ void locateROI( Size& wholeSize, Point& ofs ) const;
+ //! moves/resizes the current matrix ROI inside the parent matrix.
+ UMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
+ //! extracts a rectangular sub-matrix
+ // (this is a generalized form of row, rowRange etc.)
+ UMat operator()( Range rowRange, Range colRange ) const;
+ UMat operator()( const Rect& roi ) const;
+ UMat operator()( const Range* ranges ) const;
+
+ //! returns true iff the matrix data is continuous
+ // (i.e. when there are no gaps between successive rows).
+ // similar to CV_IS_MAT_CONT(cvmat->type)
+ bool isContinuous() const;
+
+ //! returns true if the matrix is a submatrix of another matrix
+ bool isSubmatrix() const;
+
+ //! returns element size in bytes,
+ // similar to CV_ELEM_SIZE(cvmat->type)
+ size_t elemSize() const;
+ //! returns the size of element channel in bytes.
+ size_t elemSize1() const;
+ //! returns element type, similar to CV_MAT_TYPE(cvmat->type)
+ int type() const;
+    //! returns element depth, similar to CV_MAT_DEPTH(cvmat->type)
+    int depth() const;
+    //! returns number of channels, similar to CV_MAT_CN(cvmat->type)
+    int channels() const;
+ //! returns step/elemSize1()
+ size_t step1(int i=0) const;
+ //! returns true if matrix data is NULL
+ bool empty() const;
+ //! returns the total number of matrix elements
+ size_t total() const;
+
+ //! returns N if the matrix is 1-channel (N x ptdim) or ptdim-channel (1 x N) or (N x 1); negative number otherwise
+ int checkVector(int elemChannels, int depth=-1, bool requireContinuous=true) const;
+
+ void* handle(int accessFlags) const;
+ void ndoffset(size_t* ofs) const;
+
+ enum { MAGIC_VAL = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG };
+ enum { MAGIC_MASK = 0xFFFF0000, TYPE_MASK = 0x00000FFF, DEPTH_MASK = 7 };
+
+ /*! includes several bit-fields:
+ - the magic signature
+ - continuity flag
+ - depth
+ - number of channels
+ */
+ int flags;
+ //! the matrix dimensionality, >= 2
+ int dims;
+ //! the number of rows and columns or (-1, -1) when the matrix has more than 2 dimensions
+ int rows, cols;
+
+ //! custom allocator
+ MatAllocator* allocator;
+ //! and the standard allocator
+ static MatAllocator* getStdAllocator();
+
+ // black-box container of UMat data
+ UMatData* u;
+
+ // offset of the submatrix (or 0)
+ size_t offset;
+
+ MatSize size;
+ MatStep step;
+
+protected:
+};
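+
+// A minimal usage sketch of the class above (illustrative only):
+//
+//     UMat a(1000, 1000, CV_32F, Scalar::all(1));
+//     UMat b = a.clone();                 // deep copy of the data
+//     UMat roi = a(Rect(0, 0, 100, 100));
+//     roi.setTo(Scalar::all(0));          // writes through to "a"
+//     Mat h = a.getMat(ACCESS_READ);      // map to host memory for inspection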
+
/////////////////////////// multi-dimensional sparse matrix //////////////////////////
//////////////////////// Input/Output Arrays ////////////////////////
+inline void _InputArray::init(int _flags, const void* _obj)
+{ flags = _flags; obj = (void*)_obj; }
+
+inline void _InputArray::init(int _flags, const void* _obj, Size _sz)
+{ flags = _flags; obj = (void*)_obj; sz = _sz; }
+
+inline void* _InputArray::getObj() const { return obj; }
+
+inline _InputArray::_InputArray() { init(0, 0); }
+inline _InputArray::_InputArray(int _flags, void* _obj) { init(_flags, _obj); }
+inline _InputArray::_InputArray(const Mat& m) { init(MAT+ACCESS_READ, &m); }
+inline _InputArray::_InputArray(const std::vector<Mat>& vec) { init(STD_VECTOR_MAT+ACCESS_READ, &vec); }
+inline _InputArray::_InputArray(const UMat& m) { init(UMAT+ACCESS_READ, &m); }
+inline _InputArray::_InputArray(const std::vector<UMat>& vec) { init(STD_VECTOR_UMAT+ACCESS_READ, &vec); }
+
template<typename _Tp> inline
_InputArray::_InputArray(const std::vector<_Tp>& vec)
- : flags(FIXED_TYPE + STD_VECTOR + DataType<_Tp>::type), obj((void*)&vec)
-{}
+{ init(FIXED_TYPE + STD_VECTOR + DataType<_Tp>::type + ACCESS_READ, &vec); }
template<typename _Tp> inline
_InputArray::_InputArray(const std::vector<std::vector<_Tp> >& vec)
- : flags(FIXED_TYPE + STD_VECTOR_VECTOR + DataType<_Tp>::type), obj((void*)&vec)
-{}
+{ init(FIXED_TYPE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_READ, &vec); }
template<typename _Tp> inline
_InputArray::_InputArray(const std::vector<Mat_<_Tp> >& vec)
- : flags(FIXED_TYPE + STD_VECTOR_MAT + DataType<_Tp>::type), obj((void*)&vec)
-{}
+{ init(FIXED_TYPE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_READ, &vec); }
template<typename _Tp, int m, int n> inline
_InputArray::_InputArray(const Matx<_Tp, m, n>& mtx)
- : flags(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type), obj((void*)&mtx), sz(n, m)
-{}
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_READ, &mtx, Size(n, m)); }
template<typename _Tp> inline
_InputArray::_InputArray(const _Tp* vec, int n)
- : flags(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type), obj((void*)vec), sz(n, 1)
-{}
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_READ, vec, Size(n, 1)); }
template<typename _Tp> inline
_InputArray::_InputArray(const Mat_<_Tp>& m)
- : flags(FIXED_TYPE + MAT + DataType<_Tp>::type), obj((void*)&m)
-{}
+{ init(FIXED_TYPE + MAT + DataType<_Tp>::type + ACCESS_READ, &m); }
+inline _InputArray::_InputArray(const double& val)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + CV_64F + ACCESS_READ, &val, Size(1,1)); }
+
+inline _InputArray::_InputArray(const MatExpr& expr)
+{ init(FIXED_TYPE + FIXED_SIZE + EXPR + ACCESS_READ, &expr); }
+
+inline _InputArray::_InputArray(const cuda::GpuMat& d_mat)
+{ init(GPU_MAT + ACCESS_READ, &d_mat); }
+
+inline _InputArray::_InputArray(const ogl::Buffer& buf)
+{ init(OPENGL_BUFFER + ACCESS_READ, &buf); }
+
+inline _InputArray::_InputArray(const cuda::CudaMem& cuda_mem)
+{ init(CUDA_MEM + ACCESS_READ, &cuda_mem); }
+
+inline _InputArray::~_InputArray() {}
+
+////////////////////////////////////////////////////////////////////////////////////////
+
+inline _OutputArray::_OutputArray() { init(ACCESS_WRITE, 0); }
+inline _OutputArray::_OutputArray(int _flags, void* _obj) { init(_flags|ACCESS_WRITE, _obj); }
+inline _OutputArray::_OutputArray(Mat& m) { init(MAT+ACCESS_WRITE, &m); }
+inline _OutputArray::_OutputArray(std::vector<Mat>& vec) { init(STD_VECTOR_MAT+ACCESS_WRITE, &vec); }
+inline _OutputArray::_OutputArray(UMat& m) { init(UMAT+ACCESS_WRITE, &m); }
+inline _OutputArray::_OutputArray(std::vector<UMat>& vec) { init(STD_VECTOR_UMAT+ACCESS_WRITE, &vec); }
template<typename _Tp> inline
_OutputArray::_OutputArray(std::vector<_Tp>& vec)
- : _InputArray(vec)
-{}
+{ init(FIXED_TYPE + STD_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
template<typename _Tp> inline
_OutputArray::_OutputArray(std::vector<std::vector<_Tp> >& vec)
- : _InputArray(vec)
-{}
+{ init(FIXED_TYPE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
template<typename _Tp> inline
_OutputArray::_OutputArray(std::vector<Mat_<_Tp> >& vec)
- : _InputArray(vec)
-{}
+{ init(FIXED_TYPE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
template<typename _Tp> inline
_OutputArray::_OutputArray(Mat_<_Tp>& m)
- : _InputArray(m)
-{}
+{ init(FIXED_TYPE + MAT + DataType<_Tp>::type + ACCESS_WRITE, &m); }
template<typename _Tp, int m, int n> inline
_OutputArray::_OutputArray(Matx<_Tp, m, n>& mtx)
- : _InputArray(mtx)
-{}
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_WRITE, &mtx, Size(n, m)); }
template<typename _Tp> inline
_OutputArray::_OutputArray(_Tp* vec, int n)
- : _InputArray(vec, n)
-{}
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_WRITE, vec, Size(n, 1)); }
template<typename _Tp> inline
_OutputArray::_OutputArray(const std::vector<_Tp>& vec)
- : _InputArray(vec)
-{
- flags |= FIXED_SIZE;
-}
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
template<typename _Tp> inline
_OutputArray::_OutputArray(const std::vector<std::vector<_Tp> >& vec)
- : _InputArray(vec)
-{
- flags |= FIXED_SIZE;
-}
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
template<typename _Tp> inline
_OutputArray::_OutputArray(const std::vector<Mat_<_Tp> >& vec)
- : _InputArray(vec)
-{
- flags |= FIXED_SIZE;
-}
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_WRITE, &vec); }
template<typename _Tp> inline
_OutputArray::_OutputArray(const Mat_<_Tp>& m)
- : _InputArray(m)
-{
- flags |= FIXED_SIZE;
-}
+{ init(FIXED_TYPE + FIXED_SIZE + MAT + DataType<_Tp>::type + ACCESS_WRITE, &m); }
template<typename _Tp, int m, int n> inline
_OutputArray::_OutputArray(const Matx<_Tp, m, n>& mtx)
- : _InputArray(mtx)
-{}
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_WRITE, &mtx, Size(n, m)); }
template<typename _Tp> inline
_OutputArray::_OutputArray(const _Tp* vec, int n)
- : _InputArray(vec, n)
-{}
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_WRITE, vec, Size(n, 1)); }
+
+inline _OutputArray::_OutputArray(cuda::GpuMat& d_mat)
+{ init(GPU_MAT + ACCESS_WRITE, &d_mat); }
+
+inline _OutputArray::_OutputArray(ogl::Buffer& buf)
+{ init(OPENGL_BUFFER + ACCESS_WRITE, &buf); }
+
+inline _OutputArray::_OutputArray(cuda::CudaMem& cuda_mem)
+{ init(CUDA_MEM + ACCESS_WRITE, &cuda_mem); }
+
+inline _OutputArray::_OutputArray(const Mat& m)
+{ init(FIXED_TYPE + FIXED_SIZE + MAT + ACCESS_WRITE, &m); }
+
+inline _OutputArray::_OutputArray(const std::vector<Mat>& vec)
+{ init(FIXED_SIZE + STD_VECTOR_MAT + ACCESS_WRITE, &vec); }
+
+inline _OutputArray::_OutputArray(const cuda::GpuMat& d_mat)
+{ init(FIXED_TYPE + FIXED_SIZE + GPU_MAT + ACCESS_WRITE, &d_mat); }
+
+inline _OutputArray::_OutputArray(const ogl::Buffer& buf)
+{ init(FIXED_TYPE + FIXED_SIZE + OPENGL_BUFFER + ACCESS_WRITE, &buf); }
+inline _OutputArray::_OutputArray(const cuda::CudaMem& cuda_mem)
+{ init(FIXED_TYPE + FIXED_SIZE + CUDA_MEM + ACCESS_WRITE, &cuda_mem); }
+///////////////////////////////////////////////////////////////////////////////////////////
-//////////////////////////////// Mat ////////////////////////////////
+inline _InputOutputArray::_InputOutputArray() { init(ACCESS_RW, 0); }
+inline _InputOutputArray::_InputOutputArray(int _flags, void* _obj) { init(_flags|ACCESS_RW, _obj); }
+inline _InputOutputArray::_InputOutputArray(Mat& m) { init(MAT+ACCESS_RW, &m); }
+inline _InputOutputArray::_InputOutputArray(std::vector<Mat>& vec) { init(STD_VECTOR_MAT+ACCESS_RW, &vec); }
+inline _InputOutputArray::_InputOutputArray(UMat& m) { init(UMAT+ACCESS_RW, &m); }
+inline _InputOutputArray::_InputOutputArray(std::vector<UMat>& vec) { init(STD_VECTOR_UMAT+ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(std::vector<_Tp>& vec)
+{ init(FIXED_TYPE + STD_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(std::vector<std::vector<_Tp> >& vec)
+{ init(FIXED_TYPE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(std::vector<Mat_<_Tp> >& vec)
+{ init(FIXED_TYPE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(Mat_<_Tp>& m)
+{ init(FIXED_TYPE + MAT + DataType<_Tp>::type + ACCESS_RW, &m); }
+
+template<typename _Tp, int m, int n> inline
+_InputOutputArray::_InputOutputArray(Matx<_Tp, m, n>& mtx)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_RW, &mtx, Size(n, m)); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(_Tp* vec, int n)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_RW, vec, Size(n, 1)); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(const std::vector<_Tp>& vec)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(const std::vector<std::vector<_Tp> >& vec)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_VECTOR + DataType<_Tp>::type + ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(const std::vector<Mat_<_Tp> >& vec)
+{ init(FIXED_TYPE + FIXED_SIZE + STD_VECTOR_MAT + DataType<_Tp>::type + ACCESS_RW, &vec); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(const Mat_<_Tp>& m)
+{ init(FIXED_TYPE + FIXED_SIZE + MAT + DataType<_Tp>::type + ACCESS_RW, &m); }
+
+template<typename _Tp, int m, int n> inline
+_InputOutputArray::_InputOutputArray(const Matx<_Tp, m, n>& mtx)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_RW, &mtx, Size(n, m)); }
+
+template<typename _Tp> inline
+_InputOutputArray::_InputOutputArray(const _Tp* vec, int n)
+{ init(FIXED_TYPE + FIXED_SIZE + MATX + DataType<_Tp>::type + ACCESS_RW, vec, Size(n, 1)); }
+
+inline _InputOutputArray::_InputOutputArray(cuda::GpuMat& d_mat)
+{ init(GPU_MAT + ACCESS_RW, &d_mat); }
+
+inline _InputOutputArray::_InputOutputArray(ogl::Buffer& buf)
+{ init(OPENGL_BUFFER + ACCESS_RW, &buf); }
+
+inline _InputOutputArray::_InputOutputArray(cuda::CudaMem& cuda_mem)
+{ init(CUDA_MEM + ACCESS_RW, &cuda_mem); }
+
+inline _InputOutputArray::_InputOutputArray(const Mat& m)
+{ init(FIXED_TYPE + FIXED_SIZE + MAT + ACCESS_RW, &m); }
+
+inline _InputOutputArray::_InputOutputArray(const std::vector<Mat>& vec)
+{ init(FIXED_SIZE + STD_VECTOR_MAT + ACCESS_RW, &vec); }
+
+inline _InputOutputArray::_InputOutputArray(const cuda::GpuMat& d_mat)
+{ init(FIXED_TYPE + FIXED_SIZE + GPU_MAT + ACCESS_RW, &d_mat); }
+
+inline _InputOutputArray::_InputOutputArray(const ogl::Buffer& buf)
+{ init(FIXED_TYPE + FIXED_SIZE + OPENGL_BUFFER + ACCESS_RW, &buf); }
+
+inline _InputOutputArray::_InputOutputArray(const cuda::CudaMem& cuda_mem)
+{ init(FIXED_TYPE + FIXED_SIZE + CUDA_MEM + ACCESS_RW, &cuda_mem); }
+
+//////////////////////////////////////////// Mat //////////////////////////////////////////
inline
Mat::Mat()
- : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0),
- datalimit(0), allocator(0), size(&rows)
+ : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+ datalimit(0), allocator(0), u(0), size(&rows)
{}
inline
Mat::Mat(int _rows, int _cols, int _type)
- : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0),
- datalimit(0), allocator(0), size(&rows)
+ : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+ datalimit(0), allocator(0), u(0), size(&rows)
{
create(_rows, _cols, _type);
}
inline
Mat::Mat(int _rows, int _cols, int _type, const Scalar& _s)
- : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0),
- datalimit(0), allocator(0), size(&rows)
+ : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+ datalimit(0), allocator(0), u(0), size(&rows)
{
create(_rows, _cols, _type);
*this = _s;
inline
Mat::Mat(Size _sz, int _type)
- : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0),
- datalimit(0), allocator(0), size(&rows)
+ : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+ datalimit(0), allocator(0), u(0), size(&rows)
{
create( _sz.height, _sz.width, _type );
}
inline
Mat::Mat(Size _sz, int _type, const Scalar& _s)
- : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0),
- datalimit(0), allocator(0), size(&rows)
+ : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+ datalimit(0), allocator(0), u(0), size(&rows)
{
create(_sz.height, _sz.width, _type);
*this = _s;
inline
Mat::Mat(int _dims, const int* _sz, int _type)
- : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0),
- datalimit(0), allocator(0), size(&rows)
+ : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+ datalimit(0), allocator(0), u(0), size(&rows)
{
create(_dims, _sz, _type);
}
inline
Mat::Mat(int _dims, const int* _sz, int _type, const Scalar& _s)
- : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0),
- datalimit(0), allocator(0), size(&rows)
+ : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+ datalimit(0), allocator(0), u(0), size(&rows)
{
create(_dims, _sz, _type);
*this = _s;
inline
Mat::Mat(const Mat& m)
- : flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), data(m.data), refcount(m.refcount),
+ : flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), data(m.data),
datastart(m.datastart), dataend(m.dataend), datalimit(m.datalimit), allocator(m.allocator),
- size(&rows)
+ u(m.u), size(&rows)
{
- if( refcount )
- CV_XADD(refcount, 1);
+ if( u )
+ CV_XADD(&u->refcount, 1);
if( m.dims <= 2 )
{
step[0] = m.step[0]; step[1] = m.step[1];
inline
Mat::Mat(int _rows, int _cols, int _type, void* _data, size_t _step)
: flags(MAGIC_VAL + (_type & TYPE_MASK)), dims(2), rows(_rows), cols(_cols),
- data((uchar*)_data), refcount(0), datastart((uchar*)_data), dataend(0), datalimit(0),
- allocator(0), size(&rows)
+ data((uchar*)_data), datastart((uchar*)_data), dataend(0), datalimit(0),
+ allocator(0), u(0), size(&rows)
{
size_t esz = CV_ELEM_SIZE(_type);
size_t minstep = cols * esz;
inline
Mat::Mat(Size _sz, int _type, void* _data, size_t _step)
: flags(MAGIC_VAL + (_type & TYPE_MASK)), dims(2), rows(_sz.height), cols(_sz.width),
- data((uchar*)_data), refcount(0), datastart((uchar*)_data), dataend(0), datalimit(0),
- allocator(0), size(&rows)
+ data((uchar*)_data), datastart((uchar*)_data), dataend(0), datalimit(0),
+ allocator(0), u(0), size(&rows)
{
size_t esz = CV_ELEM_SIZE(_type);
size_t minstep = cols*esz;
template<typename _Tp> inline
Mat::Mat(const std::vector<_Tp>& vec, bool copyData)
: flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()),
- cols(1), data(0), refcount(0), datastart(0), dataend(0), allocator(0), size(&rows)
+ cols(1), data(0), datastart(0), dataend(0), allocator(0), u(0), size(&rows)
{
if(vec.empty())
return;
template<typename _Tp, int n> inline
Mat::Mat(const Vec<_Tp, n>& vec, bool copyData)
: flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows(n), cols(1), data(0),
- refcount(0), datastart(0), dataend(0), allocator(0), size(&rows)
+ datastart(0), dataend(0), allocator(0), u(0), size(&rows)
{
if( !copyData )
{
template<typename _Tp, int m, int n> inline
Mat::Mat(const Matx<_Tp,m,n>& M, bool copyData)
: flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows(m), cols(n), data(0),
- refcount(0), datastart(0), dataend(0), allocator(0), size(&rows)
+ datastart(0), dataend(0), allocator(0), u(0), size(&rows)
{
if( !copyData )
{
template<typename _Tp> inline
Mat::Mat(const Point_<_Tp>& pt, bool copyData)
: flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows(2), cols(1), data(0),
- refcount(0), datastart(0), dataend(0), allocator(0), size(&rows)
+ datastart(0), dataend(0), allocator(0), u(0), size(&rows)
{
if( !copyData )
{
template<typename _Tp> inline
Mat::Mat(const Point3_<_Tp>& pt, bool copyData)
: flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows(3), cols(1), data(0),
- refcount(0), datastart(0), dataend(0), allocator(0), size(&rows)
+ datastart(0), dataend(0), allocator(0), u(0), size(&rows)
{
if( !copyData )
{
template<typename _Tp> inline
Mat::Mat(const MatCommaInitializer_<_Tp>& commaInitializer)
: flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(0), rows(0), cols(0), data(0),
- refcount(0), datastart(0), dataend(0), allocator(0), size(&rows)
+ datastart(0), dataend(0), allocator(0), u(0), size(&rows)
{
*this = commaInitializer.operator Mat_<_Tp>();
}
{
if( this != &m )
{
- if( m.refcount )
- CV_XADD(m.refcount, 1);
+ if( m.u )
+ CV_XADD(&m.u->refcount, 1);
release();
flags = m.flags;
if( dims <= 2 && m.dims <= 2 )
datastart = m.datastart;
dataend = m.dataend;
datalimit = m.datalimit;
- refcount = m.refcount;
allocator = m.allocator;
+ u = m.u;
}
return *this;
}
inline
void Mat::addref()
{
- if( refcount )
- CV_XADD(refcount, 1);
+ if( u )
+ CV_XADD(&u->refcount, 1);
}
inline void Mat::release()
{
- if( refcount && CV_XADD(refcount, -1) == 1 )
+ if( u && CV_XADD(&u->refcount, -1) == 1 )
deallocate();
data = datastart = dataend = datalimit = 0;
size.p[0] = 0;
- refcount = 0;
+ u = 0;
}
inline
push_back((const Mat&)m);
}
-
-
-///////////////////////////// Mat::MSize ////////////////////////////
+///////////////////////////// MatSize ////////////////////////////
inline
-Mat::MSize::MSize(int* _p)
+MatSize::MatSize(int* _p)
: p(_p) {}
inline
-Size Mat::MSize::operator()() const
+Size MatSize::operator()() const
{
CV_DbgAssert(p[-1] <= 2);
return Size(p[1], p[0]);
}
inline
-const int& Mat::MSize::operator[](int i) const
+const int& MatSize::operator[](int i) const
{
return p[i];
}
inline
-int& Mat::MSize::operator[](int i)
+int& MatSize::operator[](int i)
{
return p[i];
}
inline
-Mat::MSize::operator const int*() const
+MatSize::operator const int*() const
{
return p;
}
inline
-bool Mat::MSize::operator == (const MSize& sz) const
+bool MatSize::operator == (const MatSize& sz) const
{
int d = p[-1];
int dsz = sz.p[-1];
}
inline
-bool Mat::MSize::operator != (const MSize& sz) const
+bool MatSize::operator != (const MatSize& sz) const
{
return !(*this == sz);
}
-///////////////////////////// Mat::MStep ////////////////////////////
+///////////////////////////// MatStep ////////////////////////////
inline
-Mat::MStep::MStep()
+MatStep::MatStep()
{
p = buf; p[0] = p[1] = 0;
}
inline
-Mat::MStep::MStep(size_t s)
+MatStep::MatStep(size_t s)
{
p = buf; p[0] = s; p[1] = 0;
}
inline
-const size_t& Mat::MStep::operator[](int i) const
+const size_t& MatStep::operator[](int i) const
{
return p[i];
}
inline
-size_t& Mat::MStep::operator[](int i)
+size_t& MatStep::operator[](int i)
{
return p[i];
}
-inline Mat::MStep::operator size_t() const
+inline MatStep::operator size_t() const
{
CV_DbgAssert( p == buf );
return buf[0];
}
-inline Mat::MStep& Mat::MStep::operator = (size_t s)
+inline MatStep& MatStep::operator = (size_t s)
{
CV_DbgAssert( p == buf );
buf[0] = s;
}
-/*template<typename T1, typename T2, typename Op> inline
-void process( const Mat_<T1>& m1, Mat_<T2>& m2, Op op )
-{
- int y, x, rows = m1.rows, cols = m1.cols;
-
- CV_DbgAssert( m1.size() == m2.size() );
-
- for( y = 0; y < rows; y++ )
- {
- const T1* src = m1[y];
- T2* dst = m2[y];
-
- for( x = 0; x < cols; x++ )
- dst[x] = op(src[x]);
- }
-}
-
-template<typename T1, typename T2, typename T3, typename Op> inline
-void process( const Mat_<T1>& m1, const Mat_<T2>& m2, Mat_<T3>& m3, Op op )
-{
- int y, x, rows = m1.rows, cols = m1.cols;
-
- CV_DbgAssert( m1.size() == m2.size() );
-
- for( y = 0; y < rows; y++ )
- {
- const T1* src1 = m1[y];
- const T2* src2 = m2[y];
- T3* dst = m3[y];
-
- for( x = 0; x < cols; x++ )
- dst[x] = op( src1[x], src2[x] );
- }
-}*/
-
-
-
///////////////////////////// SparseMat /////////////////////////////
inline
return a;
}
+
+//////////////////////////////// UMat ////////////////////////////////
+
+inline
+UMat::UMat()
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows)
+{}
+
+inline
+UMat::UMat(int _rows, int _cols, int _type)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows)
+{
+ create(_rows, _cols, _type);
+}
+
+inline
+UMat::UMat(int _rows, int _cols, int _type, const Scalar& _s)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows)
+{
+ create(_rows, _cols, _type);
+ *this = _s;
+}
+
+inline
+UMat::UMat(Size _sz, int _type)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows)
+{
+ create( _sz.height, _sz.width, _type );
+}
+
+inline
+UMat::UMat(Size _sz, int _type, const Scalar& _s)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows)
+{
+ create(_sz.height, _sz.width, _type);
+ *this = _s;
+}
+
+inline
+UMat::UMat(int _dims, const int* _sz, int _type)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows)
+{
+ create(_dims, _sz, _type);
+}
+
+inline
+UMat::UMat(int _dims, const int* _sz, int _type, const Scalar& _s)
+: flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows)
+{
+ create(_dims, _sz, _type);
+ *this = _s;
+}
+
+inline
+UMat::UMat(const UMat& m)
+: flags(m.flags), dims(m.dims), rows(m.rows), cols(m.cols), allocator(m.allocator),
+u(m.u), offset(m.offset), size(&rows)
+{
+ if( u )
+ CV_XADD(&(u->urefcount), 1);
+ if( m.dims <= 2 )
+ {
+ step[0] = m.step[0]; step[1] = m.step[1];
+ }
+ else
+ {
+ dims = 0;
+ copySize(m);
+ }
+}
+
+
+template<typename _Tp> inline
+UMat::UMat(const std::vector<_Tp>& vec, bool copyData)
+: flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG), dims(2), rows((int)vec.size()),
+cols(1), allocator(0), u(0), offset(0), size(&rows)
+{
+ if(vec.empty())
+ return;
+ if( !copyData )
+ {
+ // !!!TODO!!!
+ CV_Error(Error::StsNotImplemented, "");
+ }
+ else
+ Mat((int)vec.size(), 1, DataType<_Tp>::type, (uchar*)&vec[0]).copyTo(*this);
+}
+
+
+inline
+UMat::~UMat()
+{
+ release();
+ if( step.p != step.buf )
+ fastFree(step.p);
+}
+
+inline
+UMat& UMat::operator = (const UMat& m)
+{
+ if( this != &m )
+ {
+ if( m.u )
+ CV_XADD(&(m.u->urefcount), 1);
+ release();
+ flags = m.flags;
+ if( dims <= 2 && m.dims <= 2 )
+ {
+ dims = m.dims;
+ rows = m.rows;
+ cols = m.cols;
+ step[0] = m.step[0];
+ step[1] = m.step[1];
+ }
+ else
+ copySize(m);
+ allocator = m.allocator;
+ u = m.u;
+ offset = m.offset;
+ }
+ return *this;
+}
+
+inline
+UMat UMat::row(int y) const
+{
+ return UMat(*this, Range(y, y + 1), Range::all());
+}
+
+inline
+UMat UMat::col(int x) const
+{
+ return UMat(*this, Range::all(), Range(x, x + 1));
+}
+
+inline
+UMat UMat::rowRange(int startrow, int endrow) const
+{
+ return UMat(*this, Range(startrow, endrow), Range::all());
+}
+
+inline
+UMat UMat::rowRange(const Range& r) const
+{
+ return UMat(*this, r, Range::all());
+}
+
+inline
+UMat UMat::colRange(int startcol, int endcol) const
+{
+ return UMat(*this, Range::all(), Range(startcol, endcol));
+}
+
+inline
+UMat UMat::colRange(const Range& r) const
+{
+ return UMat(*this, Range::all(), r);
+}
+
+inline
+UMat UMat::clone() const
+{
+ UMat m;
+ copyTo(m);
+ return m;
+}
+
+inline
+void UMat::assignTo( UMat& m, int _type ) const
+{
+ if( _type < 0 )
+ m = *this;
+ else
+ convertTo(m, _type);
+}
+
+inline
+void UMat::create(int _rows, int _cols, int _type)
+{
+ _type &= TYPE_MASK;
+ if( dims <= 2 && rows == _rows && cols == _cols && type() == _type && u )
+ return;
+ int sz[] = {_rows, _cols};
+ create(2, sz, _type);
+}
+
+inline
+void UMat::create(Size _sz, int _type)
+{
+ create(_sz.height, _sz.width, _type);
+}
+
+inline
+void UMat::addref()
+{
+ if( u )
+ CV_XADD(&(u->urefcount), 1);
+}
+
+inline void UMat::release()
+{
+ if( u && CV_XADD(&(u->urefcount), -1) == 1 )
+ deallocate();
+ size.p[0] = 0;
+ u = 0;
+}
+
+inline
+UMat UMat::operator()( Range _rowRange, Range _colRange ) const
+{
+ return UMat(*this, _rowRange, _colRange);
+}
+
+inline
+UMat UMat::operator()( const Rect& roi ) const
+{
+ return UMat(*this, roi);
+}
+
+inline
+UMat UMat::operator()(const Range* ranges) const
+{
+ return UMat(*this, ranges);
+}
+
+inline
+bool UMat::isContinuous() const
+{
+ return (flags & CONTINUOUS_FLAG) != 0;
+}
+
+inline
+bool UMat::isSubmatrix() const
+{
+ return (flags & SUBMATRIX_FLAG) != 0;
+}
+
+inline
+size_t UMat::elemSize() const
+{
+ return dims > 0 ? step.p[dims - 1] : 0;
+}
+
+inline
+size_t UMat::elemSize1() const
+{
+ return CV_ELEM_SIZE1(flags);
+}
+
+inline
+int UMat::type() const
+{
+ return CV_MAT_TYPE(flags);
+}
+
+inline
+int UMat::depth() const
+{
+ return CV_MAT_DEPTH(flags);
+}
+
+inline
+int UMat::channels() const
+{
+ return CV_MAT_CN(flags);
+}
+
+inline
+size_t UMat::step1(int i) const
+{
+ return step.p[i] / elemSize1();
+}
+
+inline
+bool UMat::empty() const
+{
+ return u == 0 || total() == 0;
+}
+
+inline
+size_t UMat::total() const
+{
+ if( dims <= 2 )
+ return (size_t)rows * cols;
+ size_t p = 1;
+ for( int i = 0; i < dims; i++ )
+ p *= size[i];
+ return p;
+}
+
+inline bool UMatData::hostCopyObsolete() const { return (flags & HOST_COPY_OBSOLETE) != 0; }
+inline bool UMatData::deviceCopyObsolete() const { return (flags & DEVICE_COPY_OBSOLETE) != 0; }
+inline bool UMatData::copyOnMap() const { return (flags & COPY_ON_MAP) != 0; }
+inline bool UMatData::tempUMat() const { return (flags & TEMP_UMAT) != 0; }
+inline bool UMatData::tempCopiedUMat() const { return (flags & TEMP_COPIED_UMAT) == TEMP_COPIED_UMAT; }
+
+inline void UMatData::markHostCopyObsolete(bool flag)
+{
+ if(flag)
+ flags |= HOST_COPY_OBSOLETE;
+ else
+ flags &= ~HOST_COPY_OBSOLETE;
+}
+inline void UMatData::markDeviceCopyObsolete(bool flag)
+{
+ if(flag)
+ flags |= DEVICE_COPY_OBSOLETE;
+ else
+ flags &= ~DEVICE_COPY_OBSOLETE;
+}
+
+inline UMatDataAutoLock::UMatDataAutoLock(UMatData* _u) : u(_u) { u->lock(); }
+inline UMatDataAutoLock::~UMatDataAutoLock() { u->unlock(); }
+
} //cv
#endif
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the OpenCV Foundation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_OPENCL_HPP__
+#define __OPENCV_OPENCL_HPP__
+
+#include "opencv2/core.hpp"
+
+namespace cv { namespace ocl {
+
+CV_EXPORTS bool haveOpenCL();
+CV_EXPORTS bool useOpenCL();
+CV_EXPORTS void setUseOpenCL(bool flag);
+CV_EXPORTS void finish();
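+
+// A sketch of the intended use of these controls (assumed):
+//
+//     if (cv::ocl::haveOpenCL())         // an OpenCL runtime is available
+//         cv::ocl::setUseOpenCL(true);   // let OpenCL-accelerated paths run
+//     // ... enqueue some work via the T-API ...
+//     cv::ocl::finish();                 // wait for queued kernels to complete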
+
+class CV_EXPORTS Context;
+class CV_EXPORTS Device;
+class CV_EXPORTS Kernel;
+class CV_EXPORTS Program;
+class CV_EXPORTS ProgramSource;
+class CV_EXPORTS Queue;
+
+class CV_EXPORTS Device
+{
+public:
+ Device();
+ explicit Device(void* d);
+ Device(const Device& d);
+ Device& operator = (const Device& d);
+ ~Device();
+
+ void set(void* d);
+
+ enum
+ {
+ TYPE_DEFAULT = (1 << 0),
+ TYPE_CPU = (1 << 1),
+ TYPE_GPU = (1 << 2),
+ TYPE_ACCELERATOR = (1 << 3),
+ TYPE_DGPU = TYPE_GPU + (1 << 16),
+ TYPE_IGPU = TYPE_GPU + (1 << 17),
+ TYPE_ALL = 0xFFFFFFFF
+ };
+
+ String name() const;
+ String extensions() const;
+ String vendor() const;
+ String OpenCL_C_Version() const;
+ String OpenCLVersion() const;
+ String driverVersion() const;
+ void* ptr() const;
+
+ int type() const;
+
+ int addressBits() const;
+ bool available() const;
+ bool compilerAvailable() const;
+ bool linkerAvailable() const;
+
+ enum
+ {
+ FP_DENORM=(1 << 0),
+ FP_INF_NAN=(1 << 1),
+ FP_ROUND_TO_NEAREST=(1 << 2),
+ FP_ROUND_TO_ZERO=(1 << 3),
+ FP_ROUND_TO_INF=(1 << 4),
+ FP_FMA=(1 << 5),
+ FP_SOFT_FLOAT=(1 << 6),
+ FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7)
+ };
+ int doubleFPConfig() const;
+ int singleFPConfig() const;
+ int halfFPConfig() const;
+
+ bool endianLittle() const;
+ bool errorCorrectionSupport() const;
+
+ enum
+ {
+ EXEC_KERNEL=(1 << 0),
+ EXEC_NATIVE_KERNEL=(1 << 1)
+ };
+ int executionCapabilities() const;
+
+ size_t globalMemCacheSize() const;
+
+ enum
+ {
+ NO_CACHE=0,
+ READ_ONLY_CACHE=1,
+ READ_WRITE_CACHE=2
+ };
+ int globalMemCacheType() const;
+ int globalMemCacheLineSize() const;
+ size_t globalMemSize() const;
+
+ size_t localMemSize() const;
+ enum
+ {
+ NO_LOCAL_MEM=0,
+ LOCAL_IS_LOCAL=1,
+ LOCAL_IS_GLOBAL=2
+ };
+ int localMemType() const;
+ bool hostUnifiedMemory() const;
+
+ bool imageSupport() const;
+
+ size_t image2DMaxWidth() const;
+ size_t image2DMaxHeight() const;
+
+ size_t image3DMaxWidth() const;
+ size_t image3DMaxHeight() const;
+ size_t image3DMaxDepth() const;
+
+ size_t imageMaxBufferSize() const;
+ size_t imageMaxArraySize() const;
+
+ int maxClockFrequency() const;
+ int maxComputeUnits() const;
+ int maxConstantArgs() const;
+ size_t maxConstantBufferSize() const;
+
+ size_t maxMemAllocSize() const;
+ size_t maxParameterSize() const;
+
+ int maxReadImageArgs() const;
+ int maxWriteImageArgs() const;
+ int maxSamplers() const;
+
+ size_t maxWorkGroupSize() const;
+ int maxWorkItemDims() const;
+ void maxWorkItemSizes(size_t*) const;
+
+ int memBaseAddrAlign() const;
+
+ int nativeVectorWidthChar() const;
+ int nativeVectorWidthShort() const;
+ int nativeVectorWidthInt() const;
+ int nativeVectorWidthLong() const;
+ int nativeVectorWidthFloat() const;
+ int nativeVectorWidthDouble() const;
+ int nativeVectorWidthHalf() const;
+
+ int preferredVectorWidthChar() const;
+ int preferredVectorWidthShort() const;
+ int preferredVectorWidthInt() const;
+ int preferredVectorWidthLong() const;
+ int preferredVectorWidthFloat() const;
+ int preferredVectorWidthDouble() const;
+ int preferredVectorWidthHalf() const;
+
+ size_t printfBufferSize() const;
+ size_t profilingTimerResolution() const;
+
+ static const Device& getDefault();
+
+protected:
+ struct Impl;
+ Impl* p;
+};
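+
+// A device-query sketch using the accessors above (illustrative only):
+//
+//     const ocl::Device& dev = ocl::Device::getDefault();
+//     printf("%s (%s): localMem=%d KB, doubles=%s\n",
+//            dev.name().c_str(), dev.vendor().c_str(),
+//            (int)(dev.localMemSize() / 1024),
+//            dev.doubleFPConfig() > 0 ? "yes" : "no");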
+
+
+class CV_EXPORTS Context
+{
+public:
+ Context();
+ explicit Context(int dtype);
+ ~Context();
+ Context(const Context& c);
+ Context& operator = (const Context& c);
+
+ bool create(int dtype);
+ size_t ndevices() const;
+ const Device& device(size_t idx) const;
+ Program getProg(const ProgramSource& prog,
+ const String& buildopt, String& errmsg);
+
+ static Context& getDefault();
+ void* ptr() const;
+protected:
+ struct Impl;
+ Impl* p;
+};
+
+
+class CV_EXPORTS Queue
+{
+public:
+ Queue();
+ explicit Queue(const Context& c, const Device& d=Device());
+ ~Queue();
+ Queue(const Queue& q);
+ Queue& operator = (const Queue& q);
+
+ bool create(const Context& c=Context(), const Device& d=Device());
+ void finish();
+ void* ptr() const;
+ static Queue& getDefault();
+
+protected:
+ struct Impl;
+ Impl* p;
+};
+
+
+class CV_EXPORTS KernelArg
+{
+public:
+ enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8 };
+ KernelArg(int _flags, UMat* _m, void* _obj=0, size_t _sz=0);
+
+ static KernelArg Local() { return KernelArg(LOCAL, 0); }
+ static KernelArg ReadOnly(const UMat& m) { return KernelArg(READ_ONLY, (UMat*)&m); }
+ static KernelArg WriteOnly(const UMat& m) { return KernelArg(WRITE_ONLY, (UMat*)&m); }
+ static KernelArg Constant(const Mat& m);
+ template<typename _Tp> static KernelArg Constant(const _Tp* arr, size_t n)
+ { return KernelArg(CONSTANT, 0, (void*)arr, n); }
+
+ int flags;
+ UMat* m;
+ void* obj;
+ size_t sz;
+};
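+
+// A sketch of combining the wrappers above with Kernel::args() declared
+// below (the kernel object "k" and the matrices are hypothetical):
+//
+//     UMat src(sz, CV_8UC1), dst(sz, CV_8UC1);
+//     k.args(KernelArg::ReadOnly(src), KernelArg::WriteOnly(dst));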
+
+class CV_EXPORTS Kernel
+{
+public:
+ Kernel();
+ Kernel(const char* kname, const Program& prog);
+ Kernel(const char* kname, const ProgramSource& prog,
+ const String& buildopts, String& errmsg);
+ ~Kernel();
+ Kernel(const Kernel& k);
+ Kernel& operator = (const Kernel& k);
+
+ bool create(const char* kname, const Program& prog);
+ bool create(const char* kname, const ProgramSource& prog,
+ const String& buildopts, String& errmsg);
+
+ void set(int i, const void* value, size_t sz);
+ void set(int i, const UMat& m);
+ void set(int i, const KernelArg& arg);
+ template<typename _Tp> void set(int i, const _Tp& value)
+ { return set(i, &value, sizeof(value)); }
+
+ template<typename _Tp0>
+ Kernel& args(const _Tp0& a0)
+ {
+ set(0, a0); return *this;
+ }
+
+ template<typename _Tp0, typename _Tp1>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1)
+ {
+ set(0, a0); set(1, a1); return *this;
+ }
+
+ template<typename _Tp0, typename _Tp1, typename _Tp2>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2)
+ {
+ set(0, a0); set(1, a1); set(2, a2); return *this;
+ }
+
+ template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3)
+ {
+ set(0, a0); set(1, a1); set(2, a2); set(3, a3); return *this;
+ }
+
+ template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2,
+ const _Tp3& a3, const _Tp4& a4)
+ {
+ set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); return *this;
+ }
+
+ template<typename _Tp0, typename _Tp1, typename _Tp2,
+ typename _Tp3, typename _Tp4, typename _Tp5>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2,
+ const _Tp3& a3, const _Tp4& a4, const _Tp5& a5)
+ {
+ set(0, a0); set(1, a1); set(2, a2);
+ set(3, a3); set(4, a4); set(5, a5); return *this;
+ }
+
+ template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+ typename _Tp4, typename _Tp5, typename _Tp6>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+ const _Tp4& a4, const _Tp5& a5, const _Tp6& a6)
+ {
+ set(0, a0); set(1, a1); set(2, a2); set(3, a3);
+ set(4, a4); set(5, a5); set(6, a6); return *this;
+ }
+
+ template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+ typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+ const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7)
+ {
+ set(0, a0); set(1, a1); set(2, a2); set(3, a3);
+ set(4, a4); set(5, a5); set(6, a6); set(7, a7); return *this;
+ }
+
+ template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
+ typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+ const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+ const _Tp8& a8)
+ {
+ set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4);
+ set(5, a5); set(6, a6); set(7, a7); set(8, a8); return *this;
+ }
+
+ template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
+ typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+ const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+ const _Tp8& a8, const _Tp9& a9)
+ {
+ set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); set(5, a5);
+ set(6, a6); set(7, a7); set(8, a8); set(9, a9); return *this;
+ }
+
+ template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+ typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
+ typename _Tp8, typename _Tp9, typename _Tp10>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+ const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+ const _Tp8& a8, const _Tp9& a9, const _Tp10& a10)
+ {
+ set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); set(5, a5);
+ set(6, a6); set(7, a7); set(8, a8); set(9, a9); set(10, a10); return *this;
+ }
+
+ template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
+ typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
+ typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11>
+ Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
+ const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
+ const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11)
+ {
+ set(0, a0); set(1, a1); set(2, a2); set(3, a3); set(4, a4); set(5, a5);
+ set(6, a6); set(7, a7); set(8, a8); set(9, a9); set(10, a10); set(11, a11); return *this;
+ }
+
+ void run(int dims, size_t offset[], size_t globalsize[],
+ size_t localsize[], bool sync, const Queue& q=Queue());
+ void runTask(bool sync, const Queue& q=Queue());
+
+ size_t workGroupSize() const;
+ bool compileWorkGroupSize(size_t wsz[]) const;
+ size_t localMemSize() const;
+
+ void* ptr() const;
+ struct Impl;
+
+protected:
+ Impl* p;
+};
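+// Usage sketch (illustrative only -- the kernel name, source string and data
+// below are hypothetical, not part of this API):
+//
+//     const char* src =
+//         "__kernel void add_k(__global const float* a, __global const float* b,\n"
+//         "                    __global float* c, int n)\n"
+//         "{ int i = get_global_id(0); if (i < n) c[i] = a[i] + b[i]; }";
+//     String errmsg;
+//     ocl::Kernel k("add_k", ocl::ProgramSource(src), "", errmsg); // built on demand
+//     int n = 1024;
+//     UMat a(1, n, CV_32F), b(1, n, CV_32F), c(1, n, CV_32F);
+//     k.args(a, b, c, n);          // expands to set(0,a); set(1,b); set(2,c); set(3,n);
+//     size_t offset[] = { 0 }, globalsize[] = { (size_t)n };
+//     k.run(1, offset, globalsize, 0, true);  // null localsize: driver chooses; sync=true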
+
+class CV_EXPORTS Program
+{
+public:
+ Program();
+ Program(const ProgramSource& src,
+ const String& buildflags, String& errmsg);
+ explicit Program(const String& buf);
+ Program(const Program& prog);
+
+ Program& operator = (const Program& prog);
+ ~Program();
+
+ bool create(const ProgramSource& src,
+ const String& buildflags, String& errmsg);
+ bool read(const String& buf, const String& buildflags);
+ bool write(String& buf) const;
+
+ const ProgramSource& source() const;
+ void* ptr() const;
+
+ String getPrefix() const;
+ static String getPrefix(const String& buildflags);
+
+protected:
+ struct Impl;
+ Impl* p;
+};
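+// The read()/write()/getPrefix() trio makes simple binary caching possible.
+// A plausible, purely illustrative scheme -- the cache container and key
+// format are assumptions, not something this API prescribes:
+//
+//     static std::map<String, String> binCache;   // prefix+hash -> program binary
+//     static Program buildCached(const ProgramSource& src, const String& flags)
+//     {
+//         String key = Program::getPrefix(flags) +
+//                      format("%016llx", (unsigned long long)src.hash());
+//         std::map<String, String>::iterator it = binCache.find(key);
+//         String errmsg; Program prog;
+//         if( it != binCache.end() && prog.read(it->second, flags) )
+//             return prog;                     // reuse the stored binary
+//         prog.create(src, flags, errmsg);     // otherwise compile from source
+//         String buf;
+//         if( prog.write(buf) )
+//             binCache[key] = buf;
+//         return prog;
+//     }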
+
+
+class CV_EXPORTS ProgramSource
+{
+public:
+ typedef uint64 hash_t;
+
+ ProgramSource();
+ explicit ProgramSource(const String& prog);
+ explicit ProgramSource(const char* prog);
+ ~ProgramSource();
+ ProgramSource(const ProgramSource& prog);
+ ProgramSource& operator = (const ProgramSource& prog);
+
+ const String& source() const;
+ hash_t hash() const;
+
+protected:
+ struct Impl;
+ Impl* p;
+};
+
+}}
+
+#endif
_mv.release();
return;
}
- CV_Assert( !_mv.fixedType() || CV_MAT_TYPE(_mv.flags) == m.depth() );
+ CV_Assert( !_mv.fixedType() || _mv.empty() || _mv.type() == m.depth() );
_mv.create(m.channels(), 1, m.depth());
Mat* dst = &_mv.getMatRef(0);
split(m, dst);
MatExpr e;
if(m.kind() == _InputArray::EXPR)
{
- const MatExpr& me = *(const MatExpr*)m.obj;
+ const MatExpr& me = *(const MatExpr*)m.getObj();
me.op->multiply(MatExpr(*this), me, e, scale);
}
else
namespace cv {
+class StdMatAllocator : public MatAllocator
+{
+public:
+ UMatData* allocate(int dims, const int* sizes, int type, size_t* step) const
+ {
+ size_t total = CV_ELEM_SIZE(type);
+ for( int i = dims-1; i >= 0; i-- )
+ {
+ if( step )
+ step[i] = total;
+ total *= sizes[i];
+ }
+ uchar* data = (uchar*)fastMalloc(total);
+ UMatData* u = new UMatData(this);
+ u->data = u->origdata = data;
+ u->size = total;
+ u->refcount = 1;
+
+ return u;
+ }
+
+ bool allocate(UMatData* u, int accessFlags) const
+ {
+ if(!u) return false;
+ if(u->handle != 0)
+ return true;
+ return UMat::getStdAllocator()->allocate(u, accessFlags);
+ }
+
+ void deallocate(UMatData* u) const
+ {
+ if(u)
+ {
+ fastFree(u->origdata);
+ delete u;
+ }
+ }
+
+ void map(UMatData*, int) const
+ {
+ }
+
+ void unmap(UMatData* u) const
+ {
+ if(u->urefcount == 0)
+ deallocate(u);
+ }
+
+ void download(UMatData* u, void* dstptr,
+ int dims, const size_t sz[],
+ const size_t srcofs[], const size_t srcstep[],
+ const size_t dststep[]) const
+ {
+ if(!u)
+ return;
+ int isz[CV_MAX_DIM];
+ uchar* srcptr = u->data;
+ for( int i = 0; i < dims; i++ )
+ {
+ CV_Assert( sz[i] <= (size_t)INT_MAX );
+ if( sz[i] == 0 )
+ return;
+ if( srcofs )
+ srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
+ isz[i] = (int)sz[i];
+ }
+
+ Mat src(dims, isz, CV_8U, srcptr, srcstep);
+ Mat dst(dims, isz, CV_8U, dstptr, dststep);
+
+ const Mat* arrays[] = { &src, &dst };
+ uchar* ptrs[2];
+ NAryMatIterator it(arrays, ptrs, 2);
+ size_t j, planesz = it.size;
+
+ for( j = 0; j < it.nplanes; j++, ++it )
+ memcpy(ptrs[1], ptrs[0], planesz);
+ }
+
+ void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
+ const size_t dstofs[], const size_t dststep[],
+ const size_t srcstep[]) const
+ {
+ if(!u)
+ return;
+ int isz[CV_MAX_DIM];
+ uchar* dstptr = u->data;
+ for( int i = 0; i < dims; i++ )
+ {
+ CV_Assert( sz[i] <= (size_t)INT_MAX );
+ if( sz[i] == 0 )
+ return;
+ if( dstofs )
+ dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
+ isz[i] = (int)sz[i];
+ }
+
+ Mat src(dims, isz, CV_8U, (void*)srcptr, srcstep);
+ Mat dst(dims, isz, CV_8U, dstptr, dststep);
+
+ const Mat* arrays[] = { &src, &dst };
+ uchar* ptrs[2];
+ NAryMatIterator it(arrays, ptrs, 2);
+ size_t j, planesz = it.size;
+
+ for( j = 0; j < it.nplanes; j++, ++it )
+ memcpy(ptrs[1], ptrs[0], planesz);
+ }
+
+ void copy(UMatData* usrc, UMatData* udst, int dims, const size_t sz[],
+ const size_t srcofs[], const size_t srcstep[],
+ const size_t dstofs[], const size_t dststep[], bool) const
+ {
+ if(!usrc || !udst)
+ return;
+ int isz[CV_MAX_DIM];
+ uchar* srcptr = usrc->data;
+ uchar* dstptr = udst->data;
+ for( int i = 0; i < dims; i++ )
+ {
+ CV_Assert( sz[i] <= (size_t)INT_MAX );
+ if( sz[i] == 0 )
+ return;
+ if( srcofs )
+ srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
+ if( dstofs )
+ dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
+ isz[i] = (int)sz[i];
+ }
+
+ Mat src(dims, isz, CV_8U, srcptr, srcstep);
+ Mat dst(dims, isz, CV_8U, dstptr, dststep);
+
+ const Mat* arrays[] = { &src, &dst };
+ uchar* ptrs[2];
+ NAryMatIterator it(arrays, ptrs, 2);
+ size_t j, planesz = it.size;
+
+ for( j = 0; j < it.nplanes; j++, ++it )
+ memcpy(ptrs[1], ptrs[0], planesz);
+ }
+};
+
+
+MatAllocator* Mat::getStdAllocator()
+{
+ static StdMatAllocator allocator;
+ return &allocator;
+}
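+// Worked example of the step computation in StdMatAllocator::allocate above:
+// for dims=2, sizes={480, 640}, type=CV_8UC3 (CV_ELEM_SIZE = 3 bytes), the loop
+// runs from the innermost dimension out:
+//     i = 1: step[1] = 3      (one pixel),  total = 3*640    = 1920
+//     i = 0: step[0] = 1920   (one row),    total = 1920*480 = 921600 bytes
+// so Mat m(480, 640, CV_8UC3) ends up with m.step[0] == 1920, m.step[1] == 3
+// and a single 921600-byte allocation.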
+
void swap( Mat& a, Mat& b )
{
std::swap(a.flags, b.flags);
std::swap(a.rows, b.rows);
std::swap(a.cols, b.cols);
std::swap(a.data, b.data);
- std::swap(a.refcount, b.refcount);
std::swap(a.datastart, b.datastart);
std::swap(a.dataend, b.dataend);
std::swap(a.datalimit, b.datalimit);
std::swap(a.allocator, b.allocator);
+ std::swap(a.u, b.u);
std::swap(a.size.p, b.size.p);
std::swap(a.step.p, b.step.p);
int d = m.dims;
if( d > 2 )
m.rows = m.cols = -1;
+ if(m.u)
+ m.data = m.datastart = m.u->data;
if( m.data )
{
m.datalimit = m.datastart + m.size[0]*m.step[0];
if( total() > 0 )
{
+ MatAllocator *a = allocator, *a0 = getStdAllocator();
#ifdef HAVE_TGPU
- if( !allocator || allocator == tegra::getAllocator() ) allocator = tegra::getAllocator(d, _sizes, _type);
+ if( !a || a == tegra::getAllocator() )
+ a = tegra::getAllocator(d, _sizes, _type);
#endif
- if( !allocator )
+ if(!a)
+ a = a0;
+ try
{
- size_t totalsize = alignSize(step.p[0]*size.p[0], (int)sizeof(*refcount));
- data = datastart = (uchar*)fastMalloc(totalsize + (int)sizeof(*refcount));
- refcount = (int*)(data + totalsize);
- *refcount = 1;
+ u = a->allocate(dims, size, _type, step.p);
+ CV_Assert(u != 0);
}
- else
+ catch(...)
{
-#ifdef HAVE_TGPU
- try
- {
- allocator->allocate(dims, size, _type, refcount, datastart, data, step.p);
- CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) );
- }catch(...)
- {
- allocator = 0;
- size_t totalSize = alignSize(step.p[0]*size.p[0], (int)sizeof(*refcount));
- data = datastart = (uchar*)fastMalloc(totalSize + (int)sizeof(*refcount));
- refcount = (int*)(data + totalSize);
- *refcount = 1;
- }
-#else
- allocator->allocate(dims, size, _type, refcount, datastart, data, step.p);
- CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) );
-#endif
+ if(a != a0)
+ u = a0->allocate(dims, size, _type, step.p);
+ CV_Assert(u != 0);
}
+ CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) );
}
finalizeHdr(*this);
void Mat::deallocate()
{
- if( allocator )
- allocator->deallocate(refcount, datastart, data);
- else
- {
- CV_DbgAssert(refcount != 0);
- fastFree(datastart);
- }
+ if(u)
+ (u->currAllocator ? u->currAllocator : allocator ? allocator : getStdAllocator())->unmap(u);
}
-
Mat::Mat(const Mat& m, const Range& _rowRange, const Range& _colRange)
- : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0),
- datalimit(0), allocator(0), size(&rows)
+ : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+ datalimit(0), allocator(0), u(0), size(&rows)
{
CV_Assert( m.dims >= 2 );
if( m.dims > 2 )
Mat::Mat(const Mat& m, const Rect& roi)
: flags(m.flags), dims(2), rows(roi.height), cols(roi.width),
- data(m.data + roi.y*m.step[0]), refcount(m.refcount),
+ data(m.data + roi.y*m.step[0]),
datastart(m.datastart), dataend(m.dataend), datalimit(m.datalimit),
- allocator(m.allocator), size(&rows)
+ allocator(m.allocator), u(m.u), size(&rows)
{
CV_Assert( m.dims <= 2 );
flags &= roi.width < m.cols ? ~CONTINUOUS_FLAG : -1;
data += roi.x*esz;
CV_Assert( 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols &&
0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows );
- if( refcount )
- CV_XADD(refcount, 1);
+ if( u )
+ CV_XADD(&u->refcount, 1);
if( roi.width < m.cols || roi.height < m.rows )
flags |= SUBMATRIX_FLAG;
Mat::Mat(int _dims, const int* _sizes, int _type, void* _data, const size_t* _steps)
- : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0),
- datalimit(0), allocator(0), size(&rows)
+ : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+ datalimit(0), allocator(0), u(0), size(&rows)
{
flags |= CV_MAT_TYPE(_type);
data = datastart = (uchar*)_data;
Mat::Mat(const Mat& m, const Range* ranges)
- : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0),
- datalimit(0), allocator(0), size(&rows)
+ : flags(MAGIC_VAL), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0),
+ datalimit(0), allocator(0), u(0), size(&rows)
{
int i, d = m.dims;
Input/Output Array
\*************************************************************************************************/
-_InputArray::_InputArray() : flags(0), obj(0) {}
-_InputArray::_InputArray(const Mat& m) : flags(MAT), obj((void*)&m) {}
-_InputArray::_InputArray(const std::vector<Mat>& vec) : flags(STD_VECTOR_MAT), obj((void*)&vec) {}
-_InputArray::_InputArray(const double& val) : flags(FIXED_TYPE + FIXED_SIZE + MATX + CV_64F), obj((void*)&val), sz(Size(1,1)) {}
-_InputArray::_InputArray(const MatExpr& expr) : flags(FIXED_TYPE + FIXED_SIZE + EXPR), obj((void*)&expr) {}
-_InputArray::_InputArray(const cuda::GpuMat& d_mat) : flags(GPU_MAT), obj((void*)&d_mat) {}
-_InputArray::_InputArray(const ogl::Buffer& buf) : flags(OPENGL_BUFFER), obj((void*)&buf) {}
-_InputArray::_InputArray(const cuda::CudaMem& cuda_mem) : flags(CUDA_MEM), obj((void*)&cuda_mem) {}
-
-_InputArray::~_InputArray() {}
-
Mat _InputArray::getMat(int i) const
{
int k = kind();
+ int accessFlags = flags & ACCESS_MASK;
if( k == MAT )
{
return m->row(i);
}
+ if( k == UMAT )
+ {
+ const UMat* m = (const UMat*)obj;
+ if( i < 0 )
+ return m->getMat(accessFlags);
+ return m->getMat(accessFlags).row(i);
+ }
+
if( k == EXPR )
{
CV_Assert( i < 0 );
return !v.empty() ? Mat(size(i), t, (void*)&v[0]) : Mat();
}
- if( k == OCL_MAT )
- {
- CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
- }
-
if( k == STD_VECTOR_MAT )
{
const std::vector<Mat>& v = *(const std::vector<Mat>*)obj;
return v[i];
}
+ if( k == STD_VECTOR_UMAT )
+ {
+ const std::vector<UMat>& v = *(const std::vector<UMat>*)obj;
+ CV_Assert( 0 <= i && i < (int)v.size() );
+
+ return v[i].getMat(accessFlags);
+ }
+
if( k == OPENGL_BUFFER )
{
CV_Assert( i < 0 );
return Mat();
}
- CV_Assert( k == CUDA_MEM );
- //if( k == CUDA_MEM )
+ if( k == CUDA_MEM )
{
CV_Assert( i < 0 );
return cuda_mem->createMatHeader();
}
+
+ CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
+ return Mat();
+}
+
+
+UMat _InputArray::getUMat(int i) const
+{
+ int k = kind();
+ int accessFlags = flags & ACCESS_MASK;
+
+ if( k == UMAT )
+ {
+ const UMat* m = (const UMat*)obj;
+ if( i < 0 )
+ return *m;
+ return m->row(i);
+ }
+
+ if( k == STD_VECTOR_UMAT )
+ {
+ const std::vector<UMat>& v = *(const std::vector<UMat>*)obj;
+ CV_Assert( 0 <= i && i < (int)v.size() );
+
+ return v[i];
+ }
+
+ if( k == MAT )
+ {
+ const Mat* m = (const Mat*)obj;
+ if( i < 0 )
+ return m->getUMat(accessFlags);
+ return m->row(i).getUMat(accessFlags);
+ }
+
+ return getMat(i).getUMat(accessFlags);
}
void _InputArray::getMatVector(std::vector<Mat>& mv) const
{
int k = kind();
+ int accessFlags = flags & ACCESS_MASK;
if( k == MAT )
{
return;
}
- if( k == OCL_MAT )
+ if( k == STD_VECTOR_MAT )
{
- CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
+ const std::vector<Mat>& v = *(const std::vector<Mat>*)obj;
+ size_t i, n = v.size();
+ mv.resize(n);
+
+ for( i = 0; i < n; i++ )
+ mv[i] = v[i];
+ return;
}
- CV_Assert( k == STD_VECTOR_MAT );
- //if( k == STD_VECTOR_MAT )
+ if( k == STD_VECTOR_UMAT )
{
- const std::vector<Mat>& v = *(const std::vector<Mat>*)obj;
- mv.resize(v.size());
- std::copy(v.begin(), v.end(), mv.begin());
+ const std::vector<UMat>& v = *(const std::vector<UMat>*)obj;
+ size_t i, n = v.size();
+ mv.resize(n);
+
+ for( i = 0; i < n; i++ )
+ mv[i] = v[i].getMat(accessFlags);
return;
}
+
+ CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
}
cuda::GpuMat _InputArray::getGpuMat() const
return ((const MatExpr*)obj)->size();
}
+ if( k == UMAT )
+ {
+ CV_Assert( i < 0 );
+ return ((const UMat*)obj)->size();
+ }
+
if( k == MATX )
{
CV_Assert( i < 0 );
return ((const Mat*)obj)->total();
}
+ if( k == UMAT )
+ {
+ CV_Assert( i < 0 );
+ return ((const UMat*)obj)->total();
+ }
+
if( k == STD_VECTOR_MAT )
{
const std::vector<Mat>& vv = *(const std::vector<Mat>*)obj;
if( k == MAT )
return ((const Mat*)obj)->type();
+ if( k == UMAT )
+ return ((const UMat*)obj)->type();
+
if( k == EXPR )
return ((const MatExpr*)obj)->type();
if( k == STD_VECTOR_MAT )
{
const std::vector<Mat>& vv = *(const std::vector<Mat>*)obj;
+ if( vv.empty() )
+ {
+ CV_Assert((flags & FIXED_TYPE) != 0);
+ return CV_MAT_TYPE(flags);
+ }
CV_Assert( i < (int)vv.size() );
-
return vv[i >= 0 ? i : 0].type();
}
if( k == MAT )
return ((const Mat*)obj)->empty();
+ if( k == UMAT )
+ return ((const UMat*)obj)->empty();
+
if( k == EXPR )
return false;
}
-_OutputArray::_OutputArray() {}
-_OutputArray::_OutputArray(Mat& m) : _InputArray(m) {}
-_OutputArray::_OutputArray(std::vector<Mat>& vec) : _InputArray(vec) {}
-_OutputArray::_OutputArray(cuda::GpuMat& d_mat) : _InputArray(d_mat) {}
-_OutputArray::_OutputArray(ogl::Buffer& buf) : _InputArray(buf) {}
-_OutputArray::_OutputArray(cuda::CudaMem& cuda_mem) : _InputArray(cuda_mem) {}
-
-_OutputArray::_OutputArray(const Mat& m) : _InputArray(m) {flags |= FIXED_SIZE|FIXED_TYPE;}
-_OutputArray::_OutputArray(const std::vector<Mat>& vec) : _InputArray(vec) {flags |= FIXED_SIZE;}
-_OutputArray::_OutputArray(const cuda::GpuMat& d_mat) : _InputArray(d_mat) {flags |= FIXED_SIZE|FIXED_TYPE;}
-_OutputArray::_OutputArray(const ogl::Buffer& buf) : _InputArray(buf) {flags |= FIXED_SIZE|FIXED_TYPE;}
-_OutputArray::_OutputArray(const cuda::CudaMem& cuda_mem) : _InputArray(cuda_mem) {flags |= FIXED_SIZE|FIXED_TYPE;}
-
-_OutputArray::~_OutputArray() {}
-
bool _OutputArray::fixedSize() const
{
return (flags & FIXED_SIZE) == FIXED_SIZE;
((Mat*)obj)->create(_sz, mtype);
return;
}
+ if( k == UMAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
+ {
+ CV_Assert(!fixedSize() || ((UMat*)obj)->size.operator()() == _sz);
+ CV_Assert(!fixedType() || ((UMat*)obj)->type() == mtype);
+ ((UMat*)obj)->create(_sz, mtype);
+ return;
+ }
if( k == GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == _sz);
((Mat*)obj)->create(rows, cols, mtype);
return;
}
+ if( k == UMAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
+ {
+ CV_Assert(!fixedSize() || ((UMat*)obj)->size.operator()() == Size(cols, rows));
+ CV_Assert(!fixedType() || ((UMat*)obj)->type() == mtype);
+ ((UMat*)obj)->create(rows, cols, mtype);
+ return;
+ }
if( k == GPU_MAT && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == Size(cols, rows));
create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
}
-void _OutputArray::create(int dims, const int* sizes, int mtype, int i, bool allowTransposed, int fixedDepthMask) const
+void _OutputArray::create(int dims, const int* sizes, int mtype, int i,
+ bool allowTransposed, int fixedDepthMask) const
{
int k = kind();
mtype = CV_MAT_TYPE(mtype);
return;
}
+ if( k == UMAT )
+ {
+ CV_Assert( i < 0 );
+ UMat& m = *(UMat*)obj;
+ if( allowTransposed )
+ {
+ if( !m.isContinuous() )
+ {
+ CV_Assert(!fixedType() && !fixedSize());
+ m.release();
+ }
+
+ if( dims == 2 && m.dims == 2 && !m.empty() &&
+ m.type() == mtype && m.rows == sizes[1] && m.cols == sizes[0] )
+ return;
+ }
+
+ if(fixedType())
+ {
+ if(CV_MAT_CN(mtype) == m.channels() && ((1 << CV_MAT_TYPE(flags)) & fixedDepthMask) != 0 )
+ mtype = m.type();
+ else
+ CV_Assert(CV_MAT_TYPE(mtype) == m.type());
+ }
+ if(fixedSize())
+ {
+ CV_Assert(m.dims == dims);
+ for(int j = 0; j < dims; ++j)
+ CV_Assert(m.size[j] == sizes[j]);
+ }
+ m.create(dims, sizes, mtype);
+ return;
+ }
+
if( k == MATX )
{
CV_Assert( i < 0 );
return;
}
- if( k == OCL_MAT )
- {
- CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
- }
-
if( k == NONE )
{
CV_Error(CV_StsNullPtr, "create() called for the missing output array" );
return;
}
- CV_Assert( k == STD_VECTOR_MAT );
- //if( k == STD_VECTOR_MAT )
+ if( k == STD_VECTOR_MAT )
{
std::vector<Mat>& v = *(std::vector<Mat>*)obj;
}
m.create(dims, sizes, mtype);
+ return;
}
+
+ CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
}
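+// The fixedDepthMask parameter used above is a bitmask of acceptable depths:
+// when the output array is fixed-type, any depth whose bit is set is kept
+// as-is instead of triggering an assertion. Illustrative (hypothetical) caller:
+//
+//     static void computeResponse(InputArray src, OutputArray dst)
+//     {
+//         // prefer CV_32F, but tolerate a pre-allocated CV_64F destination:
+//         dst.create(src.size(), CV_MAKETYPE(CV_32F, src.channels()), -1,
+//                    false, (1 << CV_32F) | (1 << CV_64F));
+//     }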
void _OutputArray::release() const
return;
}
- if( k == OCL_MAT )
- {
- CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet");
- }
-
- CV_Assert( k == STD_VECTOR_MAT );
- //if( k == STD_VECTOR_MAT )
+ if( k == STD_VECTOR_MAT )
{
((std::vector<Mat>*)obj)->clear();
+ return;
}
+
+ CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
}
void _OutputArray::clear() const
return *(cuda::CudaMem*)obj;
}
-static _OutputArray _none;
-OutputArray noArray() { return _none; }
+static _InputOutputArray _none;
+InputOutputArray noArray() { return _none; }
}
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the OpenCV Foundation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+#include <map>
+
+/*
+  Part of this file is an extract from the standard OpenCL headers published
+  by the Khronos Group; the original copyright notice follows.
+*/
+
+/*******************************************************************************
+ * Copyright (c) 2008 - 2012 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ ******************************************************************************/
+
+#if 0 //defined __APPLE__
+#define HAVE_OPENCL 1
+#else
+#undef HAVE_OPENCL
+#endif
+
+#define OPENCV_CL_NOT_IMPLEMENTED -1000
+
+#ifdef HAVE_OPENCL
+
+#if defined __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/opencl.h>
+#endif
+
+static const bool g_haveOpenCL = true;
+
+#else
+
+extern "C" {
+
+struct _cl_platform_id { int dummy; };
+struct _cl_device_id { int dummy; };
+struct _cl_context { int dummy; };
+struct _cl_command_queue { int dummy; };
+struct _cl_mem { int dummy; };
+struct _cl_program { int dummy; };
+struct _cl_kernel { int dummy; };
+struct _cl_event { int dummy; };
+struct _cl_sampler { int dummy; };
+
+typedef struct _cl_platform_id * cl_platform_id;
+typedef struct _cl_device_id * cl_device_id;
+typedef struct _cl_context * cl_context;
+typedef struct _cl_command_queue * cl_command_queue;
+typedef struct _cl_mem * cl_mem;
+typedef struct _cl_program * cl_program;
+typedef struct _cl_kernel * cl_kernel;
+typedef struct _cl_event * cl_event;
+typedef struct _cl_sampler * cl_sampler;
+
+typedef int cl_int;
+typedef unsigned cl_uint;
+typedef int64 cl_long;   /* must be 64-bit: plain "long" is 32-bit on LLP64 targets such as 64-bit Windows */
+typedef uint64 cl_ulong;
+
+typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
+typedef cl_ulong cl_bitfield;
+typedef cl_bitfield cl_device_type;
+typedef cl_uint cl_platform_info;
+typedef cl_uint cl_device_info;
+typedef cl_bitfield cl_device_fp_config;
+typedef cl_uint cl_device_mem_cache_type;
+typedef cl_uint cl_device_local_mem_type;
+typedef cl_bitfield cl_device_exec_capabilities;
+typedef cl_bitfield cl_command_queue_properties;
+typedef intptr_t cl_device_partition_property;
+typedef cl_bitfield cl_device_affinity_domain;
+
+typedef intptr_t cl_context_properties;
+typedef cl_uint cl_context_info;
+typedef cl_uint cl_command_queue_info;
+typedef cl_uint cl_channel_order;
+typedef cl_uint cl_channel_type;
+typedef cl_bitfield cl_mem_flags;
+typedef cl_uint cl_mem_object_type;
+typedef cl_uint cl_mem_info;
+typedef cl_bitfield cl_mem_migration_flags;
+typedef cl_uint cl_image_info;
+typedef cl_uint cl_buffer_create_type;
+typedef cl_uint cl_addressing_mode;
+typedef cl_uint cl_filter_mode;
+typedef cl_uint cl_sampler_info;
+typedef cl_bitfield cl_map_flags;
+typedef cl_uint cl_program_info;
+typedef cl_uint cl_program_build_info;
+typedef cl_uint cl_program_binary_type;
+typedef cl_int cl_build_status;
+typedef cl_uint cl_kernel_info;
+typedef cl_uint cl_kernel_arg_info;
+typedef cl_uint cl_kernel_arg_address_qualifier;
+typedef cl_uint cl_kernel_arg_access_qualifier;
+typedef cl_bitfield cl_kernel_arg_type_qualifier;
+typedef cl_uint cl_kernel_work_group_info;
+typedef cl_uint cl_event_info;
+typedef cl_uint cl_command_type;
+typedef cl_uint cl_profiling_info;
+
+
+typedef struct _cl_image_format {
+ cl_channel_order image_channel_order;
+ cl_channel_type image_channel_data_type;
+} cl_image_format;
+
+typedef struct _cl_image_desc {
+ cl_mem_object_type image_type;
+ size_t image_width;
+ size_t image_height;
+ size_t image_depth;
+ size_t image_array_size;
+ size_t image_row_pitch;
+ size_t image_slice_pitch;
+ cl_uint num_mip_levels;
+ cl_uint num_samples;
+ cl_mem buffer;
+} cl_image_desc;
+
+typedef struct _cl_buffer_region {
+ size_t origin;
+ size_t size;
+} cl_buffer_region;
+
+
+//////////////////////////////////////////////////////////
+
+#define CL_SUCCESS 0
+#define CL_DEVICE_NOT_FOUND -1
+#define CL_DEVICE_NOT_AVAILABLE -2
+#define CL_COMPILER_NOT_AVAILABLE -3
+#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4
+#define CL_OUT_OF_RESOURCES -5
+#define CL_OUT_OF_HOST_MEMORY -6
+#define CL_PROFILING_INFO_NOT_AVAILABLE -7
+#define CL_MEM_COPY_OVERLAP -8
+#define CL_IMAGE_FORMAT_MISMATCH -9
+#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10
+#define CL_BUILD_PROGRAM_FAILURE -11
+#define CL_MAP_FAILURE -12
+#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13
+#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
+#define CL_COMPILE_PROGRAM_FAILURE -15
+#define CL_LINKER_NOT_AVAILABLE -16
+#define CL_LINK_PROGRAM_FAILURE -17
+#define CL_DEVICE_PARTITION_FAILED -18
+#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19
+
+#define CL_INVALID_VALUE -30
+#define CL_INVALID_DEVICE_TYPE -31
+#define CL_INVALID_PLATFORM -32
+#define CL_INVALID_DEVICE -33
+#define CL_INVALID_CONTEXT -34
+#define CL_INVALID_QUEUE_PROPERTIES -35
+#define CL_INVALID_COMMAND_QUEUE -36
+#define CL_INVALID_HOST_PTR -37
+#define CL_INVALID_MEM_OBJECT -38
+#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39
+#define CL_INVALID_IMAGE_SIZE -40
+#define CL_INVALID_SAMPLER -41
+#define CL_INVALID_BINARY -42
+#define CL_INVALID_BUILD_OPTIONS -43
+#define CL_INVALID_PROGRAM -44
+#define CL_INVALID_PROGRAM_EXECUTABLE -45
+#define CL_INVALID_KERNEL_NAME -46
+#define CL_INVALID_KERNEL_DEFINITION -47
+#define CL_INVALID_KERNEL -48
+#define CL_INVALID_ARG_INDEX -49
+#define CL_INVALID_ARG_VALUE -50
+#define CL_INVALID_ARG_SIZE -51
+#define CL_INVALID_KERNEL_ARGS -52
+#define CL_INVALID_WORK_DIMENSION -53
+#define CL_INVALID_WORK_GROUP_SIZE -54
+#define CL_INVALID_WORK_ITEM_SIZE -55
+#define CL_INVALID_GLOBAL_OFFSET -56
+#define CL_INVALID_EVENT_WAIT_LIST -57
+#define CL_INVALID_EVENT -58
+#define CL_INVALID_OPERATION -59
+#define CL_INVALID_GL_OBJECT -60
+#define CL_INVALID_BUFFER_SIZE -61
+#define CL_INVALID_MIP_LEVEL -62
+#define CL_INVALID_GLOBAL_WORK_SIZE -63
+#define CL_INVALID_PROPERTY -64
+#define CL_INVALID_IMAGE_DESCRIPTOR -65
+#define CL_INVALID_COMPILER_OPTIONS -66
+#define CL_INVALID_LINKER_OPTIONS -67
+#define CL_INVALID_DEVICE_PARTITION_COUNT -68
+
+/*#define CL_VERSION_1_0 1
+#define CL_VERSION_1_1 1
+#define CL_VERSION_1_2 1*/
+
+#define CL_FALSE 0
+#define CL_TRUE 1
+#define CL_BLOCKING CL_TRUE
+#define CL_NON_BLOCKING CL_FALSE
+
+#define CL_PLATFORM_PROFILE 0x0900
+#define CL_PLATFORM_VERSION 0x0901
+#define CL_PLATFORM_NAME 0x0902
+#define CL_PLATFORM_VENDOR 0x0903
+#define CL_PLATFORM_EXTENSIONS 0x0904
+
+#define CL_DEVICE_TYPE_DEFAULT (1 << 0)
+#define CL_DEVICE_TYPE_CPU (1 << 1)
+#define CL_DEVICE_TYPE_GPU (1 << 2)
+#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3)
+#define CL_DEVICE_TYPE_CUSTOM (1 << 4)
+#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF
+#define CL_DEVICE_TYPE 0x1000
+#define CL_DEVICE_VENDOR_ID 0x1001
+#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002
+#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003
+#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004
+#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B
+#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C
+#define CL_DEVICE_ADDRESS_BITS 0x100D
+#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E
+#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F
+#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010
+#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011
+#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012
+#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013
+#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014
+#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015
+#define CL_DEVICE_IMAGE_SUPPORT 0x1016
+#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017
+#define CL_DEVICE_MAX_SAMPLERS 0x1018
+#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019
+#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A
+#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B
+#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C
+#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D
+#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E
+#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F
+#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020
+#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021
+#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022
+#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023
+#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024
+#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025
+#define CL_DEVICE_ENDIAN_LITTLE 0x1026
+#define CL_DEVICE_AVAILABLE 0x1027
+#define CL_DEVICE_COMPILER_AVAILABLE 0x1028
+#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029
+#define CL_DEVICE_QUEUE_PROPERTIES 0x102A
+#define CL_DEVICE_NAME 0x102B
+#define CL_DEVICE_VENDOR 0x102C
+#define CL_DRIVER_VERSION 0x102D
+#define CL_DEVICE_PROFILE 0x102E
+#define CL_DEVICE_VERSION 0x102F
+#define CL_DEVICE_EXTENSIONS 0x1030
+#define CL_DEVICE_PLATFORM 0x1031
+#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
+#define CL_DEVICE_HALF_FP_CONFIG 0x1033
+#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034
+#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B
+#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C
+#define CL_DEVICE_OPENCL_C_VERSION 0x103D
+#define CL_DEVICE_LINKER_AVAILABLE 0x103E
+#define CL_DEVICE_BUILT_IN_KERNELS 0x103F
+#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040
+#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041
+#define CL_DEVICE_PARENT_DEVICE 0x1042
+#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043
+#define CL_DEVICE_PARTITION_PROPERTIES 0x1044
+#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045
+#define CL_DEVICE_PARTITION_TYPE 0x1046
+#define CL_DEVICE_REFERENCE_COUNT 0x1047
+#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048
+#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049
+#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A
+#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B
+
+#define CL_FP_DENORM (1 << 0)
+#define CL_FP_INF_NAN (1 << 1)
+#define CL_FP_ROUND_TO_NEAREST (1 << 2)
+#define CL_FP_ROUND_TO_ZERO (1 << 3)
+#define CL_FP_ROUND_TO_INF (1 << 4)
+#define CL_FP_FMA (1 << 5)
+#define CL_FP_SOFT_FLOAT (1 << 6)
+#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7)
+
+#define CL_NONE 0x0
+#define CL_READ_ONLY_CACHE 0x1
+#define CL_READ_WRITE_CACHE 0x2
+#define CL_LOCAL 0x1
+#define CL_GLOBAL 0x2
+#define CL_EXEC_KERNEL (1 << 0)
+#define CL_EXEC_NATIVE_KERNEL (1 << 1)
+#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0)
+#define CL_QUEUE_PROFILING_ENABLE (1 << 1)
+
+#define CL_CONTEXT_REFERENCE_COUNT 0x1080
+#define CL_CONTEXT_DEVICES 0x1081
+#define CL_CONTEXT_PROPERTIES 0x1082
+#define CL_CONTEXT_NUM_DEVICES 0x1083
+#define CL_CONTEXT_PLATFORM 0x1084
+#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085
+
+#define CL_DEVICE_PARTITION_EQUALLY 0x1086
+#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087
+#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0
+#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088
+#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0)
+#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1)
+#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2)
+#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3)
+#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4)
+#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5)
+#define CL_QUEUE_CONTEXT 0x1090
+#define CL_QUEUE_DEVICE 0x1091
+#define CL_QUEUE_REFERENCE_COUNT 0x1092
+#define CL_QUEUE_PROPERTIES 0x1093
+#define CL_MEM_READ_WRITE (1 << 0)
+#define CL_MEM_WRITE_ONLY (1 << 1)
+#define CL_MEM_READ_ONLY (1 << 2)
+#define CL_MEM_USE_HOST_PTR (1 << 3)
+#define CL_MEM_ALLOC_HOST_PTR (1 << 4)
+#define CL_MEM_COPY_HOST_PTR (1 << 5)
+// reserved (1 << 6)
+#define CL_MEM_HOST_WRITE_ONLY (1 << 7)
+#define CL_MEM_HOST_READ_ONLY (1 << 8)
+#define CL_MEM_HOST_NO_ACCESS (1 << 9)
+#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0)
+#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1)
+
+#define CL_R 0x10B0
+#define CL_A 0x10B1
+#define CL_RG 0x10B2
+#define CL_RA 0x10B3
+#define CL_RGB 0x10B4
+#define CL_RGBA 0x10B5
+#define CL_BGRA 0x10B6
+#define CL_ARGB 0x10B7
+#define CL_INTENSITY 0x10B8
+#define CL_LUMINANCE 0x10B9
+#define CL_Rx 0x10BA
+#define CL_RGx 0x10BB
+#define CL_RGBx 0x10BC
+#define CL_DEPTH 0x10BD
+#define CL_DEPTH_STENCIL 0x10BE
+
+#define CL_SNORM_INT8 0x10D0
+#define CL_SNORM_INT16 0x10D1
+#define CL_UNORM_INT8 0x10D2
+#define CL_UNORM_INT16 0x10D3
+#define CL_UNORM_SHORT_565 0x10D4
+#define CL_UNORM_SHORT_555 0x10D5
+#define CL_UNORM_INT_101010 0x10D6
+#define CL_SIGNED_INT8 0x10D7
+#define CL_SIGNED_INT16 0x10D8
+#define CL_SIGNED_INT32 0x10D9
+#define CL_UNSIGNED_INT8 0x10DA
+#define CL_UNSIGNED_INT16 0x10DB
+#define CL_UNSIGNED_INT32 0x10DC
+#define CL_HALF_FLOAT 0x10DD
+#define CL_FLOAT 0x10DE
+#define CL_UNORM_INT24 0x10DF
+
+#define CL_MEM_OBJECT_BUFFER 0x10F0
+#define CL_MEM_OBJECT_IMAGE2D 0x10F1
+#define CL_MEM_OBJECT_IMAGE3D 0x10F2
+#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3
+#define CL_MEM_OBJECT_IMAGE1D 0x10F4
+#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5
+#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6
+
+#define CL_MEM_TYPE 0x1100
+#define CL_MEM_FLAGS 0x1101
+#define CL_MEM_SIZE 0x1102
+#define CL_MEM_HOST_PTR 0x1103
+#define CL_MEM_MAP_COUNT 0x1104
+#define CL_MEM_REFERENCE_COUNT 0x1105
+#define CL_MEM_CONTEXT 0x1106
+#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107
+#define CL_MEM_OFFSET 0x1108
+
+#define CL_IMAGE_FORMAT 0x1110
+#define CL_IMAGE_ELEMENT_SIZE 0x1111
+#define CL_IMAGE_ROW_PITCH 0x1112
+#define CL_IMAGE_SLICE_PITCH 0x1113
+#define CL_IMAGE_WIDTH 0x1114
+#define CL_IMAGE_HEIGHT 0x1115
+#define CL_IMAGE_DEPTH 0x1116
+#define CL_IMAGE_ARRAY_SIZE 0x1117
+#define CL_IMAGE_BUFFER 0x1118
+#define CL_IMAGE_NUM_MIP_LEVELS 0x1119
+#define CL_IMAGE_NUM_SAMPLES 0x111A
+
+#define CL_ADDRESS_NONE 0x1130
+#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131
+#define CL_ADDRESS_CLAMP 0x1132
+#define CL_ADDRESS_REPEAT 0x1133
+#define CL_ADDRESS_MIRRORED_REPEAT 0x1134
+
+#define CL_FILTER_NEAREST 0x1140
+#define CL_FILTER_LINEAR 0x1141
+
+#define CL_SAMPLER_REFERENCE_COUNT 0x1150
+#define CL_SAMPLER_CONTEXT 0x1151
+#define CL_SAMPLER_NORMALIZED_COORDS 0x1152
+#define CL_SAMPLER_ADDRESSING_MODE 0x1153
+#define CL_SAMPLER_FILTER_MODE 0x1154
+
+#define CL_MAP_READ (1 << 0)
+#define CL_MAP_WRITE (1 << 1)
+#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2)
+
+#define CL_PROGRAM_REFERENCE_COUNT 0x1160
+#define CL_PROGRAM_CONTEXT 0x1161
+#define CL_PROGRAM_NUM_DEVICES 0x1162
+#define CL_PROGRAM_DEVICES 0x1163
+#define CL_PROGRAM_SOURCE 0x1164
+#define CL_PROGRAM_BINARY_SIZES 0x1165
+#define CL_PROGRAM_BINARIES 0x1166
+#define CL_PROGRAM_NUM_KERNELS 0x1167
+#define CL_PROGRAM_KERNEL_NAMES 0x1168
+#define CL_PROGRAM_BUILD_STATUS 0x1181
+#define CL_PROGRAM_BUILD_OPTIONS 0x1182
+#define CL_PROGRAM_BUILD_LOG 0x1183
+#define CL_PROGRAM_BINARY_TYPE 0x1184
+#define CL_PROGRAM_BINARY_TYPE_NONE 0x0
+#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1
+#define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2
+#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4
+
+#define CL_BUILD_SUCCESS 0
+#define CL_BUILD_NONE -1
+#define CL_BUILD_ERROR -2
+#define CL_BUILD_IN_PROGRESS -3
+
+#define CL_KERNEL_FUNCTION_NAME 0x1190
+#define CL_KERNEL_NUM_ARGS 0x1191
+#define CL_KERNEL_REFERENCE_COUNT 0x1192
+#define CL_KERNEL_CONTEXT 0x1193
+#define CL_KERNEL_PROGRAM 0x1194
+#define CL_KERNEL_ATTRIBUTES 0x1195
+#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196
+#define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197
+#define CL_KERNEL_ARG_TYPE_NAME 0x1198
+#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199
+#define CL_KERNEL_ARG_NAME 0x119A
+#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B
+#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C
+#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D
+#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E
+#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0
+#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1
+#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2
+#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3
+#define CL_KERNEL_ARG_TYPE_NONE 0
+#define CL_KERNEL_ARG_TYPE_CONST (1 << 0)
+#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1)
+#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2)
+#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0
+#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1
+#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2
+#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
+#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4
+#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5
+
+#define CL_EVENT_COMMAND_QUEUE 0x11D0
+#define CL_EVENT_COMMAND_TYPE 0x11D1
+#define CL_EVENT_REFERENCE_COUNT 0x11D2
+#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3
+#define CL_EVENT_CONTEXT 0x11D4
+
+#define CL_COMMAND_NDRANGE_KERNEL 0x11F0
+#define CL_COMMAND_TASK 0x11F1
+#define CL_COMMAND_NATIVE_KERNEL 0x11F2
+#define CL_COMMAND_READ_BUFFER 0x11F3
+#define CL_COMMAND_WRITE_BUFFER 0x11F4
+#define CL_COMMAND_COPY_BUFFER 0x11F5
+#define CL_COMMAND_READ_IMAGE 0x11F6
+#define CL_COMMAND_WRITE_IMAGE 0x11F7
+#define CL_COMMAND_COPY_IMAGE 0x11F8
+#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9
+#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA
+#define CL_COMMAND_MAP_BUFFER 0x11FB
+#define CL_COMMAND_MAP_IMAGE 0x11FC
+#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD
+#define CL_COMMAND_MARKER 0x11FE
+#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF
+#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200
+#define CL_COMMAND_READ_BUFFER_RECT 0x1201
+#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202
+#define CL_COMMAND_COPY_BUFFER_RECT 0x1203
+#define CL_COMMAND_USER 0x1204
+#define CL_COMMAND_BARRIER 0x1205
+#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206
+#define CL_COMMAND_FILL_BUFFER 0x1207
+#define CL_COMMAND_FILL_IMAGE 0x1208
+
+#define CL_COMPLETE 0x0
+#define CL_RUNNING 0x1
+#define CL_SUBMITTED 0x2
+#define CL_QUEUED 0x3
+#define CL_BUFFER_CREATE_TYPE_REGION 0x1220
+
+#define CL_PROFILING_COMMAND_QUEUED 0x1280
+#define CL_PROFILING_COMMAND_SUBMIT 0x1281
+#define CL_PROFILING_COMMAND_START 0x1282
+#define CL_PROFILING_COMMAND_END 0x1283
+
+#define CL_CALLBACK CV_STDCALL
+
+static volatile bool g_haveOpenCL = false;
+static const char* oclFuncToCheck = "clEnqueueReadBufferRect";
+
+#if defined(__APPLE__)
+#include <dlfcn.h>
+
+static void* initOpenCLAndLoad(const char* funcname)
+{
+ static bool initialized = false;
+ static void* handle = 0;
+ if (!handle)
+ {
+ if(!initialized)
+ {
+ handle = dlopen("/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL", RTLD_LAZY);
+ initialized = true;
+ g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
+ }
+ if(!handle)
+ return 0;
+ }
+
+ return funcname ? dlsym(handle, funcname) : 0;
+}
+
+#elif defined WIN32 || defined _WIN32
+
+#ifndef _WIN32_WINNT // This is needed for the declaration of TryEnterCriticalSection in winbase.h with Visual Studio 2005 (and older?)
+ #define _WIN32_WINNT 0x0400 // http://msdn.microsoft.com/en-us/library/ms686857(VS.85).aspx
+#endif
+#include <windows.h>
+#if (_WIN32_WINNT >= 0x0602)
+ #include <synchapi.h>
+#endif
+#undef small
+#undef min
+#undef max
+#undef abs
+
+static void* initOpenCLAndLoad(const char* funcname)
+{
+ static bool initialized = false;
+ static HMODULE handle = 0;
+ if (!handle)
+ {
+ if(!initialized)
+ {
+ handle = LoadLibraryA("OpenCL.dll");
+ initialized = true;
+ g_haveOpenCL = handle != 0 && GetProcAddress(handle, oclFuncToCheck) != 0;
+ }
+ if(!handle)
+ return 0;
+ }
+
+ return funcname ? (void*)GetProcAddress(handle, funcname) : 0;
+}
+
+#elif defined(__linux)
+
+#include <dlfcn.h>
+#include <stdio.h>
+
+static void* initOpenCLAndLoad(const char* funcname)
+{
+ static bool initialized = false;
+ static void* handle = 0;
+ if (!handle)
+ {
+ if(!initialized)
+ {
+ handle = dlopen("libOpenCL.so", RTLD_LAZY);
+ if(!handle)
+ handle = dlopen("libCL.so", RTLD_LAZY);
+ initialized = true;
+ g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
+ }
+ if(!handle)
+ return 0;
+ }
+
+ return funcname ? (void*)dlsym(handle, funcname) : 0;
+}
+
+#else
+
+static void* initOpenCLAndLoad(const char*)
+{
+ return 0;
+}
+
+#endif
+
+
+#define OCL_FUNC(rettype, funcname, argsdecl, args) \
+ typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \
+ static rettype funcname argsdecl \
+ { \
+ static funcname##_t funcname##_p = 0; \
+ if( !funcname##_p ) \
+ { \
+ funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \
+ if( !funcname##_p ) \
+ return OPENCV_CL_NOT_IMPLEMENTED; \
+ } \
+ return funcname##_p args; \
+ }
+
+
+#define OCL_FUNC_P(rettype, funcname, argsdecl, args) \
+ typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \
+ static rettype funcname argsdecl \
+ { \
+ static funcname##_t funcname##_p = 0; \
+ if( !funcname##_p ) \
+ { \
+ funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \
+ if( !funcname##_p ) \
+ { \
+ if( errcode_ret ) \
+ *errcode_ret = OPENCV_CL_NOT_IMPLEMENTED; \
+ return 0; \
+ } \
+ } \
+ return funcname##_p args; \
+ }
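+// For reference, OCL_FUNC(cl_int, clFinish, (cl_command_queue q), (q))
+// expands to roughly the following lazily-bound stub (the real one is
+// generated by the OCL_FUNC invocation further down in this file):
+//
+//     typedef cl_int (CV_STDCALL * clFinish_t)(cl_command_queue q);
+//     static cl_int clFinish(cl_command_queue q)
+//     {
+//         static clFinish_t clFinish_p = 0;
+//         if( !clFinish_p )
+//         {
+//             clFinish_p = (clFinish_t)initOpenCLAndLoad("clFinish");
+//             if( !clFinish_p )
+//                 return OPENCV_CL_NOT_IMPLEMENTED;   // runtime not installed
+//         }
+//         return clFinish_p(q);
+//     }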
+
+OCL_FUNC(cl_int, clGetPlatformIDs,
+ (cl_uint num_entries, cl_platform_id* platforms, cl_uint* num_platforms),
+ (num_entries, platforms, num_platforms))
+
+OCL_FUNC(cl_int, clGetPlatformInfo,
+ (cl_platform_id platform, cl_platform_info param_name,
+ size_t param_value_size, void * param_value,
+ size_t * param_value_size_ret),
+ (platform, param_name, param_value_size, param_value, param_value_size_ret))
+
+OCL_FUNC(cl_int, clGetDeviceInfo,
+ (cl_device_id device,
+ cl_device_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret),
+ (device, param_name, param_value_size, param_value, param_value_size_ret))
+
+
+OCL_FUNC(cl_int, clGetDeviceIDs,
+ (cl_platform_id platform,
+ cl_device_type device_type,
+ cl_uint num_entries,
+ cl_device_id * devices,
+ cl_uint * num_devices),
+ (platform, device_type, num_entries, devices, num_devices))
+
+OCL_FUNC_P(cl_context, clCreateContext,
+ (const cl_context_properties * properties,
+ cl_uint num_devices,
+ const cl_device_id * devices,
+ void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *),
+ void * user_data,
+ cl_int * errcode_ret),
+ (properties, num_devices, devices, pfn_notify, user_data, errcode_ret))
+
+OCL_FUNC(cl_int, clReleaseContext, (cl_context context), (context))
+
+/*
+OCL_FUNC(cl_int, clRetainContext, (cl_context context), (context))
+
+OCL_FUNC_P(cl_context, clCreateContextFromType,
+ (const cl_context_properties * properties,
+ cl_device_type device_type,
+ void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *),
+ void * user_data,
+ cl_int * errcode_ret),
+ (properties, device_type, pfn_notify, user_data, errcode_ret))
+
+OCL_FUNC(cl_int, clGetContextInfo,
+ (cl_context context,
+ cl_context_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret),
+ (context, param_name, param_value_size,
+ param_value, param_value_size_ret))
+*/
+OCL_FUNC_P(cl_command_queue, clCreateCommandQueue,
+ (cl_context context,
+ cl_device_id device,
+ cl_command_queue_properties properties,
+ cl_int * errcode_ret),
+ (context, device, properties, errcode_ret))
+
+OCL_FUNC(cl_int, clReleaseCommandQueue, (cl_command_queue command_queue), (command_queue))
+
+OCL_FUNC_P(cl_mem, clCreateBuffer,
+ (cl_context context,
+ cl_mem_flags flags,
+ size_t size,
+ void * host_ptr,
+ cl_int * errcode_ret),
+ (context, flags, size, host_ptr, errcode_ret))
+
+/*
+OCL_FUNC(cl_int, clRetainCommandQueue, (cl_command_queue command_queue), (command_queue))
+
+OCL_FUNC(cl_int, clGetCommandQueueInfo,
+ (cl_command_queue command_queue,
+ cl_command_queue_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret),
+ (command_queue, param_name, param_value_size, param_value, param_value_size_ret))
+
+OCL_FUNC_P(cl_mem, clCreateSubBuffer,
+ (cl_mem buffer,
+ cl_mem_flags flags,
+ cl_buffer_create_type buffer_create_type,
+ const void * buffer_create_info,
+ cl_int * errcode_ret),
+ (buffer, flags, buffer_create_type, buffer_create_info, errcode_ret))
+
+OCL_FUNC_P(cl_mem, clCreateImage,
+ (cl_context context,
+ cl_mem_flags flags,
+ const cl_image_format * image_format,
+ const cl_image_desc * image_desc,
+ void * host_ptr,
+ cl_int * errcode_ret),
+ (context, flags, image_format, image_desc, host_ptr, errcode_ret))
+
+OCL_FUNC(cl_int, clGetSupportedImageFormats,
+ (cl_context context,
+ cl_mem_flags flags,
+ cl_mem_object_type image_type,
+ cl_uint num_entries,
+ cl_image_format * image_formats,
+ cl_uint * num_image_formats),
+ (context, flags, image_type, num_entries, image_formats, num_image_formats))
+
+OCL_FUNC(cl_int, clGetMemObjectInfo,
+ (cl_mem memobj,
+ cl_mem_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret),
+ (memobj, param_name, param_value_size, param_value, param_value_size_ret))
+
+OCL_FUNC(cl_int, clGetImageInfo,
+ (cl_mem image,
+ cl_image_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret),
+ (image, param_name, param_value_size, param_value, param_value_size_ret))
+
+OCL_FUNC(cl_int, clCreateKernelsInProgram,
+ (cl_program program,
+ cl_uint num_kernels,
+ cl_kernel * kernels,
+ cl_uint * num_kernels_ret),
+ (program, num_kernels, kernels, num_kernels_ret))
+
+OCL_FUNC(cl_int, clRetainKernel, (cl_kernel kernel), (kernel))
+
+OCL_FUNC(cl_int, clGetKernelArgInfo,
+ (cl_kernel kernel,
+ cl_uint arg_indx,
+ cl_kernel_arg_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret),
+ (kernel, arg_indx, param_name, param_value_size, param_value, param_value_size_ret))
+
+OCL_FUNC(cl_int, clEnqueueReadImage,
+ (cl_command_queue command_queue,
+ cl_mem image,
+ cl_bool blocking_read,
+ const size_t * origin[3],
+ const size_t * region[3],
+ size_t row_pitch,
+ size_t slice_pitch,
+ void * ptr,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event),
+ (command_queue, image, blocking_read, origin, region,
+ row_pitch, slice_pitch,
+ ptr,
+ num_events_in_wait_list,
+ event_wait_list,
+ event))
+
+OCL_FUNC(cl_int, clEnqueueWriteImage,
+ (cl_command_queue command_queue,
+ cl_mem image,
+ cl_bool blocking_write,
+ const size_t * origin[3],
+ const size_t * region[3],
+ size_t input_row_pitch,
+ size_t input_slice_pitch,
+ const void * ptr,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event),
+ (command_queue, image, blocking_write, origin, region, input_row_pitch,
+ input_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event))
+
+OCL_FUNC(cl_int, clEnqueueFillImage,
+ (cl_command_queue command_queue,
+ cl_mem image,
+ const void * fill_color,
+ const size_t * origin[3],
+ const size_t * region[3],
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event),
+ (command_queue, image, fill_color, origin, region,
+ num_events_in_wait_list, event_wait_list, event))
+
+OCL_FUNC(cl_int, clEnqueueCopyImage,
+ (cl_command_queue command_queue,
+ cl_mem src_image,
+ cl_mem dst_image,
+ const size_t * src_origin[3],
+ const size_t * dst_origin[3],
+ const size_t * region[3],
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event),
+ (command_queue, src_image, dst_image, src_origin, dst_origin,
+ region, num_events_in_wait_list, event_wait_list, event))
+
+OCL_FUNC(cl_int, clEnqueueCopyImageToBuffer,
+ (cl_command_queue command_queue,
+ cl_mem src_image,
+ cl_mem dst_buffer,
+ const size_t * src_origin[3],
+ const size_t * region[3],
+ size_t dst_offset,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event),
+ (command_queue, src_image, dst_buffer, src_origin, region, dst_offset,
+ num_events_in_wait_list, event_wait_list, event))
+
+OCL_FUNC(cl_int, clEnqueueCopyBufferToImage,
+ (cl_command_queue command_queue,
+ cl_mem src_buffer,
+ cl_mem dst_image,
+ size_t src_offset,
+ const size_t * dst_origin[3],
+ const size_t * region[3],
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event),
+ (command_queue, src_buffer, dst_image, src_offset, dst_origin,
+ region, num_events_in_wait_list, event_wait_list, event))
+
+
+OCL_FUNC_P(void*, clEnqueueMapImage,
+ (cl_command_queue command_queue,
+ cl_mem image,
+ cl_bool blocking_map,
+ cl_map_flags map_flags,
+ const size_t * origin[3],
+ const size_t * region[3],
+ size_t * image_row_pitch,
+ size_t * image_slice_pitch,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event,
+ cl_int * errcode_ret),
+ (command_queue, image, blocking_map, map_flags, origin, region,
+ image_row_pitch, image_slice_pitch, num_events_in_wait_list,
+ event_wait_list, event, errcode_ret))
+
+OCL_FUNC(cl_int, clRetainProgram, (cl_program program), (program))
+
+OCL_FUNC(cl_int, clGetKernelInfo,
+ (cl_kernel kernel,
+ cl_kernel_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret),
+ (kernel, param_name, param_value_size, param_value, param_value_size_ret))
+
+OCL_FUNC(cl_int, clRetainMemObject, (cl_mem memobj), (memobj))
+
+*/
+
+OCL_FUNC(cl_int, clReleaseMemObject, (cl_mem memobj), (memobj))
+
+
+OCL_FUNC_P(cl_program, clCreateProgramWithSource,
+ (cl_context context,
+ cl_uint count,
+ const char ** strings,
+ const size_t * lengths,
+ cl_int * errcode_ret),
+ (context, count, strings, lengths, errcode_ret))
+
+OCL_FUNC_P(cl_program, clCreateProgramWithBinary,
+ (cl_context context,
+ cl_uint num_devices,
+ const cl_device_id * device_list,
+ const size_t * lengths,
+ const unsigned char ** binaries,
+ cl_int * binary_status,
+ cl_int * errcode_ret),
+ (context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret))
+
+OCL_FUNC(cl_int, clReleaseProgram, (cl_program program), (program))
+
+OCL_FUNC(cl_int, clBuildProgram,
+ (cl_program program,
+ cl_uint num_devices,
+ const cl_device_id * device_list,
+ const char * options,
+ void (CL_CALLBACK * pfn_notify)(cl_program, void *),
+ void * user_data),
+ (program, num_devices, device_list, options, pfn_notify, user_data))
+
+OCL_FUNC(cl_int, clGetProgramInfo,
+ (cl_program program,
+ cl_program_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret),
+ (program, param_name, param_value_size, param_value, param_value_size_ret))
+
+OCL_FUNC(cl_int, clGetProgramBuildInfo,
+ (cl_program program,
+ cl_device_id device,
+ cl_program_build_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret),
+ (program, device, param_name, param_value_size, param_value, param_value_size_ret))
+
+OCL_FUNC_P(cl_kernel, clCreateKernel,
+ (cl_program program,
+ const char * kernel_name,
+ cl_int * errcode_ret),
+ (program, kernel_name, errcode_ret))
+
+OCL_FUNC(cl_int, clReleaseKernel, (cl_kernel kernel), (kernel))
+
+OCL_FUNC(cl_int, clSetKernelArg,
+ (cl_kernel kernel,
+ cl_uint arg_index,
+ size_t arg_size,
+ const void * arg_value),
+ (kernel, arg_index, arg_size, arg_value))
+
+OCL_FUNC(cl_int, clGetKernelWorkGroupInfo,
+ (cl_kernel kernel,
+ cl_device_id device,
+ cl_kernel_work_group_info param_name,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret),
+ (kernel, device, param_name, param_value_size, param_value, param_value_size_ret))
+
+OCL_FUNC(cl_int, clFinish, (cl_command_queue command_queue), (command_queue))
+
+OCL_FUNC(cl_int, clEnqueueReadBuffer,
+ (cl_command_queue command_queue,
+ cl_mem buffer,
+ cl_bool blocking_read,
+ size_t offset,
+ size_t size,
+ void * ptr,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event),
+ (command_queue, buffer, blocking_read, offset, size, ptr,
+ num_events_in_wait_list, event_wait_list, event))
+
+OCL_FUNC(cl_int, clEnqueueReadBufferRect,
+ (cl_command_queue command_queue,
+ cl_mem buffer,
+ cl_bool blocking_read,
+ const size_t * buffer_offset,
+ const size_t * host_offset,
+ const size_t * region,
+ size_t buffer_row_pitch,
+ size_t buffer_slice_pitch,
+ size_t host_row_pitch,
+ size_t host_slice_pitch,
+ void * ptr,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event),
+ (command_queue, buffer, blocking_read, buffer_offset, host_offset, region, buffer_row_pitch,
+ buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr, num_events_in_wait_list,
+ event_wait_list, event))
+
+OCL_FUNC(cl_int, clEnqueueWriteBuffer,
+ (cl_command_queue command_queue,
+ cl_mem buffer,
+ cl_bool blocking_write,
+ size_t offset,
+ size_t size,
+ const void * ptr,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event),
+ (command_queue, buffer, blocking_write, offset, size, ptr,
+ num_events_in_wait_list, event_wait_list, event))
+
+OCL_FUNC(cl_int, clEnqueueWriteBufferRect,
+ (cl_command_queue command_queue,
+ cl_mem buffer,
+ cl_bool blocking_write,
+ const size_t * buffer_offset,
+ const size_t * host_offset,
+ const size_t * region,
+ size_t buffer_row_pitch,
+ size_t buffer_slice_pitch,
+ size_t host_row_pitch,
+ size_t host_slice_pitch,
+ const void * ptr,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event),
+ (command_queue, buffer, blocking_write, buffer_offset, host_offset,
+ region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch,
+ host_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event))
+
+/*OCL_FUNC(cl_int, clEnqueueFillBuffer,
+ (cl_command_queue command_queue,
+ cl_mem buffer,
+ const void * pattern,
+ size_t pattern_size,
+ size_t offset,
+ size_t size,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event),
+ (command_queue, buffer, pattern, pattern_size, offset, size,
+ num_events_in_wait_list, event_wait_list, event))*/
+
+OCL_FUNC(cl_int, clEnqueueCopyBuffer,
+ (cl_command_queue command_queue,
+ cl_mem src_buffer,
+ cl_mem dst_buffer,
+ size_t src_offset,
+ size_t dst_offset,
+ size_t size,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event),
+ (command_queue, src_buffer, dst_buffer, src_offset, dst_offset,
+ size, num_events_in_wait_list, event_wait_list, event))
+
+OCL_FUNC(cl_int, clEnqueueCopyBufferRect,
+ (cl_command_queue command_queue,
+ cl_mem src_buffer,
+ cl_mem dst_buffer,
+ const size_t * src_origin,
+ const size_t * dst_origin,
+ const size_t * region,
+ size_t src_row_pitch,
+ size_t src_slice_pitch,
+ size_t dst_row_pitch,
+ size_t dst_slice_pitch,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event),
+ (command_queue, src_buffer, dst_buffer, src_origin, dst_origin,
+ region, src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch,
+ num_events_in_wait_list, event_wait_list, event))
+
+OCL_FUNC_P(void*, clEnqueueMapBuffer,
+ (cl_command_queue command_queue,
+ cl_mem buffer,
+ cl_bool blocking_map,
+ cl_map_flags map_flags,
+ size_t offset,
+ size_t size,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event,
+ cl_int * errcode_ret),
+ (command_queue, buffer, blocking_map, map_flags, offset, size,
+ num_events_in_wait_list, event_wait_list, event, errcode_ret))
+
+OCL_FUNC(cl_int, clEnqueueUnmapMemObject,
+ (cl_command_queue command_queue,
+ cl_mem memobj,
+ void * mapped_ptr,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event),
+ (command_queue, memobj, mapped_ptr, num_events_in_wait_list, event_wait_list, event))
+
+OCL_FUNC(cl_int, clEnqueueNDRangeKernel,
+ (cl_command_queue command_queue,
+ cl_kernel kernel,
+ cl_uint work_dim,
+ const size_t * global_work_offset,
+ const size_t * global_work_size,
+ const size_t * local_work_size,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event),
+ (command_queue, kernel, work_dim, global_work_offset, global_work_size,
+ local_work_size, num_events_in_wait_list, event_wait_list, event))
+
+OCL_FUNC(cl_int, clEnqueueTask,
+ (cl_command_queue command_queue,
+ cl_kernel kernel,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event),
+ (command_queue, kernel, num_events_in_wait_list, event_wait_list, event))
+
+OCL_FUNC(cl_int, clSetEventCallback,
+ (cl_event event,
+ cl_int command_exec_callback_type ,
+ void (CL_CALLBACK *pfn_event_notify) (cl_event event, cl_int event_command_exec_status, void *user_data),
+ void *user_data),
+ (event, command_exec_callback_type, pfn_event_notify, user_data))
+
+OCL_FUNC(cl_int, clReleaseEvent, (cl_event event), (event))
+
+}
+
+#endif
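+
+// Each OCL_FUNC/OCL_FUNC_P entry above expands to a thin forwarding wrapper for
+// one OpenCL API entry point, so the OpenCL runtime is resolved at run time
+// (see initOpenCLAndLoad() in haveOpenCL() below) instead of at link time.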
+
+namespace cv { namespace ocl {
+
+struct UMat2D
+{
+ UMat2D(const UMat& m, int accessFlags)
+ {
+ CV_Assert(m.dims == 2);
+ data = (cl_mem)m.handle(accessFlags);
+ offset = m.offset;
+ step = m.step;
+ rows = m.rows;
+ cols = m.cols;
+ }
+ cl_mem data;
+ size_t offset;
+ size_t step;
+ int rows;
+ int cols;
+};
+
+struct UMat3D
+{
+ UMat3D(const UMat& m, int accessFlags)
+ {
+ CV_Assert(m.dims == 3);
+ data = (cl_mem)m.handle(accessFlags);
+ offset = m.offset;
+ step = m.step.p[1];
+ slicestep = m.step.p[0];
+ slices = m.size.p[0];
+ rows = m.size.p[1];
+ cols = m.size.p[2];
+ }
+ cl_mem data;
+ size_t offset;
+ size_t slicestep;
+ size_t step;
+ int slices;
+ int rows;
+ int cols;
+};
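+
+// UMat2D/UMat3D are plain host-side descriptors of a buffer region (memory
+// handle, offset, steps and sizes); Kernel::set() below passes them to
+// clSetKernelArg by value so that a kernel can address the proper sub-matrix.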
+
+// Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
+static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 )
+{
+ static uint64 table[256];
+ static bool initialized = false;
+
+ if( !initialized )
+ {
+ for( int i = 0; i < 256; i++ )
+ {
+ uint64 c = i;
+ for( int j = 0; j < 8; j++ )
+ c = ((c & 1) ? CV_BIG_UINT(0xc96c5795d7870f42) : 0) ^ (c >> 1);
+ table[i] = c;
+ }
+ initialized = true;
+ }
+
+ uint64 crc = ~crc0;
+ for( size_t idx = 0; idx < size; idx++ )
+ crc = table[(uchar)crc ^ data[idx]] ^ (crc >> 8);
+
+ return ~crc;
+}
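+
+// Sanity check for the constants above: with the reflected polynomial and the
+// ~0 initial/final xor this is the common CRC-64/XZ variant, so, if the table
+// is correct, crc64((const uchar*)"123456789", 9) == 0x995dc9bbdf1939fa.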
+
+struct HashKey
+{
+ typedef uint64 part;
+ HashKey(part _a, part _b) : a(_a), b(_b) {}
+ part a, b;
+};
+
+inline bool operator == (const HashKey& h1, const HashKey& h2)
+{
+ return h1.a == h2.a && h1.b == h2.b;
+}
+
+inline bool operator < (const HashKey& h1, const HashKey& h2)
+{
+ return h1.a < h2.a || (h1.a == h2.a && h1.b < h2.b);
+}
+
+bool haveOpenCL()
+{
+ initOpenCLAndLoad(0);
+ return g_haveOpenCL;
+}
+
+bool useOpenCL()
+{
+ TLSData* data = TLSData::get();
+ if( data->useOpenCL < 0 )
+ data->useOpenCL = (int)haveOpenCL();
+ return data->useOpenCL > 0;
+}
+
+void setUseOpenCL(bool flag)
+{
+ if( haveOpenCL() )
+ {
+ TLSData* data = TLSData::get();
+ data->useOpenCL = flag ? 1 : 0;
+ }
+}
+
+void finish()
+{
+ Queue::getDefault().finish();
+}
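+
+// Typical guard for an OpenCL-aware code path (a usage sketch):
+//
+//     if( cv::ocl::useOpenCL() )
+//     {
+//         // ... OpenCL branch ...
+//     }
+//     cv::ocl::setUseOpenCL(false); // per-thread opt-out, stored in TLSData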
+
+#define IMPLEMENT_REFCOUNTABLE() \
+ void addref() { CV_XADD(&refcount, 1); } \
+ void release() { if( CV_XADD(&refcount, -1) == 1 ) delete this; } \
+ int refcount
+
+class Platform
+{
+public:
+ Platform();
+ ~Platform();
+ Platform(const Platform& p);
+ Platform& operator = (const Platform& p);
+
+ void* ptr() const;
+ static Platform& getDefault();
+protected:
+ struct Impl;
+ Impl* p;
+};
+
+struct Platform::Impl
+{
+ Impl()
+ {
+ refcount = 1;
+ handle = 0;
+ initialized = false;
+ }
+
+ ~Impl() {}
+
+ void init()
+ {
+ if( !initialized )
+ {
+ cl_uint n = 0;
+ if( clGetPlatformIDs(1, &handle, &n) < 0 || n == 0 )
+ handle = 0;
+ if( handle != 0 )
+ {
+ char buf[1000];
+ size_t len = 0;
+                clGetPlatformInfo(handle, CL_PLATFORM_VENDOR, sizeof(buf)-1, buf, &len);
+                buf[len] = '\0';
+ vendor = String(buf);
+ }
+
+ initialized = true;
+ }
+ }
+
+ IMPLEMENT_REFCOUNTABLE();
+
+ cl_platform_id handle;
+ String vendor;
+ bool initialized;
+};
+
+Platform::Platform()
+{
+ p = 0;
+}
+
+Platform::~Platform()
+{
+ if(p)
+ p->release();
+}
+
+Platform::Platform(const Platform& pl)
+{
+ p = (Impl*)pl.p;
+ if(p)
+ p->addref();
+}
+
+Platform& Platform::operator = (const Platform& pl)
+{
+ Impl* newp = (Impl*)pl.p;
+ if(newp)
+ newp->addref();
+ if(p)
+ p->release();
+ p = newp;
+ return *this;
+}
+
+void* Platform::ptr() const
+{
+ return p ? p->handle : 0;
+}
+
+Platform& Platform::getDefault()
+{
+ static Platform p;
+ if( !p.p )
+ {
+ p.p = new Impl;
+ p.p->init();
+ }
+ return p;
+}
+
+///////////////////////////////////////////////////////////////////////////////////
+
+struct Device::Impl
+{
+ Impl(void* d)
+ {
+ handle = (cl_device_id)d;
+ }
+
+ template<typename _TpCL, typename _TpOut>
+ _TpOut getProp(cl_device_info prop) const
+ {
+ _TpCL temp=_TpCL();
+ size_t sz = 0;
+
+ return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) >= 0 &&
+ sz == sizeof(temp) ? _TpOut(temp) : _TpOut();
+ }
+
+ bool getBoolProp(cl_device_info prop) const
+ {
+ cl_bool temp = CL_FALSE;
+ size_t sz = 0;
+
+ return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) >= 0 &&
+ sz == sizeof(temp) ? temp != 0 : false;
+ }
+
+ String getStrProp(cl_device_info prop) const
+ {
+ char buf[1024];
+ size_t sz=0;
+ return clGetDeviceInfo(handle, prop, sizeof(buf)-16, buf, &sz) >= 0 &&
+ sz < sizeof(buf) ? String(buf) : String();
+ }
+
+ IMPLEMENT_REFCOUNTABLE();
+ cl_device_id handle;
+};
+
+
+Device::Device()
+{
+ p = 0;
+}
+
+Device::Device(void* d)
+{
+ p = 0;
+ set(d);
+}
+
+Device::Device(const Device& d)
+{
+ p = d.p;
+ if(p)
+ p->addref();
+}
+
+Device& Device::operator = (const Device& d)
+{
+ Impl* newp = (Impl*)d.p;
+ if(newp)
+ newp->addref();
+ if(p)
+ p->release();
+ p = newp;
+ return *this;
+}
+
+Device::~Device()
+{
+ if(p)
+ p->release();
+}
+
+void Device::set(void* d)
+{
+ if(p)
+ p->release();
+ p = new Impl(d);
+}
+
+void* Device::ptr() const
+{
+ return p ? p->handle : 0;
+}
+
+String Device::name() const
+{ return p ? p->getStrProp(CL_DEVICE_NAME) : String(); }
+
+String Device::extensions() const
+{ return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); }
+
+String Device::vendor() const
+{ return p ? p->getStrProp(CL_DEVICE_VENDOR) : String(); }
+
+String Device::OpenCL_C_Version() const
+{ return p ? p->getStrProp(CL_DEVICE_OPENCL_C_VERSION) : String(); }
+
+String Device::OpenCLVersion() const
+{ return p ? p->getStrProp(CL_DEVICE_VERSION) : String(); }
+
+String Device::driverVersion() const
+{ return p ? p->getStrProp(CL_DRIVER_VERSION) : String(); }
+
+int Device::type() const
+{ return p ? p->getProp<cl_device_type, int>(CL_DEVICE_TYPE) : 0; }
+
+int Device::addressBits() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS) : 0; }
+
+bool Device::available() const
+{ return p ? p->getBoolProp(CL_DEVICE_AVAILABLE) : false; }
+
+bool Device::compilerAvailable() const
+{ return p ? p->getBoolProp(CL_DEVICE_COMPILER_AVAILABLE) : false; }
+
+bool Device::linkerAvailable() const
+{ return p ? p->getBoolProp(CL_DEVICE_LINKER_AVAILABLE) : false; }
+
+int Device::doubleFPConfig() const
+{ return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_DOUBLE_FP_CONFIG) : 0; }
+
+int Device::singleFPConfig() const
+{ return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_SINGLE_FP_CONFIG) : 0; }
+
+int Device::halfFPConfig() const
+{ return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG) : 0; }
+
+bool Device::endianLittle() const
+{ return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; }
+
+bool Device::errorCorrectionSupport() const
+{ return p ? p->getBoolProp(CL_DEVICE_ERROR_CORRECTION_SUPPORT) : false; }
+
+int Device::executionCapabilities() const
+{ return p ? p->getProp<cl_device_exec_capabilities, int>(CL_DEVICE_EXECUTION_CAPABILITIES) : 0; }
+
+size_t Device::globalMemCacheSize() const
+{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE) : 0; }
+
+int Device::globalMemCacheType() const
+{ return p ? p->getProp<cl_device_mem_cache_type, int>(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE) : 0; }
+
+int Device::globalMemCacheLineSize() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE) : 0; }
+
+size_t Device::globalMemSize() const
+{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_SIZE) : 0; }
+
+size_t Device::localMemSize() const
+{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_LOCAL_MEM_SIZE) : 0; }
+
+int Device::localMemType() const
+{ return p ? p->getProp<cl_device_local_mem_type, int>(CL_DEVICE_LOCAL_MEM_TYPE) : 0; }
+
+bool Device::hostUnifiedMemory() const
+{ return p ? p->getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY) : false; }
+
+bool Device::imageSupport() const
+{ return p ? p->getBoolProp(CL_DEVICE_IMAGE_SUPPORT) : false; }
+
+size_t Device::image2DMaxWidth() const
+{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_WIDTH) : 0; }
+
+size_t Device::image2DMaxHeight() const
+{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_HEIGHT) : 0; }
+
+size_t Device::image3DMaxWidth() const
+{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_WIDTH) : 0; }
+
+size_t Device::image3DMaxHeight() const
+{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_HEIGHT) : 0; }
+
+size_t Device::image3DMaxDepth() const
+{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_DEPTH) : 0; }
+
+size_t Device::imageMaxBufferSize() const
+{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE) : 0; }
+
+size_t Device::imageMaxArraySize() const
+{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE) : 0; }
+
+int Device::maxClockFrequency() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CLOCK_FREQUENCY) : 0; }
+
+int Device::maxComputeUnits() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_COMPUTE_UNITS) : 0; }
+
+int Device::maxConstantArgs() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CONSTANT_ARGS) : 0; }
+
+size_t Device::maxConstantBufferSize() const
+{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) : 0; }
+
+size_t Device::maxMemAllocSize() const
+{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_MEM_ALLOC_SIZE) : 0; }
+
+size_t Device::maxParameterSize() const
+{ return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_PARAMETER_SIZE) : 0; }
+
+int Device::maxReadImageArgs() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_READ_IMAGE_ARGS) : 0; }
+
+int Device::maxWriteImageArgs() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WRITE_IMAGE_ARGS) : 0; }
+
+int Device::maxSamplers() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_SAMPLERS) : 0; }
+
+size_t Device::maxWorkGroupSize() const
+{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE) : 0; }
+
+int Device::maxWorkItemDims() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) : 0; }
+
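+// Note: the query below always requests MAX_DIMS (32) entries, so the caller
+// must pass an array with room for at least that many elements.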
+void Device::maxWorkItemSizes(size_t* sizes) const
+{
+ if(p)
+ {
+ const int MAX_DIMS = 32;
+ size_t retsz = 0;
+ clGetDeviceInfo(p->handle, CL_DEVICE_MAX_WORK_ITEM_SIZES,
+ MAX_DIMS*sizeof(sizes[0]), &sizes[0], &retsz);
+ }
+}
+
+int Device::memBaseAddrAlign() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_MEM_BASE_ADDR_ALIGN) : 0; }
+
+int Device::nativeVectorWidthChar() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR) : 0; }
+
+int Device::nativeVectorWidthShort() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT) : 0; }
+
+int Device::nativeVectorWidthInt() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT) : 0; }
+
+int Device::nativeVectorWidthLong() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG) : 0; }
+
+int Device::nativeVectorWidthFloat() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT) : 0; }
+
+int Device::nativeVectorWidthDouble() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE) : 0; }
+
+int Device::nativeVectorWidthHalf() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF) : 0; }
+
+int Device::preferredVectorWidthChar() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR) : 0; }
+
+int Device::preferredVectorWidthShort() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT) : 0; }
+
+int Device::preferredVectorWidthInt() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT) : 0; }
+
+int Device::preferredVectorWidthLong() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG) : 0; }
+
+int Device::preferredVectorWidthFloat() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT) : 0; }
+
+int Device::preferredVectorWidthDouble() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE) : 0; }
+
+int Device::preferredVectorWidthHalf() const
+{ return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF) : 0; }
+
+size_t Device::printfBufferSize() const
+{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_PRINTF_BUFFER_SIZE) : 0; }
+
+size_t Device::profilingTimerResolution() const
+{ return p ? p->getProp<size_t, size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION) : 0; }
+
+const Device& Device::getDefault()
+{
+ const Context& ctx = Context::getDefault();
+ int idx = TLSData::get()->device;
+ return ctx.device(idx);
+}
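+
+// Example: inspect the default device before choosing a code path (a sketch;
+// every getter above degrades to 0/false/String() when no device is present):
+//
+//     const cv::ocl::Device& dev = cv::ocl::Device::getDefault();
+//     bool haveDouble = dev.doubleFPConfig() != 0;
+//     size_t maxWG = dev.maxWorkGroupSize();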
+
+/////////////////////////////////////////////////////////////////////////////////////////
+
+struct Context::Impl
+{
+ Impl(int dtype0)
+ {
+ refcount = 1;
+ handle = 0;
+
+ cl_int retval = 0;
+ cl_platform_id pl = (cl_platform_id)Platform::getDefault().ptr();
+ cl_context_properties prop[] =
+ {
+ CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
+ 0
+ };
+
+        cl_uint i, nd0 = 0, nd = 0;
+        int dtype = dtype0 & 15;
+        retval = clGetDeviceIDs( pl, dtype, 0, 0, &nd0 );
+        if(retval < 0 || nd0 == 0)
+            return;
+ AutoBuffer<void*> dlistbuf(nd0*2+1);
+ cl_device_id* dlist = (cl_device_id*)(void**)dlistbuf;
+ cl_device_id* dlist_new = dlist + nd0;
+ clGetDeviceIDs( pl, dtype, nd0, dlist, &nd0 );
+ String name0;
+
+ for(i = 0; i < nd0; i++)
+ {
+ Device d(dlist[i]);
+ if( !d.available() || !d.compilerAvailable() )
+ continue;
+ if( dtype0 == Device::TYPE_DGPU && d.hostUnifiedMemory() )
+ continue;
+ if( dtype0 == Device::TYPE_IGPU && !d.hostUnifiedMemory() )
+ continue;
+ String name = d.name();
+ if( nd != 0 && name != name0 )
+ continue;
+ name0 = name;
+ dlist_new[nd++] = dlist[i];
+ }
+
+ if(nd == 0)
+ return;
+
+        // !!! in the current implementation the number of devices is forced to 1 !!!
+ nd = 1;
+
+ handle = clCreateContext(prop, nd, dlist_new, 0, 0, &retval);
+ bool ok = handle != 0 && retval >= 0;
+ if( ok )
+ {
+ devices.resize(nd);
+ for( i = 0; i < nd; i++ )
+ devices[i].set(dlist_new[i]);
+ }
+ }
+
+ ~Impl()
+ {
+ if(handle)
+ clReleaseContext(handle);
+ devices.clear();
+ }
+
+ Program getProg(const ProgramSource& src,
+ const String& buildflags, String& errmsg)
+ {
+ String prefix = Program::getPrefix(buildflags);
+ HashKey k(src.hash(), crc64((const uchar*)prefix.c_str(), prefix.size()));
+ phash_t::iterator it = phash.find(k);
+ if( it != phash.end() )
+ return it->second;
+ Program prog(src, buildflags, errmsg);
+ phash.insert(std::pair<HashKey,Program>(k, prog));
+ return prog;
+ }
+
+ IMPLEMENT_REFCOUNTABLE();
+
+ cl_context handle;
+ std::vector<Device> devices;
+
+ typedef ProgramSource::hash_t hash_t;
+
+ struct HashKey
+ {
+ HashKey(hash_t _a, hash_t _b) : a(_a), b(_b) {}
+ bool operator < (const HashKey& k) const { return a < k.a || (a == k.a && b < k.b); }
+ bool operator == (const HashKey& k) const { return a == k.a && b == k.b; }
+ bool operator != (const HashKey& k) const { return a != k.a || b != k.b; }
+ hash_t a, b;
+ };
+ typedef std::map<HashKey, Program> phash_t;
+ phash_t phash;
+};
+
+
+Context::Context()
+{
+ p = 0;
+}
+
+Context::Context(int dtype)
+{
+ p = 0;
+ create(dtype);
+}
+
+bool Context::create(int dtype0)
+{
+ if( !haveOpenCL() )
+ return false;
+ if(p)
+ p->release();
+ p = new Impl(dtype0);
+ if(!p->handle)
+ {
+ delete p;
+ p = 0;
+ }
+ return p != 0;
+}
+
+Context::~Context()
+{
+    if(p)
+        p->release();
+}
+
+Context::Context(const Context& c)
+{
+ p = (Impl*)c.p;
+ if(p)
+ p->addref();
+}
+
+Context& Context::operator = (const Context& c)
+{
+ Impl* newp = (Impl*)c.p;
+ if(newp)
+ newp->addref();
+ if(p)
+ p->release();
+ p = newp;
+ return *this;
+}
+
+void* Context::ptr() const
+{
+    return p ? p->handle : 0;
+}
+
+size_t Context::ndevices() const
+{
+ return p ? p->devices.size() : 0;
+}
+
+const Device& Context::device(size_t idx) const
+{
+ static Device dummy;
+ return !p || idx >= p->devices.size() ? dummy : p->devices[idx];
+}
+
+Context& Context::getDefault()
+{
+ static Context ctx;
+ if( !ctx.p && haveOpenCL() )
+ {
+        // no OpenCL context yet; try progressively more generic device types:
+        // a dedicated accelerator first, then a discrete GPU, then an
+        // integrated GPU, and finally the CPU
+ ctx.create(Device::TYPE_ACCELERATOR);
+ if(!ctx.p)
+ ctx.create(Device::TYPE_DGPU);
+ if(!ctx.p)
+ ctx.create(Device::TYPE_IGPU);
+ if(!ctx.p)
+ ctx.create(Device::TYPE_CPU);
+ }
+
+ return ctx;
+}
+
+Program Context::getProg(const ProgramSource& prog,
+ const String& buildopts, String& errmsg)
+{
+ return p ? p->getProg(prog, buildopts, errmsg) : Program();
+}
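+
+// Programs are cached per (source hash, build-flags prefix) in Impl::phash, so
+// repeated getProg() calls with identical inputs are cheap. A usage sketch
+// (the kernel source is just a placeholder):
+//
+//     cv::String errmsg;
+//     cv::ocl::ProgramSource src("__kernel void noop() {}");
+//     cv::ocl::Program prog = cv::ocl::Context::getDefault().getProg(src, "", errmsg);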
+
+struct Queue::Impl
+{
+ Impl(const Context& c, const Device& d)
+ {
+ refcount = 1;
+ const Context* pc = &c;
+ cl_context ch = (cl_context)pc->ptr();
+ if( !ch )
+ {
+ pc = &Context::getDefault();
+ ch = (cl_context)pc->ptr();
+ }
+ cl_device_id dh = (cl_device_id)d.ptr();
+ if( !dh )
+ dh = (cl_device_id)pc->device(0).ptr();
+ cl_int retval = 0;
+ handle = clCreateCommandQueue(ch, dh, 0, &retval);
+ }
+
+ ~Impl()
+ {
+ if(handle)
+ {
+ clFinish(handle);
+ clReleaseCommandQueue(handle);
+ }
+ }
+
+ IMPLEMENT_REFCOUNTABLE();
+
+ cl_command_queue handle;
+};
+
+Queue::Queue()
+{
+ p = 0;
+}
+
+Queue::Queue(const Context& c, const Device& d)
+{
+ p = 0;
+ create(c, d);
+}
+
+Queue::Queue(const Queue& q)
+{
+ p = q.p;
+ if(p)
+ p->addref();
+}
+
+Queue& Queue::operator = (const Queue& q)
+{
+ Impl* newp = (Impl*)q.p;
+ if(newp)
+ newp->addref();
+ if(p)
+ p->release();
+ p = newp;
+ return *this;
+}
+
+Queue::~Queue()
+{
+ if(p)
+ p->release();
+}
+
+bool Queue::create(const Context& c, const Device& d)
+{
+ if(p)
+ p->release();
+ p = new Impl(c, d);
+ return p->handle != 0;
+}
+
+void Queue::finish()
+{
+ if(p && p->handle)
+ clFinish(p->handle);
+}
+
+void* Queue::ptr() const
+{
+ return p ? p->handle : 0;
+}
+
+Queue& Queue::getDefault()
+{
+ Queue& q = TLSData::get()->oclQueue;
+ if( !q.p )
+ q.create(Context::getDefault());
+ return q;
+}
+
+static cl_command_queue getQueue(const Queue& q)
+{
+ cl_command_queue qq = (cl_command_queue)q.ptr();
+ if(!qq)
+ qq = (cl_command_queue)Queue::getDefault().ptr();
+ return qq;
+}
+
+KernelArg::KernelArg(int _flags, UMat* _m, void* _obj, size_t _sz)
+ : flags(_flags), m(_m), obj(_obj), sz(_sz)
+{
+}
+
+KernelArg KernelArg::Constant(const Mat& m)
+{
+ CV_Assert(m.isContinuous());
+ return KernelArg(CONSTANT, 0, m.data, m.total()*m.elemSize());
+}
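+
+// Note: Constant() stores a pointer into the Mat rather than a copy; the data
+// only has to remain valid until Kernel::set() hands it to clSetKernelArg,
+// which copies the bytes.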
+
+
+struct Kernel::Impl
+{
+ Impl(const char* kname, const Program& prog)
+ {
+        e = 0; nu = 0; refcount = 1;
+ cl_program ph = (cl_program)prog.ptr();
+ cl_int retval = 0;
+ handle = ph != 0 ?
+ clCreateKernel(ph, kname, &retval) : 0;
+ for( int i = 0; i < MAX_ARRS; i++ )
+ u[i] = 0;
+ }
+
+ void cleanupUMats()
+ {
+ for( int i = 0; i < MAX_ARRS; i++ )
+ if( u[i] )
+ {
+ if( CV_XADD(&u[i]->urefcount, -1) == 1 )
+ u[i]->currAllocator->deallocate(u[i]);
+ u[i] = 0;
+ }
+ nu = 0;
+ }
+
+ void addUMat(const UMat& m)
+ {
+ CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0);
+ u[nu] = m.u;
+ CV_XADD(&m.u->urefcount, 1);
+ nu++;
+ }
+
+ void finit()
+ {
+ cleanupUMats();
+ if(e) { clReleaseEvent(e); e = 0; }
+ release();
+ }
+
+ ~Impl()
+ {
+ if(handle)
+ clReleaseKernel(handle);
+ }
+
+ IMPLEMENT_REFCOUNTABLE();
+
+ cl_kernel handle;
+ cl_event e;
+ enum { MAX_ARRS = 16 };
+ UMatData* u[MAX_ARRS];
+ int nu;
+};
+
+}}
+
+extern "C"
+{
+static void CL_CALLBACK oclCleanupCallback(cl_event, cl_int, void *p)
+{
+ ((cv::ocl::Kernel::Impl*)p)->finit();
+}
+
+}
+
+namespace cv { namespace ocl {
+
+Kernel::Kernel()
+{
+ p = 0;
+}
+
+Kernel::Kernel(const char* kname, const Program& prog)
+{
+ p = 0;
+ create(kname, prog);
+}
+
+Kernel::Kernel(const char* kname, const ProgramSource& src,
+ const String& buildopts, String& errmsg)
+{
+ p = 0;
+ create(kname, src, buildopts, errmsg);
+}
+
+Kernel::Kernel(const Kernel& k)
+{
+ p = k.p;
+ if(p)
+ p->addref();
+}
+
+Kernel& Kernel::operator = (const Kernel& k)
+{
+ Impl* newp = (Impl*)k.p;
+ if(newp)
+ newp->addref();
+ if(p)
+ p->release();
+ p = newp;
+ return *this;
+}
+
+Kernel::~Kernel()
+{
+ if(p)
+ p->release();
+}
+
+bool Kernel::create(const char* kname, const Program& prog)
+{
+ if(p)
+ p->release();
+ p = new Impl(kname, prog);
+ if(p->handle == 0)
+ {
+ p->release();
+ p = 0;
+ }
+ return p != 0;
+}
+
+bool Kernel::create(const char* kname, const ProgramSource& src,
+ const String& buildopts, String& errmsg)
+{
+ if(p)
+ {
+ p->release();
+ p = 0;
+ }
+ const Program& prog = Context::getDefault().getProg(src, buildopts, errmsg);
+ return create(kname, prog);
+}
+
+void* Kernel::ptr() const
+{
+ return p ? p->handle : 0;
+}
+
+void Kernel::set(int i, const void* value, size_t sz)
+{
+ CV_Assert( p && clSetKernelArg(p->handle, (cl_uint)i, sz, value) >= 0 );
+ if( i == 0 )
+ p->cleanupUMats();
+}
+
+void Kernel::set(int i, const UMat& m)
+{
+ set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
+}
+
+void Kernel::set(int i, const KernelArg& arg)
+{
+ CV_Assert( p && p->handle );
+ if( i == 0 )
+ p->cleanupUMats();
+ if( arg.m )
+ {
+ int accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) +
+ ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0);
+ if( arg.m->dims <= 2 )
+ {
+ UMat2D u2d(*arg.m, accessFlags);
+ clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d), &u2d);
+ }
+ else
+ {
+ UMat3D u3d(*arg.m, accessFlags);
+ clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d), &u3d);
+ }
+ p->addUMat(*arg.m);
+ }
+ else
+ {
+ clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj);
+ }
+}
+
+
+void Kernel::run(int dims, size_t offset[], size_t globalsize[], size_t localsize[],
+ bool sync, const Queue& q)
+{
+ CV_Assert(p && p->handle && p->e == 0);
+ cl_command_queue qq = getQueue(q);
+ clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims,
+ offset, globalsize, localsize, 0, 0,
+ sync ? 0 : &p->e);
+ if( sync )
+ {
+ clFinish(qq);
+ p->cleanupUMats();
+ }
+ else
+ {
+ p->addref();
+ clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p);
+ }
+}
+
+void Kernel::runTask(bool sync, const Queue& q)
+{
+ CV_Assert(p && p->handle && p->e == 0);
+ cl_command_queue qq = getQueue(q);
+ clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &p->e);
+ if( sync )
+ {
+ clFinish(qq);
+ p->cleanupUMats();
+ }
+ else
+ {
+ p->addref();
+ clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p);
+ }
+}
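+
+// End-to-end usage sketch (assuming "src" is a ProgramSource that defines
+// __kernel void fill0(...)):
+//
+//     cv::String errmsg;
+//     cv::ocl::Kernel k("fill0", src, "", errmsg);
+//     k.set(0, umat); // binds a UMat2D/UMat3D descriptor, retains the buffer
+//     size_t gsize[] = { (size_t)umat.cols, (size_t)umat.rows };
+//     k.run(2, 0, gsize, 0, false, cv::ocl::Queue::getDefault());
+//
+// With sync=false the retained UMat references are released from the
+// CL_COMPLETE event callback (oclCleanupCallback) instead of after clFinish().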
+
+
+size_t Kernel::workGroupSize() const
+{
+ if(!p)
+ return 0;
+ size_t val = 0, retsz = 0;
+ cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
+ return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_WORK_GROUP_SIZE,
+ sizeof(val), &val, &retsz) >= 0 ? val : 0;
+}
+
+bool Kernel::compileWorkGroupSize(size_t wsz[]) const
+{
+    if(!p || !wsz)
+        return false;
+    size_t retsz = 0;
+    cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
+    return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_COMPILE_WORK_GROUP_SIZE,
+                                    sizeof(wsz[0])*3, wsz, &retsz) >= 0;
+}
+
+size_t Kernel::localMemSize() const
+{
+ if(!p)
+ return 0;
+ size_t retsz = 0;
+ cl_ulong val = 0;
+ cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
+ return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_LOCAL_MEM_SIZE,
+ sizeof(val), &val, &retsz) >= 0 ? (size_t)val : 0;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////
+
+struct Program::Impl
+{
+ Impl(const ProgramSource& _src,
+ const String& _buildflags, String& errmsg)
+ {
+ refcount = 1;
+ const Context& ctx = Context::getDefault();
+ src = _src;
+ buildflags = _buildflags;
+ const String& srcstr = src.source();
+ const char* srcptr = srcstr.c_str();
+ size_t srclen = srcstr.size();
+ cl_int retval = 0;
+
+ handle = clCreateProgramWithSource((cl_context)ctx.ptr(), 1, &srcptr, &srclen, &retval);
+ if( handle && retval >= 0 )
+ {
+            int i, n = (int)ctx.ndevices();
+ AutoBuffer<void*> deviceListBuf(n+1);
+ void** deviceList = deviceListBuf;
+ for( i = 0; i < n; i++ )
+ deviceList[i] = ctx.device(i).ptr();
+ retval = clBuildProgram(handle, n,
+ (const cl_device_id*)deviceList,
+ buildflags.c_str(), 0, 0);
+ if( retval == CL_BUILD_PROGRAM_FAILURE )
+ {
+ char buf[1024];
+ size_t retsz = 0;
+ clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0], CL_PROGRAM_BUILD_LOG,
+ sizeof(buf)-16, buf, &retsz);
+ errmsg = String(buf);
+ }
+ }
+ }
+
+ Impl(const String& _buf, const String& _buildflags)
+ {
+ refcount = 1;
+ handle = 0;
+ buildflags = _buildflags;
+ if(_buf.empty())
+ return;
+ String prefix0 = Program::getPrefix(buildflags);
+ const Context& ctx = Context::getDefault();
+ const Device& dev = Device::getDefault();
+ const char* pos0 = _buf.c_str();
+ const char* pos1 = strchr(pos0, '\n');
+ if(!pos1)
+ return;
+ const char* pos2 = strchr(pos1+1, '\n');
+ if(!pos2)
+ return;
+ const char* pos3 = strchr(pos2+1, '\n');
+ if(!pos3)
+ return;
+ size_t prefixlen = (pos3 - pos0)+1;
+ String prefix(pos0, prefixlen);
+ if( prefix != prefix0 )
+ return;
+ const uchar* bin = (uchar*)(pos3+1);
+ void* devid = dev.ptr();
+ size_t codelen = _buf.length() - prefixlen;
+ cl_int binstatus = 0, retval = 0;
+ handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), 1, (cl_device_id*)&devid,
+ &codelen, &bin, &binstatus, &retval);
+ }
+
+ String store()
+ {
+ if(!handle)
+ return String();
+ size_t progsz = 0, retsz = 0;
+ String prefix = Program::getPrefix(buildflags);
+ size_t prefixlen = prefix.length();
+ if(clGetProgramInfo(handle, CL_PROGRAM_BINARY_SIZES, sizeof(progsz), &progsz, &retsz) < 0)
+ return String();
+ AutoBuffer<uchar> bufbuf(prefixlen + progsz + 16);
+ uchar* buf = bufbuf;
+ memcpy(buf, prefix.c_str(), prefixlen);
+ buf += prefixlen;
+ if(clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(buf), &buf, &retsz) < 0)
+ return String();
+ buf[progsz] = (uchar)'\0';
+ return String((const char*)(uchar*)bufbuf, prefixlen + progsz);
+ }
+
+ ~Impl()
+ {
+ if( handle )
+ clReleaseProgram(handle);
+ }
+
+ IMPLEMENT_REFCOUNTABLE();
+
+ ProgramSource src;
+ String buildflags;
+ cl_program handle;
+};
+
+
+Program::Program() { p = 0; }
+
+Program::Program(const ProgramSource& src,
+ const String& buildflags, String& errmsg)
+{
+ p = 0;
+ create(src, buildflags, errmsg);
+}
+
+Program::Program(const Program& prog)
+{
+ p = prog.p;
+ if(p)
+ p->addref();
+}
+
+Program& Program::operator = (const Program& prog)
+{
+ Impl* newp = (Impl*)prog.p;
+ if(newp)
+ newp->addref();
+ if(p)
+ p->release();
+ p = newp;
+ return *this;
+}
+
+Program::~Program()
+{
+ if(p)
+ p->release();
+}
+
+bool Program::create(const ProgramSource& src,
+ const String& buildflags, String& errmsg)
+{
+ if(p)
+ p->release();
+ p = new Impl(src, buildflags, errmsg);
+ if(!p->handle)
+ {
+ p->release();
+ p = 0;
+ }
+ return p != 0;
+}
+
+const ProgramSource& Program::source() const
+{
+ static ProgramSource dummy;
+ return p ? p->src : dummy;
+}
+
+void* Program::ptr() const
+{
+ return p ? p->handle : 0;
+}
+
+bool Program::read(const String& bin, const String& buildflags)
+{
+ if(p)
+ p->release();
+ p = new Impl(bin, buildflags);
+ return p->handle != 0;
+}
+
+bool Program::write(String& bin) const
+{
+ if(!p)
+ return false;
+ bin = p->store();
+ return !bin.empty();
+}
+
+String Program::getPrefix() const
+{
+ if(!p)
+ return String();
+ return getPrefix(p->buildflags);
+}
+
+String Program::getPrefix(const String& buildflags)
+{
+ const Context& ctx = Context::getDefault();
+ const Device& dev = ctx.device(0);
+ return format("name=%s\ndriver=%s\nbuildflags=%s\n",
+ dev.name().c_str(), dev.driverVersion().c_str(), buildflags.c_str());
+}
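+
+// Consequently, a binary cached via store()/read() consists of the three text
+// lines produced by getPrefix() ("name=...", "driver=...", "buildflags=...")
+// followed directly by the raw program binary; read() rejects the blob when
+// the prefix does not match the current device and build flags.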
+
+////////////////////////////////////////////////////////////////////////////////////////
+
+struct ProgramSource::Impl
+{
+ Impl(const char* _src)
+ {
+ init(String(_src));
+ }
+ Impl(const String& _src)
+ {
+ init(_src);
+ }
+ void init(const String& _src)
+ {
+ refcount = 1;
+ src = _src;
+ h = crc64((uchar*)src.c_str(), src.size());
+ }
+
+ IMPLEMENT_REFCOUNTABLE();
+ String src;
+ ProgramSource::hash_t h;
+};
+
+
+ProgramSource::ProgramSource()
+{
+ p = 0;
+}
+
+ProgramSource::ProgramSource(const char* prog)
+{
+ p = new Impl(prog);
+}
+
+ProgramSource::ProgramSource(const String& prog)
+{
+ p = new Impl(prog);
+}
+
+ProgramSource::~ProgramSource()
+{
+ if(p)
+ p->release();
+}
+
+ProgramSource::ProgramSource(const ProgramSource& prog)
+{
+ p = prog.p;
+ if(p)
+ p->addref();
+}
+
+ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
+{
+ Impl* newp = (Impl*)prog.p;
+ if(newp)
+ newp->addref();
+ if(p)
+ p->release();
+ p = newp;
+ return *this;
+}
+
+const String& ProgramSource::source() const
+{
+ static String dummy;
+ return p ? p->src : dummy;
+}
+
+ProgramSource::hash_t ProgramSource::hash() const
+{
+ return p ? p->h : 0;
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////////
+
+class OpenCLAllocator : public MatAllocator
+{
+public:
+ OpenCLAllocator() {}
+
+ UMatData* defaultAllocate(int dims, const int* sizes, int type, size_t* step) const
+ {
+ UMatData* u = Mat::getStdAllocator()->allocate(dims, sizes, type, step);
+ u->urefcount = 1;
+ u->refcount = 0;
+ return u;
+ }
+
+ void getBestFlags(const Context& ctx, int& createFlags, int& flags0) const
+ {
+ const Device& dev = ctx.device(0);
+ createFlags = CL_MEM_READ_WRITE;
+
+ if( dev.hostUnifiedMemory() )
+ flags0 = 0;
+ else
+ flags0 = UMatData::COPY_ON_MAP;
+ }
+
+ UMatData* allocate(int dims, const int* sizes, int type, size_t* step) const
+ {
+ if(!useOpenCL())
+ return defaultAllocate(dims, sizes, type, step);
+ size_t total = CV_ELEM_SIZE(type);
+ for( int i = dims-1; i >= 0; i-- )
+ {
+ if( step )
+ step[i] = total;
+ total *= sizes[i];
+ }
+
+ Context& ctx = Context::getDefault();
+ int createFlags = 0, flags0 = 0;
+ getBestFlags(ctx, createFlags, flags0);
+
+ cl_int retval = 0;
+ void* handle = clCreateBuffer((cl_context)ctx.ptr(),
+ createFlags, total, 0, &retval);
+ if( !handle || retval < 0 )
+ return defaultAllocate(dims, sizes, type, step);
+ UMatData* u = new UMatData(this);
+ u->data = 0;
+ u->size = total;
+ u->handle = handle;
+ u->urefcount = 1;
+ u->flags = flags0;
+
+ return u;
+ }
+
+ bool allocate(UMatData* u, int accessFlags) const
+ {
+ if(!u)
+ return false;
+
+ UMatDataAutoLock lock(u);
+
+ if(u->handle == 0)
+ {
+ CV_Assert(u->origdata != 0);
+ Context& ctx = Context::getDefault();
+ int createFlags = 0, flags0 = 0;
+ getBestFlags(ctx, createFlags, flags0);
+
+ cl_context ctx_handle = (cl_context)ctx.ptr();
+ cl_int retval = 0;
+ int tempUMatFlags = UMatData::TEMP_UMAT;
+ u->handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
+ u->size, u->origdata, &retval);
+ if((!u->handle || retval < 0) && !(accessFlags & ACCESS_FAST))
+ {
+ u->handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|createFlags,
+ u->size, u->origdata, &retval);
+ tempUMatFlags = UMatData::TEMP_COPIED_UMAT;
+ }
+ if(!u->handle || retval < 0)
+ return false;
+ u->prevAllocator = u->currAllocator;
+ u->currAllocator = this;
+ u->flags |= tempUMatFlags;
+ }
+ if(accessFlags & ACCESS_WRITE)
+ u->markHostCopyObsolete(true);
+ CV_XADD(&u->urefcount, 1);
+ return true;
+ }
+
+ void deallocate(UMatData* u) const
+ {
+ if(!u)
+ return;
+
+        // TODO: when we add Shared Virtual Memory support,
+        // this function (as well as the others) will have to be updated
+ CV_Assert(u->handle != 0 && u->urefcount == 0);
+ if(u->tempUMat())
+ {
+            if( u->hostCopyObsolete() && u->refcount > 0 && u->tempCopiedUMat() )
+            {
+                // the device copy is newer and the user still holds the original
+                // Mat, so bring the data back into the original host buffer
+                clEnqueueReadBuffer((cl_command_queue)Queue::getDefault().ptr(),
+                                    (cl_mem)u->handle, CL_TRUE, 0,
+                                    u->size, u->origdata, 0, 0, 0);
+            }
+ u->markHostCopyObsolete(false);
+ clReleaseMemObject((cl_mem)u->handle);
+ u->currAllocator = u->prevAllocator;
+ if(u->data && u->copyOnMap())
+ fastFree(u->data);
+ u->data = u->origdata;
+ if(u->refcount == 0)
+ u->currAllocator->deallocate(u);
+ }
+ else
+ {
+ if(u->data && u->copyOnMap())
+ fastFree(u->data);
+ clReleaseMemObject((cl_mem)u->handle);
+ delete u;
+ }
+ }
+
+ void map(UMatData* u, int accessFlags) const
+ {
+ if(!u)
+ return;
+
+ CV_Assert( u->handle != 0 );
+
+ UMatDataAutoLock autolock(u);
+
+ if(accessFlags & ACCESS_WRITE)
+ u->markDeviceCopyObsolete(true);
+
+ cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
+
+ if( u->refcount == 0 )
+ {
+ if( !u->copyOnMap() )
+ {
+ CV_Assert(u->data == 0);
+ // because there can be other map requests for the same UMat with different access flags,
+ // we use the universal (read-write) access mode.
+ cl_int retval = 0;
+ u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
+ (CL_MAP_READ | CL_MAP_WRITE),
+ 0, u->size, 0, 0, 0, &retval);
+ if(u->data && retval >= 0)
+ {
+ u->markHostCopyObsolete(false);
+ return;
+ }
+
+ // if map failed, switch to copy-on-map mode for the particular buffer
+ u->flags |= UMatData::COPY_ON_MAP;
+ }
+
+ if(!u->data)
+ {
+ u->data = (uchar*)fastMalloc(u->size);
+ u->markHostCopyObsolete(true);
+ }
+ }
+
+ if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() )
+ {
+ CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
+ u->size, u->data, 0, 0, 0) >= 0 );
+ u->markHostCopyObsolete(false);
+ }
+ }
+
+ void unmap(UMatData* u) const
+ {
+ if(!u)
+ return;
+
+ CV_Assert(u->handle != 0);
+
+ UMatDataAutoLock autolock(u);
+
+ cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
+ if( !u->copyOnMap() && u->data )
+ {
+ CV_Assert( clEnqueueUnmapMemObject(q, (cl_mem)u->handle, u->data, 0, 0, 0) >= 0 );
+ u->data = 0;
+ }
+ else if( u->copyOnMap() && u->deviceCopyObsolete() )
+ {
+ CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
+ u->size, u->data, 0, 0, 0) >= 0 );
+ }
+ u->markDeviceCopyObsolete(false);
+ u->markHostCopyObsolete(false);
+ }
+
+ bool checkContinuous(int dims, const size_t sz[],
+ const size_t srcofs[], const size_t srcstep[],
+ const size_t dstofs[], const size_t dststep[],
+ size_t& total, size_t new_sz[],
+ size_t& srcrawofs, size_t new_srcofs[], size_t new_srcstep[],
+ size_t& dstrawofs, size_t new_dstofs[], size_t new_dststep[]) const
+ {
+ bool iscontinuous = true;
+ srcrawofs = srcofs ? srcofs[dims-1] : 0;
+ dstrawofs = dstofs ? dstofs[dims-1] : 0;
+ total = sz[dims-1];
+ for( int i = dims-2; i >= 0; i-- )
+ {
+            if( total != srcstep[i] || total != dststep[i] )
+ iscontinuous = false;
+ total *= sz[i];
+ if( srcofs )
+ srcrawofs += srcofs[i]*srcstep[i];
+ if( dstofs )
+ dstrawofs += dstofs[i]*dststep[i];
+ }
+
+ if( !iscontinuous )
+ {
+ // OpenCL uses {x, y, z} order while OpenCV uses {z, y, x} order.
+ if( dims == 2 )
+ {
+ new_sz[0] = sz[1]; new_sz[1] = sz[0]; new_sz[2] = 1;
+ // we assume that new_... arrays are initialized by caller
+ // with 0's, so there is no else branch
+ if( srcofs )
+ {
+ new_srcofs[0] = srcofs[1];
+ new_srcofs[1] = srcofs[0];
+ new_srcofs[2] = 0;
+ }
+
+ if( dstofs )
+ {
+ new_dstofs[0] = dstofs[1];
+ new_dstofs[1] = dstofs[0];
+ new_dstofs[2] = 0;
+ }
+
+ new_srcstep[0] = srcstep[0]; new_srcstep[1] = 0;
+ new_dststep[0] = dststep[0]; new_dststep[1] = 0;
+ }
+ else
+ {
+                // we could check for dims == 3 here, but the "dims <= 3"
+                // assertion below is more informative for the user
+ CV_Assert(dims <= 3);
+ new_sz[0] = sz[2]; new_sz[1] = sz[1]; new_sz[2] = sz[0];
+ if( srcofs )
+ {
+ new_srcofs[0] = srcofs[2];
+ new_srcofs[1] = srcofs[1];
+ new_srcofs[2] = srcofs[0];
+ }
+
+ if( dstofs )
+ {
+ new_dstofs[0] = dstofs[2];
+ new_dstofs[1] = dstofs[1];
+ new_dstofs[2] = dstofs[0];
+ }
+
+ new_srcstep[0] = srcstep[1]; new_srcstep[1] = srcstep[0];
+ new_dststep[0] = dststep[1]; new_dststep[1] = dststep[0];
+ }
+ }
+ return iscontinuous;
+ }
+
+ void download(UMatData* u, void* dstptr, int dims, const size_t sz[],
+ const size_t srcofs[], const size_t srcstep[],
+ const size_t dststep[]) const
+ {
+ if(!u)
+ return;
+ UMatDataAutoLock autolock(u);
+
+ if( u->data && !u->hostCopyObsolete() )
+ {
+ Mat::getStdAllocator()->download(u, dstptr, dims, sz, srcofs, srcstep, dststep);
+ return;
+ }
+ CV_Assert( u->handle != 0 );
+
+ cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
+
+ size_t total = 0, new_sz[] = {0, 0, 0};
+ size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
+ size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
+
+ bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, 0, dststep,
+ total, new_sz,
+ srcrawofs, new_srcofs, new_srcstep,
+ dstrawofs, new_dstofs, new_dststep);
+ if( iscontinuous )
+ {
+ CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
+ srcrawofs, total, dstptr, 0, 0, 0) >= 0 );
+ }
+ else
+ {
+ CV_Assert( clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE,
+ new_srcofs, new_dstofs, new_sz, new_srcstep[0], new_srcstep[1],
+ new_dststep[0], new_dststep[1], dstptr, 0, 0, 0) >= 0 );
+ }
+ clFinish(q);
+ }
+
+ void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
+ const size_t dstofs[], const size_t dststep[],
+ const size_t srcstep[]) const
+ {
+ if(!u)
+ return;
+
+ // there should be no user-visible CPU copies of the UMat which we are going to copy to
+ CV_Assert(u->refcount == 0);
+
+ size_t total = 0, new_sz[] = {0, 0, 0};
+ size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
+ size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
+
+ bool iscontinuous = checkContinuous(dims, sz, 0, srcstep, dstofs, dststep,
+ total, new_sz,
+ srcrawofs, new_srcofs, new_srcstep,
+ dstrawofs, new_dstofs, new_dststep);
+
+ UMatDataAutoLock autolock(u);
+
+        // if there is a cached CPU copy of the GPU matrix,
+        // we can use it as the destination in two cases:
+        //    1. the whole content is overwritten;
+        //    2. only a part is overwritten, but the GPU copy is
+        //       already out-of-date anyway
+ if( u->data && (u->hostCopyObsolete() <= u->deviceCopyObsolete() || total == u->size))
+ {
+ Mat::getStdAllocator()->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep);
+ u->markHostCopyObsolete(false);
+ u->markDeviceCopyObsolete(true);
+ return;
+ }
+
+ CV_Assert( u->handle != 0 );
+ cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
+
+ if( iscontinuous )
+ {
+ CV_Assert( clEnqueueWriteBuffer(q, (cl_mem)u->handle,
+ CL_TRUE, dstrawofs, total, srcptr, 0, 0, 0) >= 0 );
+ }
+ else
+ {
+ CV_Assert( clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE,
+ new_dstofs, new_srcofs, new_sz, new_dststep[0], new_dststep[1],
+ new_srcstep[0], new_srcstep[1], srcptr, 0, 0, 0) >= 0 );
+ }
+
+ u->markHostCopyObsolete(true);
+ u->markDeviceCopyObsolete(false);
+
+ clFinish(q);
+ }
+
+ void copy(UMatData* src, UMatData* dst, int dims, const size_t sz[],
+ const size_t srcofs[], const size_t srcstep[],
+ const size_t dstofs[], const size_t dststep[], bool sync) const
+ {
+ if(!src || !dst)
+ return;
+
+ size_t total = 0, new_sz[] = {0, 0, 0};
+ size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
+ size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
+
+ bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, dstofs, dststep,
+ total, new_sz,
+ srcrawofs, new_srcofs, new_srcstep,
+ dstrawofs, new_dstofs, new_dststep);
+
+ UMatDataAutoLock src_autolock(src);
+ UMatDataAutoLock dst_autolock(dst);
+
+ if( !src->handle || (src->data && src->hostCopyObsolete() <= src->deviceCopyObsolete()) )
+ {
+ upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
+ return;
+ }
+ if( !dst->handle || (dst->data && dst->hostCopyObsolete() <= dst->deviceCopyObsolete()) )
+ {
+ download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
+ dst->markHostCopyObsolete(false);
+ dst->markDeviceCopyObsolete(true);
+ return;
+ }
+
+ // there should be no user-visible CPU copies of the UMat which we are going to copy to
+ CV_Assert(dst->refcount == 0);
+ cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
+
+ if( iscontinuous )
+ {
+ CV_Assert( clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
+ srcrawofs, dstrawofs, total, 0, 0, 0) >= 0 );
+ }
+ else
+ {
+ CV_Assert( clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
+ new_srcofs, new_dstofs, new_sz,
+ new_srcstep[0], new_srcstep[1], new_dststep[0], new_dststep[1],
+ 0, 0, 0) >= 0 );
+ }
+
+ dst->markHostCopyObsolete(true);
+ dst->markDeviceCopyObsolete(false);
+
+ if( sync )
+ clFinish(q);
+ }
+};
+
+MatAllocator* getOpenCLAllocator()
+{
+ static OpenCLAllocator allocator;
+ return &allocator;
+}
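+
+// This allocator is what backs the Mat::getUMat()/UMat::getMat() interop:
+// existing host memory is wrapped via CL_MEM_USE_HOST_PTR (or copied via
+// CL_MEM_COPY_HOST_PTR), and map()/unmap() keep the two copies coherent.
+// A usage sketch:
+//
+//     cv::Mat m(480, 640, CV_8UC1);
+//     cv::UMat um = m.getUMat(cv::ACCESS_RW);   // zero-copy when supported
+//     cv::Mat m2 = um.getMat(cv::ACCESS_READ);  // maps the buffer back to host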
+
+}}
#include "opencv2/core/private.hpp"
#include "opencv2/core/private.cuda.hpp"
+#include "opencv2/core/ocl.hpp"
#include <assert.h>
#include <ctype.h>
#if defined WIN32 || defined _WIN32
void deleteThreadAllocData();
-void deleteThreadRNGData();
+void deleteThreadData();
#endif
template<typename T1, typename T2=T1, typename T3=T1> struct OpAdd
void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize );
+struct TLSData
+{
+ TLSData();
+ RNG rng;
+ int device;
+ ocl::Queue oclQueue;
+ int useOpenCL; // 1 - use, 0 - do not use, -1 - auto/not initialized
+
+ static TLSData* get();
+};
+
+namespace ocl { MatAllocator* getOpenCLAllocator(); }
+
}
#endif /*_CXCORE_INTERNAL_H_*/
}
}
-#ifdef WIN32
-
-
-#ifdef HAVE_WINRT
-// using C++11 thread attribute for local thread data
-__declspec( thread ) RNG* rng = NULL;
-
- void deleteThreadRNGData()
- {
- if (rng)
- delete rng;
}
-RNG& theRNG()
+cv::RNG& cv::theRNG()
{
- if (!rng)
- {
- rng = new RNG;
- }
- return *rng;
-}
-#else
-#ifdef WINCE
-# define TLS_OUT_OF_INDEXES ((DWORD)0xFFFFFFFF)
-#endif
-static DWORD tlsRNGKey = TLS_OUT_OF_INDEXES;
-
- void deleteThreadRNGData()
- {
- if( tlsRNGKey != TLS_OUT_OF_INDEXES )
- delete (RNG*)TlsGetValue( tlsRNGKey );
-}
-
-RNG& theRNG()
-{
- if( tlsRNGKey == TLS_OUT_OF_INDEXES )
- {
- tlsRNGKey = TlsAlloc();
- CV_Assert(tlsRNGKey != TLS_OUT_OF_INDEXES);
- }
- RNG* rng = (RNG*)TlsGetValue( tlsRNGKey );
- if( !rng )
- {
- rng = new RNG;
- TlsSetValue( tlsRNGKey, rng );
- }
- return *rng;
-}
-#endif //HAVE_WINRT
-#else
-
-static pthread_key_t tlsRNGKey = 0;
-static pthread_once_t tlsRNGKeyOnce = PTHREAD_ONCE_INIT;
-
-static void deleteRNG(void* data)
-{
- delete (RNG*)data;
-}
-
-static void makeRNGKey()
-{
- int errcode = pthread_key_create(&tlsRNGKey, deleteRNG);
- CV_Assert(errcode == 0);
-}
-
-RNG& theRNG()
-{
- pthread_once(&tlsRNGKeyOnce, makeRNGKey);
- RNG* rng = (RNG*)pthread_getspecific(tlsRNGKey);
- if( !rng )
- {
- rng = new RNG;
- pthread_setspecific(tlsRNGKey, rng);
- }
- return *rng;
-}
-
-#endif
-
+ return TLSData::get()->rng;
}
void cv::randu(InputOutputArray dst, InputArray low, InputArray high)
if( fdwReason == DLL_THREAD_DETACH || fdwReason == DLL_PROCESS_DETACH )
{
cv::deleteThreadAllocData();
- cv::deleteThreadRNGData();
+ cv::deleteThreadData();
}
return TRUE;
}
}
+//////////////////////////////// thread-local storage ////////////////////////////////
+
+namespace cv
+{
+
+TLSData::TLSData()
+{
+ device = 0;
+ useOpenCL = -1;
+}
+
+#ifdef WIN32
+
+#ifdef HAVE_WINRT
+    // use the compiler-specific thread-local storage attribute for per-thread data
+ static __declspec( thread ) TLSData* g_tlsdata = NULL;
+
+    void deleteThreadData()
+    {
+        if (g_tlsdata)
+            delete g_tlsdata;
+    }
+
+ TLSData* TLSData::get()
+ {
+ if (!g_tlsdata)
+ {
+ g_tlsdata = new TLSData;
+ }
+ return g_tlsdata;
+ }
+#else
+#ifdef WINCE
+# define TLS_OUT_OF_INDEXES ((DWORD)0xFFFFFFFF)
+#endif
+ static DWORD tlsKey = TLS_OUT_OF_INDEXES;
+
+ void deleteThreadData()
+ {
+ if( tlsKey != TLS_OUT_OF_INDEXES )
+ delete (TLSData*)TlsGetValue( tlsKey );
+ }
+
+ TLSData* TLSData::get()
+ {
+ if( tlsKey == TLS_OUT_OF_INDEXES )
+ {
+ tlsKey = TlsAlloc();
+ CV_Assert(tlsKey != TLS_OUT_OF_INDEXES);
+ }
+ TLSData* d = (TLSData*)TlsGetValue( tlsKey );
+ if( !d )
+ {
+ d = new TLSData;
+ TlsSetValue( tlsKey, d );
+ }
+ return d;
+ }
+#endif //HAVE_WINRT
+#else
+ static pthread_key_t tlsKey = 0;
+ static pthread_once_t tlsKeyOnce = PTHREAD_ONCE_INIT;
+
+ static void deleteTLSData(void* data)
+ {
+ delete (TLSData*)data;
+ }
+
+ static void makeKey()
+ {
+ int errcode = pthread_key_create(&tlsKey, deleteTLSData);
+ CV_Assert(errcode == 0);
+ }
+
+ TLSData* TLSData::get()
+ {
+ pthread_once(&tlsKeyOnce, makeKey);
+ TLSData* d = (TLSData*)pthread_getspecific(tlsKey);
+ if( !d )
+ {
+ d = new TLSData;
+ pthread_setspecific(tlsKey, d);
+ }
+ return d;
+ }
+#endif
+}
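+
+// TLSData is the single per-thread bundle used above: theRNG() returns
+// TLSData::get()->rng, while ocl::useOpenCL() and ocl::Queue::getDefault()
+// use the useOpenCL flag and the oclQueue member of the same structure.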
+
/* End of file. */
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "precomp.hpp"
+
+///////////////////////////////// UMat implementation ///////////////////////////////
+
+namespace cv {
+
+// a prime number of locks spreads the address hash below more evenly
+enum { UMAT_NLOCKS = 31 };
+static Mutex umatLocks[UMAT_NLOCKS];
+
+UMatData::UMatData(const MatAllocator* allocator)
+{
+ prevAllocator = currAllocator = allocator;
+ urefcount = refcount = 0;
+ data = origdata = 0;
+ size = 0;
+ flags = 0;
+ handle = 0;
+ userdata = 0;
+}
+
+void UMatData::lock()
+{
+ umatLocks[(size_t)(void*)this % UMAT_NLOCKS].lock();
+}
+
+void UMatData::unlock()
+{
+ umatLocks[(size_t)(void*)this % UMAT_NLOCKS].unlock();
+}
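+
+// lock()/unlock() hash the object address into the small fixed pool of mutexes
+// above ("lock striping"), so unrelated UMats rarely contend for the same
+// lock; UMatDataAutoLock is the RAII guard used on top of this pair throughout
+// the OpenCL allocator.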
+
+
+MatAllocator* UMat::getStdAllocator()
+{
+ return ocl::getOpenCLAllocator();
+}
+
+void swap( UMat& a, UMat& b )
+{
+ std::swap(a.flags, b.flags);
+ std::swap(a.dims, b.dims);
+ std::swap(a.rows, b.rows);
+ std::swap(a.cols, b.cols);
+ std::swap(a.allocator, b.allocator);
+ std::swap(a.u, b.u);
+ std::swap(a.offset, b.offset);
+
+ std::swap(a.size.p, b.size.p);
+ std::swap(a.step.p, b.step.p);
+ std::swap(a.step.buf[0], b.step.buf[0]);
+ std::swap(a.step.buf[1], b.step.buf[1]);
+
+ if( a.step.p == b.step.buf )
+ {
+ a.step.p = a.step.buf;
+ a.size.p = &a.rows;
+ }
+
+ if( b.step.p == a.step.buf )
+ {
+ b.step.p = b.step.buf;
+ b.size.p = &b.rows;
+ }
+}
+
+
+static inline void setSize( UMat& m, int _dims, const int* _sz,
+ const size_t* _steps, bool autoSteps=false )
+{
+ CV_Assert( 0 <= _dims && _dims <= CV_MAX_DIM );
+ if( m.dims != _dims )
+ {
+ if( m.step.p != m.step.buf )
+ {
+ fastFree(m.step.p);
+ m.step.p = m.step.buf;
+ m.size.p = &m.rows;
+ }
+ if( _dims > 2 )
+ {
+ m.step.p = (size_t*)fastMalloc(_dims*sizeof(m.step.p[0]) + (_dims+1)*sizeof(m.size.p[0]));
+ m.size.p = (int*)(m.step.p + _dims) + 1;
+ m.size.p[-1] = _dims;
+ m.rows = m.cols = -1;
+ }
+ }
+
+ m.dims = _dims;
+ if( !_sz )
+ return;
+
+ size_t esz = CV_ELEM_SIZE(m.flags), total = esz;
+ int i;
+ for( i = _dims-1; i >= 0; i-- )
+ {
+ int s = _sz[i];
+ CV_Assert( s >= 0 );
+ m.size.p[i] = s;
+
+ if( _steps )
+ m.step.p[i] = i < _dims-1 ? _steps[i] : esz;
+ else if( autoSteps )
+ {
+ m.step.p[i] = total;
+ int64 total1 = (int64)total*s;
+ if( (uint64)total1 != (size_t)total1 )
+ CV_Error( CV_StsOutOfRange, "The total matrix size does not fit to \"size_t\" type" );
+ total = (size_t)total1;
+ }
+ }
+
+ if( _dims == 1 )
+ {
+ m.dims = 2;
+ m.cols = 1;
+ m.step[1] = esz;
+ }
+}
+
+static void updateContinuityFlag(UMat& m)
+{
+ int i, j;
+ for( i = 0; i < m.dims; i++ )
+ {
+ if( m.size[i] > 1 )
+ break;
+ }
+
+ for( j = m.dims-1; j > i; j-- )
+ {
+ if( m.step[j]*m.size[j] < m.step[j-1] )
+ break;
+ }
+
+ uint64 t = (uint64)m.step[0]*m.size[0];
+ if( j <= i && t == (size_t)t )
+ m.flags |= UMat::CONTINUOUS_FLAG;
+ else
+ m.flags &= ~UMat::CONTINUOUS_FLAG;
+}
+
+
+static void finalizeHdr(UMat& m)
+{
+ updateContinuityFlag(m);
+ int d = m.dims;
+ if( d > 2 )
+ m.rows = m.cols = -1;
+}
+
+
+UMat Mat::getUMat(int accessFlags) const
+{
+ UMat hdr;
+ if(!u)
+ return hdr;
+ UMat::getStdAllocator()->allocate(u, accessFlags);
+ setSize(hdr, dims, size.p, step.p);
+ finalizeHdr(hdr);
+ hdr.u = u;
+ hdr.offset = data - datastart;
+ return hdr;
+}
+
+void UMat::create(int d, const int* _sizes, int _type)
+{
+ int i;
+ CV_Assert(0 <= d && d <= CV_MAX_DIM && _sizes);
+ _type = CV_MAT_TYPE(_type);
+
+ if( u && (d == dims || (d == 1 && dims <= 2)) && _type == type() )
+ {
+ if( d == 2 && rows == _sizes[0] && cols == _sizes[1] )
+ return;
+ for( i = 0; i < d; i++ )
+ if( size[i] != _sizes[i] )
+ break;
+ if( i == d && (d > 1 || size[1] == 1))
+ return;
+ }
+
+ release();
+ if( d == 0 )
+ return;
+ flags = (_type & CV_MAT_TYPE_MASK) | MAGIC_VAL;
+ setSize(*this, d, _sizes, 0, true);
+ offset = 0;
+
+ if( total() > 0 )
+ {
+ MatAllocator *a = allocator, *a0 = getStdAllocator();
+ if(!a)
+ a = a0;
+ try
+ {
+ u = a->allocate(dims, size, _type, step.p);
+ CV_Assert(u != 0);
+ }
+ catch(...)
+ {
+ if(a != a0)
+ u = a0->allocate(dims, size, _type, step.p);
+ CV_Assert(u != 0);
+ }
+ CV_Assert( step[dims-1] == (size_t)CV_ELEM_SIZE(flags) );
+ }
+
+ finalizeHdr(*this);
+}
+
+void UMat::copySize(const UMat& m)
+{
+ setSize(*this, m.dims, 0, 0);
+ for( int i = 0; i < dims; i++ )
+ {
+ size[i] = m.size[i];
+ step[i] = m.step[i];
+ }
+}
+
+void UMat::deallocate()
+{
+ u->currAllocator->deallocate(u);
+}
+
+
+UMat::UMat(const UMat& m, const Range& _rowRange, const Range& _colRange)
+ : flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows)
+{
+ CV_Assert( m.dims >= 2 );
+ if( m.dims > 2 )
+ {
+ AutoBuffer<Range> rs(m.dims);
+ rs[0] = _rowRange;
+ rs[1] = _colRange;
+ for( int i = 2; i < m.dims; i++ )
+ rs[i] = Range::all();
+ *this = m(rs);
+ return;
+ }
+
+ *this = m;
+ if( _rowRange != Range::all() && _rowRange != Range(0,rows) )
+ {
+ CV_Assert( 0 <= _rowRange.start && _rowRange.start <= _rowRange.end && _rowRange.end <= m.rows );
+ rows = _rowRange.size();
+ offset += step*_rowRange.start;
+ flags |= SUBMATRIX_FLAG;
+ }
+
+ if( _colRange != Range::all() && _colRange != Range(0,cols) )
+ {
+ CV_Assert( 0 <= _colRange.start && _colRange.start <= _colRange.end && _colRange.end <= m.cols );
+ cols = _colRange.size();
+ offset += _colRange.start*elemSize();
+ flags &= cols < m.cols ? ~CONTINUOUS_FLAG : -1;
+ flags |= SUBMATRIX_FLAG;
+ }
+
+ if( rows == 1 )
+ flags |= CONTINUOUS_FLAG;
+
+ if( rows <= 0 || cols <= 0 )
+ {
+ release();
+ rows = cols = 0;
+ }
+}
+
+
+UMat::UMat(const UMat& m, const Rect& roi)
+ : flags(m.flags), dims(2), rows(roi.height), cols(roi.width),
+ allocator(m.allocator), u(m.u), offset(m.offset + roi.y*m.step[0]), size(&rows)
+{
+ CV_Assert( m.dims <= 2 );
+ flags &= roi.width < m.cols ? ~CONTINUOUS_FLAG : -1;
+ flags |= roi.height == 1 ? CONTINUOUS_FLAG : 0;
+
+ size_t esz = CV_ELEM_SIZE(flags);
+ offset += roi.x*esz;
+ CV_Assert( 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols &&
+ 0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows );
+ if( u )
+ CV_XADD(&(u->urefcount), 1);
+ if( roi.width < m.cols || roi.height < m.rows )
+ flags |= SUBMATRIX_FLAG;
+
+ step[0] = m.step[0]; step[1] = esz;
+
+ if( rows <= 0 || cols <= 0 )
+ {
+ release();
+ rows = cols = 0;
+ }
+}
+
+
+UMat::UMat(const UMat& m, const Range* ranges)
+ : flags(MAGIC_VAL), dims(0), rows(0), cols(0), allocator(0), u(0), offset(0), size(&rows)
+{
+ int i, d = m.dims;
+
+ CV_Assert(ranges);
+ for( i = 0; i < d; i++ )
+ {
+ Range r = ranges[i];
+ CV_Assert( r == Range::all() || (0 <= r.start && r.start < r.end && r.end <= m.size[i]) );
+ }
+ *this = m;
+ for( i = 0; i < d; i++ )
+ {
+ Range r = ranges[i];
+ if( r != Range::all() && r != Range(0, size.p[i]))
+ {
+ size.p[i] = r.end - r.start;
+ offset += r.start*step.p[i];
+ flags |= SUBMATRIX_FLAG;
+ }
+ }
+ updateContinuityFlag(*this);
+}
+
+UMat UMat::diag(int d) const
+{
+ CV_Assert( dims <= 2 );
+ UMat m = *this;
+ size_t esz = elemSize();
+ int len;
+
+ if( d >= 0 )
+ {
+ len = std::min(cols - d, rows);
+ m.offset += esz*d;
+ }
+ else
+ {
+ len = std::min(rows + d, cols);
+ m.offset -= step[0]*d;
+ }
+ CV_DbgAssert( len > 0 );
+
+ m.size[0] = m.rows = len;
+ m.size[1] = m.cols = 1;
+ m.step[0] += (len > 1 ? esz : 0);
+
+ if( m.rows > 1 )
+ m.flags &= ~CONTINUOUS_FLAG;
+ else
+ m.flags |= CONTINUOUS_FLAG;
+
+ if( size() != Size(1,1) )
+ m.flags |= SUBMATRIX_FLAG;
+
+ return m;
+}
+
+void UMat::locateROI( Size& wholeSize, Point& ofs ) const
+{
+ CV_Assert( dims <= 2 && step[0] > 0 );
+ size_t esz = elemSize(), minstep;
+ ptrdiff_t delta1 = (ptrdiff_t)offset, delta2 = (ptrdiff_t)u->size;
+
+ if( delta1 == 0 )
+ ofs.x = ofs.y = 0;
+ else
+ {
+ ofs.y = (int)(delta1/step[0]);
+ ofs.x = (int)((delta1 - step[0]*ofs.y)/esz);
+ CV_DbgAssert( offset == (size_t)(ofs.y*step[0] + ofs.x*esz) );
+ }
+ minstep = (ofs.x + cols)*esz;
+ wholeSize.height = (int)((delta2 - minstep)/step[0] + 1);
+ wholeSize.height = std::max(wholeSize.height, ofs.y + rows);
+ wholeSize.width = (int)((delta2 - step*(wholeSize.height-1))/esz);
+ wholeSize.width = std::max(wholeSize.width, ofs.x + cols);
+}
+
+
+UMat& UMat::adjustROI( int dtop, int dbottom, int dleft, int dright )
+{
+ CV_Assert( dims <= 2 && step[0] > 0 );
+ Size wholeSize; Point ofs;
+ size_t esz = elemSize();
+ locateROI( wholeSize, ofs );
+ int row1 = std::max(ofs.y - dtop, 0), row2 = std::min(ofs.y + rows + dbottom, wholeSize.height);
+ int col1 = std::max(ofs.x - dleft, 0), col2 = std::min(ofs.x + cols + dright, wholeSize.width);
+    offset += (row1 - ofs.y)*step[0] + (col1 - ofs.x)*esz;
+ rows = row2 - row1; cols = col2 - col1;
+ size.p[0] = rows; size.p[1] = cols;
+ if( esz*cols == step[0] || rows == 1 )
+ flags |= CONTINUOUS_FLAG;
+ else
+ flags &= ~CONTINUOUS_FLAG;
+ return *this;
+}
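
locateROI() and adjustROI() cooperate as in Mat: the first recovers the
parent extent from offset/step, the second grows or shrinks the view within
it, clamping to the parent bounds. A sketch:

    UMat big(100, 100, CV_8U);
    UMat view(big, Rect(10, 10, 20, 20));
    view.adjustROI(2, 2, 2, 2);   // expand 2 px on every side -> 24x24 at (8,8)
    Size whole; Point ofs;
    view.locateROI(whole, ofs);   // whole == Size(100,100), ofs == Point(8,8)
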
+
+
+UMat UMat::reshape(int new_cn, int new_rows) const
+{
+ int cn = channels();
+ UMat hdr = *this;
+
+ if( dims > 2 && new_rows == 0 && new_cn != 0 && size[dims-1]*cn % new_cn == 0 )
+ {
+ hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT);
+ hdr.step[dims-1] = CV_ELEM_SIZE(hdr.flags);
+ hdr.size[dims-1] = hdr.size[dims-1]*cn / new_cn;
+ return hdr;
+ }
+
+ CV_Assert( dims <= 2 );
+
+ if( new_cn == 0 )
+ new_cn = cn;
+
+ int total_width = cols * cn;
+
+ if( (new_cn > total_width || total_width % new_cn != 0) && new_rows == 0 )
+ new_rows = rows * total_width / new_cn;
+
+ if( new_rows != 0 && new_rows != rows )
+ {
+ int total_size = total_width * rows;
+ if( !isContinuous() )
+ CV_Error( CV_BadStep,
+ "The matrix is not continuous, thus its number of rows can not be changed" );
+
+ if( (unsigned)new_rows > (unsigned)total_size )
+ CV_Error( CV_StsOutOfRange, "Bad new number of rows" );
+
+ total_width = total_size / new_rows;
+
+ if( total_width * new_rows != total_size )
+ CV_Error( CV_StsBadArg, "The total number of matrix elements "
+ "is not divisible by the new number of rows" );
+
+ hdr.rows = new_rows;
+ hdr.step[0] = total_width * elemSize1();
+ }
+
+ int new_width = total_width / new_cn;
+
+ if( new_width * new_cn != total_width )
+ CV_Error( CV_BadNumChannels,
+ "The total width is not divisible by the new number of channels" );
+
+ hdr.cols = new_width;
+ hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT);
+ hdr.step[1] = CV_ELEM_SIZE(hdr.flags);
+ return hdr;
+}
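
The 2D reshape rules implemented above, in one sketch: the total element
count must be preserved, and changing the row count requires a continuous
matrix (a non-continuous view would throw CV_BadStep).

    UMat m(4, 6, CV_8UC1);       // 24 elements
    UMat r1 = m.reshape(0, 8);   // 8x3, channel count unchanged
    UMat r2 = m.reshape(3, 0);   // 4x2, 3-channel view of the same data
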
+
+UMat UMat::diag(const UMat& d)
+{
+ CV_Assert( d.cols == 1 || d.rows == 1 );
+ int len = d.rows + d.cols - 1;
+ UMat m(len, len, d.type(), Scalar(0));
+ UMat md = m.diag();
+ if( d.cols == 1 )
+ d.copyTo(md);
+ else
+ transpose(d, md);
+ return m;
+}
+
+int UMat::checkVector(int _elemChannels, int _depth, bool _requireContinuous) const
+{
+ return (depth() == _depth || _depth <= 0) &&
+ (isContinuous() || !_requireContinuous) &&
+ ((dims == 2 && (((rows == 1 || cols == 1) && channels() == _elemChannels) ||
+ (cols == _elemChannels && channels() == 1))) ||
+ (dims == 3 && channels() == 1 && size.p[2] == _elemChannels && (size.p[0] == 1 || size.p[1] == 1) &&
+ (isContinuous() || step.p[1] == step.p[2]*size.p[2])))
+ ? (int)(total()*channels()/_elemChannels) : -1;
+}
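
checkVector() answers "is this a list of N k-element vectors?" for either
storage layout and returns N, or -1 on mismatch. A sketch:

    UMat ptsA(10, 2, CV_32FC1);  // Nx2, one channel
    UMat ptsB(1, 10, CV_32FC2);  // 1xN, two channels
    CV_Assert(ptsA.checkVector(2, CV_32F) == 10);
    CV_Assert(ptsB.checkVector(2, CV_32F) == 10);
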
+
+
+UMat UMat::cross(InputArray) const
+{
+ CV_Error(CV_StsNotImplemented, "");
+ return UMat();
+}
+
+
+UMat UMat::reshape(int _cn, int _newndims, const int* _newsz) const
+{
+ if(_newndims == dims)
+ {
+ if(_newsz == 0)
+ return reshape(_cn);
+ if(_newndims == 2)
+ return reshape(_cn, _newsz[0]);
+ }
+
+ CV_Error(CV_StsNotImplemented, "");
+ // TBD
+ return UMat();
+}
+
+
+Mat UMat::getMat(int accessFlags) const
+{
+ if(!u)
+ return Mat();
+ u->currAllocator->map(u, accessFlags);
+ CV_Assert(u->data != 0);
+ Mat hdr(dims, size.p, type(), u->data + offset, step.p);
+ hdr.u = u;
+ hdr.datastart = hdr.data = u->data;
+ hdr.datalimit = hdr.dataend = u->data + u->size;
+ CV_XADD(&hdr.u->refcount, 1);
+ return hdr;
+}
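
getMat() maps the device buffer into host memory; the returned Mat holds a
reference on the UMatData, so the mapping stays valid until that Mat is
released. A sketch:

    UMat um(10, 10, CV_32F);
    {
        Mat hdr = um.getMat(ACCESS_RW); // map for host read/write
        hdr.setTo(Scalar::all(1.f));    // modify through the mapped view
    } // hdr destroyed -> mapping can be undone, um usable on the device again
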
+
+void* UMat::handle(int /*accessFlags*/) const
+{
+ if( !u )
+ return 0;
+
+    // If the host copy is newer (the device copy is marked obsolete), unmap
+    // to flush it back to the device before handing out the device handle.
+ if( u->deviceCopyObsolete() )
+ {
+ CV_Assert(u->refcount == 0);
+ u->currAllocator->unmap(u);
+ }
+ /*else if( u->refcount > 0 && (accessFlags & ACCESS_WRITE) )
+ {
+ CV_Error(Error::StsError,
+ "it's not allowed to access UMat handle for writing "
+ "while it's mapped; call Mat::release() first for all its mappings");
+ }*/
+ return u->handle;
+}
+
+void UMat::ndoffset(size_t* ofs) const
+{
+ // offset = step[0]*ofs[0] + step[1]*ofs[1] + step[2]*ofs[2] + ...;
+ size_t val = offset;
+ for( int i = 0; i < dims; i++ )
+ {
+ size_t s = step.p[i];
+ ofs[i] = val / s;
+ val -= ofs[i]*s;
+ }
+}
+
+void UMat::copyTo(OutputArray _dst) const
+{
+ int dtype = _dst.type();
+ if( _dst.fixedType() && dtype != type() )
+ {
+ CV_Assert( channels() == CV_MAT_CN(dtype) );
+ convertTo( _dst, dtype );
+ return;
+ }
+
+ if( empty() )
+ {
+ _dst.release();
+ return;
+ }
+
+ size_t i, sz[CV_MAX_DIM], srcofs[CV_MAX_DIM], dstofs[CV_MAX_DIM], esz = elemSize();
+ for( i = 0; i < (size_t)dims; i++ )
+ sz[i] = size.p[i];
+ sz[dims-1] *= esz;
+ ndoffset(srcofs);
+ srcofs[dims-1] *= esz;
+
+ _dst.create( dims, size.p, type() );
+ if( _dst.kind() == _InputArray::UMAT )
+ {
+ UMat dst = _dst.getUMat();
+ void* srchandle = handle(ACCESS_READ);
+ void* dsthandle = dst.handle(ACCESS_WRITE);
+ if( srchandle == dsthandle && dst.offset == offset )
+ return;
+ ndoffset(dstofs);
+ CV_Assert(u->currAllocator == dst.u->currAllocator);
+ u->currAllocator->copy(u, dst.u, dims, sz, srcofs, step.p, dstofs, dst.step.p, false);
+ }
+ else
+ {
+ Mat dst = _dst.getMat();
+ u->currAllocator->download(u, dst.data, dims, sz, srcofs, step.p, dst.step.p);
+ }
+}
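
copyTo() above dispatches on the destination kind: UMat targets stay on the
device via the allocator's copy(), everything else goes through the
download() path. A sketch:

    UMat src(5, 5, CV_8U), dstU;
    Mat dstM;
    src.copyTo(dstU); // device-side copy; both UMats must share an allocator
    src.copyTo(dstM); // host download path
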
+
+void UMat::convertTo(OutputArray, int, double, double) const
+{
+ CV_Error(Error::StsNotImplemented, "");
+}
+
+UMat& UMat::operator = (const Scalar&)
+{
+ CV_Error(Error::StsNotImplemented, "");
+ return *this;
+}
+
+}
+
+/* End of file. */
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the OpenCV Foundation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#include <string>
+#include <iostream>
+#include <fstream>
+#include <iterator>
+#include <limits>
+#include <numeric>
+#include "opencv2/core/ocl.hpp"
+
+using namespace cv;
+using namespace std;
+
+class CV_UMatTest : public cvtest::BaseTest
+{
+public:
+ CV_UMatTest() {}
+ ~CV_UMatTest() {}
+protected:
+ void run(int);
+
+ struct test_excep
+ {
+        test_excep(const string& _s=string("")) : s(_s) {}
+ string s;
+ };
+
+ bool TestUMat();
+
+ void checkDiff(const Mat& m1, const Mat& m2, const string& s)
+ {
+ if (norm(m1, m2, NORM_INF) != 0)
+ throw test_excep(s);
+ }
+ void checkDiffF(const Mat& m1, const Mat& m2, const string& s)
+ {
+ if (norm(m1, m2, NORM_INF) > 1e-5)
+ throw test_excep(s);
+ }
+};
+
+#define STR(a) STR2(a)
+#define STR2(a) #a
+
+#define CHECK_DIFF(a, b) checkDiff(a, b, "(" #a ") != (" #b ") at l." STR(__LINE__))
+#define CHECK_DIFF_FLT(a, b) checkDiffF(a, b, "(" #a ") !=(eps) (" #b ") at l." STR(__LINE__))
+
+
+bool CV_UMatTest::TestUMat()
+{
+ try
+ {
+ Mat a(100, 100, CV_16S), b;
+ randu(a, Scalar::all(-100), Scalar::all(100));
+ Rect roi(1, 3, 10, 20);
+ Mat ra(a, roi), rb;
+ UMat ua, ura;
+ a.copyTo(ua);
+ ua.copyTo(b);
+ CHECK_DIFF(a, b);
+
+ ura = ua(roi);
+ ura.copyTo(rb);
+
+ CHECK_DIFF(ra, rb);
+
+ ra += Scalar::all(1.f);
+ {
+ Mat temp = ura.getMat(ACCESS_RW);
+ temp += Scalar::all(1.f);
+ }
+ ra.copyTo(rb);
+ CHECK_DIFF(ra, rb);
+ }
+ catch (const test_excep& e)
+ {
+ ts->printf(cvtest::TS::LOG, "%s\n", e.s.c_str());
+ ts->set_failed_test_info(cvtest::TS::FAIL_MISMATCH);
+ return false;
+ }
+ return true;
+}
+
+void CV_UMatTest::run( int /* start_from */)
+{
+ printf("Use OpenCL: %s\nHave OpenCL: %s\n",
+ ocl::useOpenCL() ? "TRUE" : "FALSE",
+ ocl::haveOpenCL() ? "TRUE" : "FALSE" );
+
+ if (!TestUMat())
+ return;
+
+ ts->set_failed_test_info(cvtest::TS::OK);
+}
+
+TEST(Core_UMat, base) { CV_UMatTest test; test.safe_run(); }
typedef tr1::tuple<Size, MatType> Size_Source_t;
typedef TestBaseWithParam<Size_Source_t> Size_Source;
-typedef TestBaseWithParam<Size> MatSize;
+typedef TestBaseWithParam<Size> TestMatSize;
static const float rangeHight = 256.0f;
static const float rangeLow = 0.0f;
SANITY_CHECK(hist);
}
+// The '#' operator does not macro-expand its argument, so PERF_TEST_P below
+// still reports the test as "MatSize" while the fixture type actually used
+// is TestMatSize (renamed above to avoid a clash with cv::MatSize).
+#define MatSize TestMatSize
PERF_TEST_P(MatSize, equalizeHist,
testing::Values(TYPICAL_MAT_SIZES)
)
SANITY_CHECK(destination);
}
+#undef MatSize
typedef tr1::tuple<Size, double> Sz_ClipLimit_t;
typedef TestBaseWithParam<Sz_ClipLimit_t> Sz_ClipLimit;
CvEM::predict( const CvMat* _sample, CvMat* _probs ) const
{
Mat prbs0 = cvarrToMat(_probs), prbs = prbs0, sample = cvarrToMat(_sample);
- int cls = static_cast<int>(emObj.predict(sample, _probs ? _OutputArray(prbs) : cv::noArray())[1]);
+ int cls = static_cast<int>(emObj.predict(sample, _probs ? _OutputArray(prbs) :
+ (OutputArray)cv::noArray())[1]);
if(_probs)
{
if( prbs.data != prbs0.data )
bool isOk = false;
if( _params.start_step == EM::START_AUTO_STEP )
isOk = emObj.train(_samples,
- logLikelihoods, _labels ? _OutputArray(*_labels) : cv::noArray(), probs);
+ logLikelihoods, _labels ? _OutputArray(*_labels) :
+ (OutputArray)cv::noArray(), probs);
else if( _params.start_step == EM::START_E_STEP )
isOk = emObj.trainE(_samples, means, covshdrs, weights,
- logLikelihoods, _labels ? _OutputArray(*_labels) : cv::noArray(), probs);
+ logLikelihoods, _labels ? _OutputArray(*_labels) :
+ (OutputArray)cv::noArray(), probs);
else if( _params.start_step == EM::START_M_STEP )
isOk = emObj.trainM(_samples, prbs,
- logLikelihoods, _labels ? _OutputArray(*_labels) : cv::noArray(), probs);
+ logLikelihoods, _labels ? _OutputArray(*_labels) :
+ (OutputArray)cv::noArray(), probs);
else
CV_Error(CV_StsBadArg, "Bad start type of EM algorithm");
float
CvEM::predict( const Mat& _sample, Mat* _probs ) const
{
- return static_cast<float>(emObj.predict(_sample, _probs ? _OutputArray(*_probs) : cv::noArray())[1]);
+ return static_cast<float>(emObj.predict(_sample, _probs ?
+ _OutputArray(*_probs) :
+ (OutputArray)cv::noArray())[1]);
}
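
A note on the (OutputArray) casts introduced in this hunk: the two arms of a
conditional expression must share a common type, and with the reworked array
classes noArray() no longer yields a plain _OutputArray, so one arm is cast
explicitly. The pattern in isolation (consume() is a hypothetical function):

    static void consume(cv::OutputArray) {}
    static void maybeWrite(cv::Mat* p)
    {
        cv::Mat m;
        consume(p ? cv::_OutputArray(*p) : (cv::OutputArray)cv::noArray());
    }
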
int CvEM::getNClusters() const
surf->set("upright", params.upright != 0);
surf->set("extended", params.extended != 0);
- surf->operator()(img, mask, kpt, _descriptors ? _OutputArray(descr) : noArray(),
+ surf->operator()(img, mask, kpt, _descriptors ? _OutputArray(descr) : (OutputArray)noArray(),
useProvidedKeyPts != 0);
if( _keypoints )
cv::ocl::oclMat::operator cv::_InputArray()
{
- _InputArray newInputArray;
- newInputArray.flags = cv::_InputArray::OCL_MAT;
- newInputArray.obj = reinterpret_cast<void *>(this);
- return newInputArray;
+ return _InputArray(cv::_InputArray::OCL_MAT, this);
}
cv::ocl::oclMat::operator cv::_OutputArray()
{
- _OutputArray newOutputArray;
- newOutputArray.flags = cv::_InputArray::OCL_MAT;
- newOutputArray.obj = reinterpret_cast<void *>(this);
- return newOutputArray;
+ return _OutputArray(cv::_InputArray::OCL_MAT, this);
}
cv::ocl::oclMat& cv::ocl::getOclMatRef(InputArray src)
{
- CV_Assert(src.flags & cv::_InputArray::OCL_MAT);
- return *reinterpret_cast<oclMat*>(src.obj);
+ CV_Assert(src.kind() == cv::_InputArray::OCL_MAT);
+ return *(oclMat*)src.getObj();
}
cv::ocl::oclMat& cv::ocl::getOclMatRef(OutputArray src)
{
- CV_Assert(src.flags & cv::_InputArray::OCL_MAT);
- return *reinterpret_cast<oclMat*>(src.obj);
+ CV_Assert(src.kind() == cv::_InputArray::OCL_MAT);
+ return *(oclMat*)src.getObj();
}
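
With flags/obj now protected members of _InputArray, interop code is
rewritten to go through the public accessors, as above. The same check in
isolation (a sketch):

    static bool isOclMat(cv::InputArray a)
    {
        return a.kind() == cv::_InputArray::OCL_MAT;
    }
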
void cv::ocl::oclMat::download(cv::Mat &m) const
return 0;
}
-static size_t REFCOUNT_OFFSET = (size_t)&(((PyObject*)0)->ob_refcnt) +
- (0x12345678 != *(const size_t*)"\x78\x56\x34\x12\0\0\0\0\0")*sizeof(int);
-
-static inline PyObject* pyObjectFromRefcount(const int* refcount)
-{
- return (PyObject*)((size_t)refcount - REFCOUNT_OFFSET);
-}
-
-static inline int* refcountFromPyObject(const PyObject* obj)
-{
- return (int*)((size_t)obj + REFCOUNT_OFFSET);
-}
-
class NumpyAllocator : public MatAllocator
{
public:
- NumpyAllocator() {}
+ NumpyAllocator() { stdAllocator = Mat::getStdAllocator(); }
~NumpyAllocator() {}
- void allocate(int dims, const int* sizes, int type, int*& refcount,
- uchar*& datastart, uchar*& data, size_t* step)
+ UMatData* allocate(PyObject* o, int dims, const int* sizes, int type, size_t* step) const
+ {
+ UMatData* u = new UMatData(this);
+ u->refcount = 1;
+ u->data = u->origdata = (uchar*)PyArray_DATA((PyArrayObject*) o);
+ npy_intp* _strides = PyArray_STRIDES((PyArrayObject*) o);
+ for( int i = 0; i < dims - 1; i++ )
+ step[i] = (size_t)_strides[i];
+ step[dims-1] = CV_ELEM_SIZE(type);
+ u->size = sizes[0]*step[0];
+ u->userdata = o;
+ return u;
+ }
+
+ UMatData* allocate(int dims0, const int* sizes, int type, size_t* step) const
{
PyEnsureGIL gil;
int cn = CV_MAT_CN(type);
const int f = (int)(sizeof(size_t)/8);
int typenum = depth == CV_8U ? NPY_UBYTE : depth == CV_8S ? NPY_BYTE :
- depth == CV_16U ? NPY_USHORT : depth == CV_16S ? NPY_SHORT :
- depth == CV_32S ? NPY_INT : depth == CV_32F ? NPY_FLOAT :
- depth == CV_64F ? NPY_DOUBLE : f*NPY_ULONGLONG + (f^1)*NPY_UINT;
- int i;
+ depth == CV_16U ? NPY_USHORT : depth == CV_16S ? NPY_SHORT :
+ depth == CV_32S ? NPY_INT : depth == CV_32F ? NPY_FLOAT :
+ depth == CV_64F ? NPY_DOUBLE : f*NPY_ULONGLONG + (f^1)*NPY_UINT;
+ int i, dims = dims0;
cv::AutoBuffer<npy_intp> _sizes(dims + 1);
for( i = 0; i < dims; i++ )
_sizes[i] = sizes[i];
PyObject* o = PyArray_SimpleNew(dims, _sizes, typenum);
if(!o)
CV_Error_(Error::StsError, ("The numpy array of typenum=%d, ndims=%d can not be created", typenum, dims));
- refcount = refcountFromPyObject(o);
- npy_intp* _strides = PyArray_STRIDES((PyArrayObject*) o);
- for( i = 0; i < dims - (cn > 1); i++ )
- step[i] = (size_t)_strides[i];
- datastart = data = (uchar*)PyArray_DATA((PyArrayObject*) o);
+ return allocate(o, dims0, sizes, type, step);
}
- void deallocate(int* refcount, uchar*, uchar*)
+ bool allocate(UMatData* u, int accessFlags) const
{
- PyEnsureGIL gil;
- if( !refcount )
- return;
- PyObject* o = pyObjectFromRefcount(refcount);
- Py_INCREF(o);
- Py_DECREF(o);
+ return stdAllocator->allocate(u, accessFlags);
+ }
+
+ void deallocate(UMatData* u) const
+ {
+ if(u)
+ {
+ PyEnsureGIL gil;
+ PyObject* o = (PyObject*)u->userdata;
+ Py_DECREF(o);
+ delete u;
+ }
}
+
+ void map(UMatData*, int) const
+ {
+ }
+
+ void unmap(UMatData* u) const
+ {
+ if(u->urefcount == 0)
+ deallocate(u);
+ }
+
+ void download(UMatData* u, void* dstptr,
+ int dims, const size_t sz[],
+ const size_t srcofs[], const size_t srcstep[],
+ const size_t dststep[]) const
+ {
+ stdAllocator->download(u, dstptr, dims, sz, srcofs, srcstep, dststep);
+ }
+
+ void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
+ const size_t dstofs[], const size_t dststep[],
+ const size_t srcstep[]) const
+ {
+ stdAllocator->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep);
+ }
+
+ void copy(UMatData* usrc, UMatData* udst, int dims, const size_t sz[],
+ const size_t srcofs[], const size_t srcstep[],
+ const size_t dstofs[], const size_t dststep[], bool sync) const
+ {
+ stdAllocator->copy(usrc, udst, dims, sz, srcofs, srcstep, dstofs, dststep, sync);
+ }
+
+ const MatAllocator* stdAllocator;
};
NumpyAllocator g_numpyAllocator;
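
How the allocator is meant to be used (a sketch mirroring pyopencv_from
below): route a Mat through g_numpyAllocator so its storage is a real numpy
array, then hand out the PyObject kept in UMatData::userdata.

    static PyObject* toNumpy(const Mat& src)
    {
        Mat temp;
        temp.allocator = &g_numpyAllocator;
        src.copyTo(temp);                            // allocate() builds the ndarray
        PyObject* o = (PyObject*)temp.u->userdata;   // owned by temp's UMatData
        Py_INCREF(o);                                // keep it alive past temp
        return o;
    }
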
}
m = Mat(ndims, size, type, PyArray_DATA(oarr), step);
+ m.u = g_numpyAllocator.allocate(o, ndims, size, type, step);
- if( m.data )
+ if( !needcopy )
{
- m.refcount = refcountFromPyObject(o);
- if (!needcopy)
- {
- m.addref(); // protect the original numpy array from deallocation
- // (since Mat destructor will decrement the reference counter)
- }
- };
+ Py_INCREF(o);
+ }
m.allocator = &g_numpyAllocator;
return true;
if( !m.data )
Py_RETURN_NONE;
Mat temp, *p = (Mat*)&m;
- if(!p->refcount || p->allocator != &g_numpyAllocator)
+ if(!p->u || p->allocator != &g_numpyAllocator)
{
temp.allocator = &g_numpyAllocator;
ERRWRAP2(m.copyTo(temp));
p = &temp;
}
- p->addref();
- return pyObjectFromRefcount(p->refcount);
+ PyObject* o = (PyObject*)p->u->userdata;
+ Py_INCREF(o);
+ return o;
}
template<>
void Farneback::impl(const Mat& input0, const Mat& input1, OutputArray dst)
{
- calcOpticalFlowFarneback(input0, input1, dst, pyrScale_, numLevels_, winSize_, numIters_, polyN_, polySigma_, flags_);
+ calcOpticalFlowFarneback(input0, input1, (InputOutputArray)dst, pyrScale_,
+ numLevels_, winSize_, numIters_,
+ polyN_, polySigma_, flags_);
}
}
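
The cast above tracks the signature change at the end of this section:
calcOpticalFlowFarneback now takes the flow field as InputOutputArray, since
it may be read as an initial estimate (OPTFLOW_USE_INITIAL_FLOW). A caller
holding only an OutputArray forwards it the same way (sketch):

    static void runFlow(const cv::Mat& a, const cv::Mat& b, cv::OutputArray flow)
    {
        cv::calcOpticalFlowFarneback(a, b, (cv::InputOutputArray)flow,
                                     0.5, 3, 13, 10, 5, 1.1, 0);
    }
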
alg_->set("iterations", iterations_);
alg_->set("useInitialFlow", useInitialFlow_);
- alg_->calc(input0, input1, dst);
+ alg_->calc(input0, input1, (InputOutputArray)dst);
}
void DualTVL1::collectGarbage()
if( error )
err = cv::Mat(count, 1, CV_32F, (void*)error);
cv::calcOpticalFlowPyrLK( A, B, ptA, ptB, st,
- error ? cv::_OutputArray(err) : cv::noArray(),
+ error ? cv::_OutputArray(err) : (cv::_OutputArray)cv::noArray(),
winSize, level, criteria, flags);
}
}
void cv::calcOpticalFlowFarneback( InputArray _prev0, InputArray _next0,
- OutputArray _flow0, double pyr_scale, int levels, int winsize,
+ InputOutputArray _flow0, double pyr_scale, int levels, int winsize,
int iterations, int poly_n, double poly_sigma, int flags )
{
Mat prev0 = _prev0.getMat(), next0 = _next0.getMat();