From: Anatoly Baksheev
Date: Mon, 19 Jul 2010 09:31:12 +0000 (+0000)
Subject: compilation with no cuda refactored
X-Git-Tag: accepted/tizen/6.0/unified/20201030.111113~8934
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=07825bad1e5effaebf60455f01966b820e688122;p=platform%2Fupstream%2Fopencv.git

compilation with no cuda refactored
---

diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt
index 4e23b7c..0e303fd 100644
--- a/modules/gpu/CMakeLists.txt
+++ b/modules/gpu/CMakeLists.txt
@@ -1,6 +1,7 @@
-set(name "gpu")
-set(DEPS "opencv_core")
+set(name "gpu")
+set(DEPS "opencv_core")
+
 set(the_target "opencv_${name}")

@@ -9,25 +10,25 @@ project(${the_target})
 add_definitions(-DCVAPI_EXPORTS)

 include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include"
-                    "${CMAKE_CURRENT_SOURCE_DIR}/cuda"
+                    "${CMAKE_CURRENT_SOURCE_DIR}/src/cuda"
                     "${CMAKE_CURRENT_SOURCE_DIR}/src"
                     "${CMAKE_CURRENT_BINARY_DIR}")

 foreach(d ${DEPS})
-    if(${d} MATCHES "opencv_")
+    if(${d} MATCHES "opencv_")
         string(REPLACE "opencv_" "${CMAKE_CURRENT_SOURCE_DIR}/../" d_dir ${d})
-        include_directories("${d_dir}/include")
+        include_directories("${d_dir}/include")
     endif()
-endforeach()
+endforeach()

 file(GLOB lib_srcs "src/*.cpp")
 file(GLOB lib_int_hdrs "src/*.h*")
-file(GLOB lib_cuda "cuda/*.cu")
-file(GLOB lib_cuda_hdrs "cuda/*.h*")
+file(GLOB lib_cuda "src/cuda/*.cu")
+file(GLOB lib_cuda_hdrs "src/cuda/*.h*")
 source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
 source_group("Cuda" FILES ${lib_cuda} ${lib_cuda_hdrs})

-file(GLOB lib_hdrs "include/opencv2/${name}/*.h*")
+file(GLOB lib_hdrs "include/opencv2/${name}/*.h*")
 source_group("Include" FILES ${lib_hdrs})

 if (HAVE_CUDA)
@@ -35,13 +36,13 @@ if (HAVE_CUDA)
     link_directories(${CUDA_LIBRARIES})

     if (UNIX OR APPLE)
-        set (CUDA_NVCC_FLAGS "-Xcompiler;-fPIC")
-    endif()
-
+        set (CUDA_NVCC_FLAGS "-Xcompiler;-fPIC;")
+    endif()
+
     CUDA_COMPILE(cuda_objs ${lib_cuda})
     #CUDA_BUILD_CLEAN_TARGET()
 endif()
-
+
 add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${cuda_objs})

@@ -50,7 +51,7 @@ if(PCHSupport_FOUND)
     if(${CMAKE_GENERATOR} MATCHES "Visual*" OR ${CMAKE_GENERATOR} MATCHES "Xcode*")
         if(${CMAKE_GENERATOR} MATCHES "Visual*")
             set(${the_target}_pch "src/precomp.cpp")
-        endif()
+        endif()
         add_native_precompiled_header(${the_target} ${pch_header})
     elseif(CMAKE_COMPILER_IS_GNUCXX AND ${CMAKE_GENERATOR} MATCHES ".*Makefiles")
         add_precompiled_header(${the_target} ${pch_header})
diff --git a/modules/gpu/cuda/mat_operators.cu b/modules/gpu/cuda/mat_operators.cu
deleted file mode 100644
index d1ac7e3..0000000
--- a/modules/gpu/cuda/mat_operators.cu
+++ /dev/null
@@ -1,151 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other materials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include
-#include "cuda_shared.hpp"
-#include "cuda_runtime.h"
-
-__constant__ float scalar_d[4];
-
-namespace mat_operators
-{
-
-    template <typename T, int channels>
-    struct unroll
-    {
-        __device__ static void unroll_set(T * mat, size_t i)
-        {
-            mat[i] = static_cast<T>(scalar_d[i % channels]);
-            unroll<T, channels - 1>::unroll_set(mat, i + 1);
-        }
-
-        __device__ static void unroll_set_with_mask(T * mat, float mask, size_t i)
-        {
-            mat[i] = mask * static_cast<T>(scalar_d[i % channels]);
-            unroll<T, channels - 1>::unroll_set_with_mask(mat, mask, i + 1);
-        }
-    };
-
-    template <typename T>
-    struct unroll<T, 0>
-    {
-        __device__ static void unroll_set(T * , size_t){}
-        __device__ static void unroll_set_with_mask(T * , float, size_t){}
-    };
-
-    template <typename T, int channels>
-    __global__ void kernel_set_to_without_mask(T * mat)
-    {
-        size_t i = (blockIdx.x * blockDim.x + threadIdx.x) * sizeof(T);
-        unroll<T, channels>::unroll_set(mat, i);
-    }
-
-    template <typename T, int channels>
-    __global__ void kernel_set_to_with_mask(T * mat, const float * mask)
-    {
-        size_t i = (blockIdx.x * blockDim.x + threadIdx.x) * sizeof(T);
-        unroll<T, channels>::unroll_set_with_mask(mat, i, mask[i]);
-    }
-}
-
-
-extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const double * scalar, const DevMem2D& mask, int depth, int channels)
-{
-    scalar_d[0] = scalar[0];
-    scalar_d[1] = scalar[1];
-    scalar_d[2] = scalar[2];
-    scalar_d[3] = scalar[3];
-
-    int numBlocks = mat.rows * mat.step / 256;
-
-    dim3 threadsPerBlock(256);
-
-    if (channels == 1)
-    {
-        if (depth == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char,  1><<<numBlocks, threadsPerBlock>>>(mat.ptr, (float *)mask.ptr);
-        if (depth == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 1><<<numBlocks, threadsPerBlock>>>((unsigned short *)mat.ptr, (float *)mask.ptr);
-        if (depth == 4) ::mat_operators::kernel_set_to_with_mask<unsigned int,   1><<<numBlocks, threadsPerBlock>>>((unsigned int *)mat.ptr, (float *)mask.ptr);
-    }
-    if (channels == 2)
-    {
-        if (depth == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char,  2><<<numBlocks, threadsPerBlock>>>(mat.ptr, (float *)mask.ptr);
-        if (depth == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 2><<<numBlocks, threadsPerBlock>>>((unsigned short *)mat.ptr, (float *)mask.ptr);
-        if (depth == 4) ::mat_operators::kernel_set_to_with_mask<unsigned int,   2><<<numBlocks, threadsPerBlock>>>((unsigned int *)mat.ptr, (float *)mask.ptr);
-    }
-    if (channels == 3)
-    {
-        if (depth == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char,  3><<<numBlocks, threadsPerBlock>>>(mat.ptr, (float *)mask.ptr);
-        if (depth == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 3><<<numBlocks, threadsPerBlock>>>((unsigned short *)mat.ptr, (float *)mask.ptr);
-        if (depth == 4) ::mat_operators::kernel_set_to_with_mask<unsigned int,   3><<<numBlocks, threadsPerBlock>>>((unsigned int *)mat.ptr, (float *)mask.ptr);
-    }
-}
-
-extern "C" void cv::gpu::impl::set_to_without_mask(const DevMem2D& mat, const double * scalar, int depth, int channels)
-{
-    scalar_d[0] = scalar[0];
-    scalar_d[1] = scalar[1];
-    scalar_d[2] = scalar[2];
-    scalar_d[3] = scalar[3];
-
-    int numBlocks = mat.rows * mat.step / 256;
-
-    dim3 threadsPerBlock(256);
-
-    if (channels == 1)
-    {
-        if (depth == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char,  1><<<numBlocks, threadsPerBlock>>>(mat.ptr);
-        if (depth == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 1><<<numBlocks, threadsPerBlock>>>((unsigned short *)mat.ptr);
-        if (depth == 4) ::mat_operators::kernel_set_to_without_mask<unsigned int,   1><<<numBlocks, threadsPerBlock>>>((unsigned int *)mat.ptr);
-    }
-    if (channels == 2)
-    {
-        if (depth == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char,  2><<<numBlocks, threadsPerBlock>>>(mat.ptr);
-        if (depth == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 2><<<numBlocks, threadsPerBlock>>>((unsigned short *)mat.ptr);
-        if (depth == 4) ::mat_operators::kernel_set_to_without_mask<unsigned int,   2><<<numBlocks, threadsPerBlock>>>((unsigned int *)mat.ptr);
-    }
-    if (channels == 3)
-    {
-        if (depth == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char,  3><<<numBlocks, threadsPerBlock>>>(mat.ptr);
-        if (depth == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 3><<<numBlocks, threadsPerBlock>>>((unsigned short *)mat.ptr);
-        if (depth == 4) ::mat_operators::kernel_set_to_without_mask<unsigned int,   3><<<numBlocks, threadsPerBlock>>>((unsigned int *)mat.ptr);
-    }
-}
diff --git a/modules/gpu/include/opencv2/gpu/devmem2d.hpp b/modules/gpu/include/opencv2/gpu/devmem2d.hpp
index b1bbd4c..cbb515d 100644
--- a/modules/gpu/include/opencv2/gpu/devmem2d.hpp
+++ b/modules/gpu/include/opencv2/gpu/devmem2d.hpp
@@ -48,12 +48,13 @@ namespace cv
     namespace gpu
     {
         // Simple lightweight structure that encapsulates an image pointer on the device, its pitch and its sizes.
-        // It is intended to pass to nvcc-compiled code.
+        // It is intended to be passed to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile.

        template <typename T>
        struct DevMem2D_
        {
-            enum { elem_size = sizeof(T) };
+            typedef T elem_t;
+            enum { elem_size = sizeof(elem_t) };

            int cols;
            int rows;
diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index b8e8d38..7ce5e79 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -52,15 +52,20 @@ namespace cv
    {
        //////////////////////////////// Initialization ////////////////////////

+        //! This is the only function that does not throw exceptions if the library is compiled without Cuda.
        CV_EXPORTS int getCudaEnabledDeviceCount();
+
+        //! Functions below throw cv::Exception if the library is compiled without Cuda.
        CV_EXPORTS string getDeviceName(int device);
        CV_EXPORTS void setDevice(int device);
+        CV_EXPORTS int getDevice();
        CV_EXPORTS void getComputeCapability(int device, int* major, int* minor);
        CV_EXPORTS int getNumberOfSMs(int device);

        //////////////////////////////// GpuMat ////////////////////////////////

+        //! Smart pointer for GPU memory with reference counting. Its interface is mostly similar to cv::Mat.
        class CV_EXPORTS GpuMat
        {
        public:
@@ -85,7 +90,7 @@ namespace cv
            GpuMat(const GpuMat& m, const Rect& roi);

            //! builds GpuMat from Mat. Performs a blocking upload to the device.
-            GpuMat (const Mat& m);
+            explicit GpuMat (const Mat& m);
            //! destructor - calls release()
            ~GpuMat();

@@ -211,44 +216,109 @@ namespace cv
            uchar* dataend;
        };

+        //////////////////////////////// MatPL ////////////////////////////////
+        // MatPL is a limited cv::Mat with page-locked memory allocation.
+        // Page-locked memory is only needed for async and faster copying to the GPU.
+        // It is convertible to a cv::Mat header without reference counting,
+        // so you can use it with other OpenCV functions.
+
+        class CV_EXPORTS MatPL
+        {
+        public:
+
+            //Not supported. Now behaviour is like ALLOC_DEFAULT.
+            //enum { ALLOC_DEFAULT = 0, ALLOC_PORTABLE = 1, ALLOC_WRITE_COMBINED = 4 }
+
+            MatPL();
+            MatPL(const MatPL& m);
+
+            MatPL(int _rows, int _cols, int _type);
+            MatPL(Size _size, int _type);
+
+            //! creates from cv::Mat, copying the data
+            explicit MatPL(const Mat& m);
+
+            ~MatPL();
+
+            MatPL& operator = (const MatPL& m);
+
+            //! returns deep copy of the matrix, i.e. the data is copied
+            MatPL clone() const;
+
+            //! allocates new matrix data unless the matrix already has specified size and type.
+            void create(int _rows, int _cols, int _type);
+            void create(Size _size, int _type);
+
+            //! decrements reference counter and releases memory if needed.
+            void release();
+
+            //! returns matrix header with disabled reference counting for MatPL data.
+            Mat createMatHeader() const;
+            operator Mat() const;
+
+            // Please see cv::Mat for descriptions
+            bool isContinuous() const;
+            size_t elemSize() const;
+            size_t elemSize1() const;
+            int type() const;
+            int depth() const;
+            int channels() const;
+            size_t step1() const;
+            Size size() const;
+            bool empty() const;
+
+            // Please see cv::Mat for descriptions
+            int flags;
+            int rows, cols;
+            size_t step;
+
+            uchar* data;
+            int* refcount;
+
+            uchar* datastart;
+            uchar* dataend;
+        };
+
        //////////////////////////////// CudaStream ////////////////////////////////
+        // Encapsulates a Cuda stream. Provides an interface for async copying.
+        // Passed to each function that supports async kernel execution.
+        // Reference counting is enabled.

-        class CudaStream
+        class CV_EXPORTS CudaStream
        {
        public:
-
-            static CudaStream empty();
-
            CudaStream();
            ~CudaStream();

+            CudaStream(const CudaStream&);
+            CudaStream& operator=(const CudaStream&);
+
            bool queryIfComplete();
-            void waitForCompletion();
+            void waitForCompletion();

-            //calls cudaMemcpyAsync
+            //! downloads asynchronously.
+            // Warning! cv::Mat must point to page locked memory (i.e. to MatPL data or to its subMat)
+            void enqueueDownload(const GpuMat& src, MatPL& dst);
            void enqueueDownload(const GpuMat& src, Mat& dst);
-            void enqueueUpload(const Mat& src, GpuMat& dst);
-            void enqueueCopy(const GpuMat& src, GpuMat& dst);

-            // calls cudaMemset2D asynchronous for single channel. Invoke kernel for some multichannel.
-            void enqueueMemSet(const GpuMat& src, Scalar val);
+            //! uploads asynchronously.
+            // Warning! cv::Mat must point to page locked memory (i.e. to MatPL data or to its ROI)
+            void enqueueUpload(const MatPL& src, GpuMat& dst);
+            void enqueueUpload(const Mat& src, GpuMat& dst);

-            // invoke kernel asynchronous because of mask
+            void enqueueCopy(const GpuMat& src, GpuMat& dst);
+
+            void enqueueMemSet(const GpuMat& src, Scalar val);
            void enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask);

            // converts matrix type, ex from float to uchar depending on type
-            void enqueueConvert(const GpuMat& src, GpuMat& dst, int type);
-
-            struct Impl;
-            const Impl& getImpl() const;
+            void enqueueConvert(const GpuMat& src, GpuMat& dst, int type, double a = 1, double b = 0);

        private:
-
-            Impl *impl;
-
-
-
-            CudaStream(const CudaStream&);
-            CudaStream& operator=(const CudaStream&);
+            void create();
+            void release();
+            struct Impl;
+            Impl *impl;
+            friend struct StreamAccessor;
        };

        //////////////////////////////// StereoBM_GPU ////////////////////////////////
@@ -265,17 +335,22 @@ namespace cv
            StereoBM_GPU(int preset, int ndisparities=0);

            //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
            //! Output disparity has CV_8U type.
-            void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity) const;
+            void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity);
+
+            //! Async version
+            void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream);
+
+            //! A heuristic that tries to estimate
+            // whether the current GPU will be faster than the CPU for this algorithm.
+            // It queries the current active device.
+            static bool checkIfGpuCallReasonable();

        private:
-            mutable GpuMat minSSD;
+            GpuMat minSSD;
            int preset;
            int ndisp;
        };
    }
}
-
-
-
-#include "opencv2/gpu/gpumat.hpp"
+#include "opencv2/gpu/matrix_operations.hpp"

#endif /* __OPENCV_GPU_HPP__ */
\ No newline at end of file
diff --git a/modules/gpu/include/opencv2/gpu/matpl.hpp b/modules/gpu/include/opencv2/gpu/matpl.hpp
deleted file mode 100644
index cecc01d..0000000
--- a/modules/gpu/include/opencv2/gpu/matpl.hpp
+++ /dev/null
@@ -1,265 +0,0 @@
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-//  By downloading, copying, installing or using the software you agree to this license.
-//  If you do not agree to this license, do not download, install,
-//  copy or use the software.
-//
-//
-//                           License Agreement
-//                For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-//   * Redistribution's of source code must retain the above copyright notice,
-//     this list of conditions and the following disclaimer.
-//
-//   * Redistribution's in binary form must reproduce the above copyright notice,
-//     this list of conditions and the following disclaimer in the documentation
-//     and/or other GpuMaterials provided with the distribution.
-//
-//   * The name of the copyright holders may not be used to endorse or promote products
-//     derived from this software without specific prior written permission.
-// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_GPU_MATPL_HPP__ -#define __OPENCV_GPU_MATPL_HPP__ - -#include "opencv2/core/core.hpp" - -namespace cv -{ - namespace gpu - { - - //////////////////////////////// MatPL //////////////////////////////// - - //class CV_EXPORTS MatPL : private Mat - //{ - //public: - // MatPL() {} - // MatPL(int _rows, int _cols, int _type) : Mat(_rows, _cols, _type) {} - // MatPL(Size _size, int _type) : Mat(_size, _type) {} - // - // Mat(int _rows, int _cols, int _type, const Scalar& _s) : Mat - // MatPL(Size _size, int _type, const Scalar& _s); - // //! copy constructor - // MatPL(const Mat& m); - // //! constructor for matrix headers pointing to user-allocated data - // MatPL(int _rows, int _cols, int _type, void* _data, size_t _step=AUTO_STEP); - // MatPL(Size _size, int _type, void* _data, size_t _step=AUTO_STEP); - // //! creates a matrix header for a part of the bigger matrix - // MatPL(const Mat& m, const Range& rowRange, const Range& colRange); - // MatPL(const Mat& m, const Rect& roi); - // //! converts old-style CvMat to the new matrix; the data is not copied by default - // Mat(const CvMat* m, bool copyData=false); - // MatPL converts old-style IplImage to the new matrix; the data is not copied by default - // MatPL(const IplImage* img, bool copyData=false); - // //! builds matrix from std::vector with or without copying the data - // template explicit Mat(const vector<_Tp>& vec, bool copyData=false); - // //! builds matrix from cv::Vec; the data is copied by default - // template explicit Mat(const Vec<_Tp, n>& vec, - // bool copyData=true); - // //! builds matrix from cv::Matx; the data is copied by default - // template explicit Mat(const Matx<_Tp, m, n>& mtx, - // bool copyData=true); - // //! builds matrix from a 2D point - // template explicit Mat(const Point_<_Tp>& pt); - // //! builds matrix from a 3D point - // template explicit Mat(const Point3_<_Tp>& pt); - // //! builds matrix from comma initializer - // template explicit Mat(const MatCommaInitializer_<_Tp>& commaInitializer); - // //! helper constructor to compile matrix expressions - // Mat(const MatExpr_Base& expr); - // //! destructor - calls release() - // ~Mat(); - // //! assignment operators - // Mat& operator = (const Mat& m); - // Mat& operator = (const MatExpr_Base& expr); - - // operator MatExpr_() const; - - // //! returns a new matrix header for the specified row - // Mat row(int y) const; - // //! returns a new matrix header for the specified column - // Mat col(int x) const; - // //! ... for the specified row span - // Mat rowRange(int startrow, int endrow) const; - // Mat rowRange(const Range& r) const; - // //! ... 
for the specified column span - // Mat colRange(int startcol, int endcol) const; - // Mat colRange(const Range& r) const; - // //! ... for the specified diagonal - // // (d=0 - the main diagonal, - // // >0 - a diagonal from the lower half, - // // <0 - a diagonal from the upper half) - // Mat diag(int d=0) const; - // //! constructs a square diagonal matrix which main diagonal is vector "d" - // static Mat diag(const Mat& d); - - // //! returns deep copy of the matrix, i.e. the data is copied - // Mat clone() const; - // //! copies the matrix content to "m". - // // It calls m.create(this->size(), this->type()). - // void copyTo( Mat& m ) const; - // //! copies those matrix elements to "m" that are marked with non-zero mask elements. - // void copyTo( Mat& m, const Mat& mask ) const; - // //! converts matrix to another datatype with optional scalng. See cvConvertScale. - // void convertTo( Mat& m, int rtype, double alpha=1, double beta=0 ) const; - - // void assignTo( Mat& m, int type=-1 ) const; - - // //! sets every matrix element to s - // Mat& operator = (const Scalar& s); - // //! sets some of the matrix elements to s, according to the mask - // Mat& setTo(const Scalar& s, const Mat& mask=Mat()); - // //! creates alternative matrix header for the same data, with different - // // number of channels and/or different number of rows. see cvReshape. - // Mat reshape(int _cn, int _rows=0) const; - - // //! matrix transposition by means of matrix expressions - // MatExpr_ >, Mat> - // t() const; - // //! matrix inversion by means of matrix expressions - // MatExpr_ >, Mat> - // inv(int method=DECOMP_LU) const; - // MatExpr_ >, Mat> - // //! per-element matrix multiplication by means of matrix expressions - // mul(const Mat& m, double scale=1) const; - // MatExpr_ >, Mat> - // mul(const MatExpr_ >, Mat>& m, double scale=1) const; - // MatExpr_ >, Mat> - // mul(const MatExpr_ >, Mat>& m, double scale=1) const; - - // //! computes cross-product of 2 3D vectors - // Mat cross(const Mat& m) const; - // //! computes dot-product - // double dot(const Mat& m) const; - - // //! Matlab-style matrix initialization - // static MatExpr_Initializer zeros(int rows, int cols, int type); - // static MatExpr_Initializer zeros(Size size, int type); - // static MatExpr_Initializer ones(int rows, int cols, int type); - // static MatExpr_Initializer ones(Size size, int type); - // static MatExpr_Initializer eye(int rows, int cols, int type); - // static MatExpr_Initializer eye(Size size, int type); - - // //! allocates new matrix data unless the matrix already has specified size and type. - // // previous data is unreferenced if needed. - // void create(int _rows, int _cols, int _type); - // void create(Size _size, int _type); - // //! increases the reference counter; use with care to avoid memleaks - // void addref(); - // //! decreases reference counter; - // // deallocate the data when reference counter reaches 0. - // void release(); - - // //! locates matrix header within a parent matrix. See below - // void locateROI( Size& wholeSize, Point& ofs ) const; - // //! moves/resizes the current matrix ROI inside the parent matrix. - // Mat& adjustROI( int dtop, int dbottom, int dleft, int dright ); - // //! extracts a rectangular sub-matrix - // // (this is a generalized form of row, rowRange etc.) - // Mat operator()( Range rowRange, Range colRange ) const; - // Mat operator()( const Rect& roi ) const; - - // //! converts header to CvMat; no data is copied - // operator CvMat() const; - // //! 
converts header to IplImage; no data is copied - // operator IplImage() const; - - // //! returns true iff the matrix data is continuous - // // (i.e. when there are no gaps between successive rows). - // // similar to CV_IS_MAT_CONT(cvmat->type) - // bool isContinuous() const; - // //! returns element size in bytes, - // // similar to CV_ELEM_SIZE(cvmat->type) - // size_t elemSize() const; - // //! returns the size of element channel in bytes. - // size_t elemSize1() const; - // //! returns element type, similar to CV_MAT_TYPE(cvmat->type) - // int type() const; - // //! returns element type, similar to CV_MAT_DEPTH(cvmat->type) - // int depth() const; - // //! returns element type, similar to CV_MAT_CN(cvmat->type) - // int channels() const; - // //! returns step/elemSize1() - // size_t step1() const; - // //! returns matrix size: - // // width == number of columns, height == number of rows - // Size size() const; - // //! returns true if matrix data is NULL - // bool empty() const; - - // //! returns pointer to y-th row - // uchar* ptr(int y=0); - // const uchar* ptr(int y=0) const; - - // //! template version of the above method - // template _Tp* ptr(int y=0); - // template const _Tp* ptr(int y=0) const; - - // //! template methods for read-write or read-only element access. - // // note that _Tp must match the actual matrix type - - // // the functions do not do any on-fly type conversion - // template _Tp& at(int y, int x); - // template _Tp& at(Point pt); - // template const _Tp& at(int y, int x) const; - // template const _Tp& at(Point pt) const; - // template _Tp& at(int i); - // template const _Tp& at(int i) const; - - // //! template methods for iteration over matrix elements. - // // the iterators take care of skipping gaps in the end of rows (if any) - // template MatIterator_<_Tp> begin(); - // template MatIterator_<_Tp> end(); - // template MatConstIterator_<_Tp> begin() const; - // template MatConstIterator_<_Tp> end() const; - - // enum { MAGIC_VAL=0x42FF0000, AUTO_STEP=0, CONTINUOUS_FLAG=CV_MAT_CONT_FLAG }; - - // /*! includes several bit-fields: - // - the magic signature - // - continuity flag - // - depth - // - number of channels - // */ - // int flags; - // //! the number of rows and columns - // int rows, cols; - // //! a distance between successive rows in bytes; includes the gap if any - // size_t step; - // //! pointer to the data - // uchar* data; - - // //! pointer to the reference counter; - // // when matrix points to user-allocated data, the pointer is NULL - // int* refcount; - - // //! 
helper fields used in locateROI and adjustROI
-    //  uchar* datastart;
-    //  uchar* dataend;
-    //};
-    }
-}
-
-
-#endif /* __OPENCV_GPU_MATPL_HPP__ */
\ No newline at end of file
diff --git a/modules/gpu/include/opencv2/gpu/gpumat.hpp b/modules/gpu/include/opencv2/gpu/matrix_operations.hpp
similarity index 75%
rename from modules/gpu/include/opencv2/gpu/gpumat.hpp
rename to modules/gpu/include/opencv2/gpu/matrix_operations.hpp
index 922ff8d..81117d6 100644
--- a/modules/gpu/include/opencv2/gpu/gpumat.hpp
+++ b/modules/gpu/include/opencv2/gpu/matrix_operations.hpp
@@ -43,27 +43,25 @@
 #ifndef __OPENCV_GPU_MATRIX_OPERATIONS_HPP__
 #define __OPENCV_GPU_MATRIX_OPERATIONS_HPP__

-
 namespace cv
 {

 namespace gpu
 {

+////////////////////////////////////////////////////////////////////////
 //////////////////////////////// GpuMat ////////////////////////////////
+////////////////////////////////////////////////////////////////////////

-inline GpuMat::GpuMat()
-    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) {}
+inline GpuMat::GpuMat() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) {}

-inline GpuMat::GpuMat(int _rows, int _cols, int _type)
-    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+inline GpuMat::GpuMat(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
 {
     if( _rows > 0 && _cols > 0 )
         create( _rows, _cols, _type );
 }

-inline GpuMat::GpuMat(Size _size, int _type)
-    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+inline GpuMat::GpuMat(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
 {
     if( _size.height > 0 && _size.width > 0 )
         create( _size.height, _size.width, _type );
@@ -249,12 +247,9 @@ inline void GpuMat::assignTo( GpuMat& m, int type ) const
 //CPP GpuMat& GpuMat::operator = (const Scalar& s);
 //CPP GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask=GpuMat());
-
 //CPP GpuMat GpuMat::reshape(int _cn, int _rows=0) const;
-
-//CPP void GpuMat::create(int _rows, int _cols, int _type);
 inline void GpuMat::create(Size _size, int _type) { create(_size.height, _size.width, _type); }
-
+//CPP void GpuMat::create(int _rows, int _cols, int _type);
 //CPP void GpuMat::release();

 inline void GpuMat::swap(GpuMat& b)
@@ -343,6 +338,87 @@ template <typename _Tp> inline const _Tp* GpuMat::ptr(int y) const
 static inline void swap( GpuMat& a, GpuMat& b ) { a.swap(b); }

+///////////////////////////////////////////////////////////////////////
+//////////////////////////////// MatPL ////////////////////////////////
+///////////////////////////////////////////////////////////////////////
+
+inline MatPL::MatPL() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) {}
+
+inline MatPL::MatPL(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+{
+    if( _rows > 0 && _cols > 0 )
+        create( _rows, _cols, _type );
+}
+
+inline MatPL::MatPL(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+{
+    if( _size.height > 0 && _size.width > 0 )
+        create( _size.height, _size.width, _type );
+}
+
+inline MatPL::MatPL(const MatPL& m) : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend)
+{
+    if( refcount )
+        CV_XADD(refcount, 1);
+}
+
+inline MatPL::MatPL(const Mat& m) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+{
+    if( m.rows > 0 && m.cols > 0 )
+        create( m.size(), m.type() );
+
+    Mat tmp = createMatHeader();
+    m.copyTo(tmp);
+}
+
+inline MatPL::~MatPL()
+{
+    release();
+}
+
+inline MatPL& MatPL::operator = (const MatPL& m)
+{
+    if( this != &m )
+    {
+        if( m.refcount )
+            CV_XADD(m.refcount, 1);
+        release();
+        flags = m.flags;
+        rows = m.rows; cols = m.cols;
+        step = m.step; data = m.data;
+        datastart = m.datastart;
+        dataend = m.dataend;
+        refcount = m.refcount;
+    }
+    return *this;
+}
+
+inline MatPL MatPL::clone() const
+{
+    MatPL m(size(), type());
+    Mat to = m;
+    Mat from = *this;
+    from.copyTo(to);
+    return m;
+}
+
+inline void MatPL::create(Size _size, int _type) { create(_size.height, _size.width, _type); }
+//CPP void MatPL::create(int _rows, int _cols, int _type);
+//CPP void MatPL::release();
+
+inline Mat MatPL::createMatHeader() const { return Mat(size(), type(), data); }
+inline MatPL::operator Mat() const { return createMatHeader(); }
+
+inline bool MatPL::isContinuous() const { return (flags & Mat::CONTINUOUS_FLAG) != 0; }
+inline size_t MatPL::elemSize() const { return CV_ELEM_SIZE(flags); }
+inline size_t MatPL::elemSize1() const { return CV_ELEM_SIZE1(flags); }
+inline int MatPL::type() const { return CV_MAT_TYPE(flags); }
+inline int MatPL::depth() const { return CV_MAT_DEPTH(flags); }
+inline int MatPL::channels() const { return CV_MAT_CN(flags); }
+inline size_t MatPL::step1() const { return step/elemSize1(); }
+inline Size MatPL::size() const { return Size(cols, rows); }
+inline bool MatPL::empty() const { return data == 0; }
+
 } /* end of namespace gpu */

 } /* end of namespace cv */
diff --git a/modules/gpu/include/opencv2/gpu/stream_accessor.hpp b/modules/gpu/include/opencv2/gpu/stream_accessor.hpp
new file mode 100644
index 0000000..389b7cd
--- /dev/null
+++ b/modules/gpu/include/opencv2/gpu/stream_accessor.hpp
@@ -0,0 +1,64 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other GpuMaterials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_GPU_STREAM_ACCESSOR_HPP__
+#define __OPENCV_GPU_STREAM_ACCESSOR_HPP__
+
+#include "opencv2/gpu/gpu.hpp"
+#include "cuda_runtime_api.h"
+
+namespace cv
+{
+    namespace gpu
+    {
+        // This is the only header file that depends on Cuda. All other headers are independent.
+        // So if you use OpenCV binaries you do not need to install the Cuda Toolkit.
+        // But if you want to use the GPU yourself, you may obtain the cuda stream instance using the class below.
+        // In this case you have to install the Cuda Toolkit.
+        struct StreamAccessor
+        {
+            CV_EXPORTS static cudaStream_t getStream(const CudaStream& stream);
+        };
+    }
+}
+
+#endif /* __OPENCV_GPU_STREAM_ACCESSOR_HPP__ */
\ No newline at end of file
diff --git a/modules/gpu/cuda/cuda_shared.hpp b/modules/gpu/src/cuda/cuda_shared.hpp
similarity index 81%
rename from modules/gpu/cuda/cuda_shared.hpp
rename to modules/gpu/src/cuda/cuda_shared.hpp
index d7b81c7..917d450 100644
--- a/modules/gpu/cuda/cuda_shared.hpp
+++ b/modules/gpu/src/cuda/cuda_shared.hpp
@@ -44,31 +44,37 @@
 #define __OPENCV_CUDA_SHARED_HPP__

 #include "opencv2/gpu/devmem2d.hpp"
+#include "cuda_runtime_api.h"

 namespace cv
 {
     namespace gpu
-    {
+    {
         typedef unsigned char uchar;
         typedef unsigned short ushort;
-        typedef unsigned int uint;
+        typedef unsigned int uint;

         extern "C" void error( const char *error_string, const char *file, const int line, const char *func = "");

         namespace impl
-        {
+        {
             static inline int divUp(int a, int b) { return (a % b == 0) ? a/b : a/b + 1; }
             extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, DevMem2D& disp, int maxdisp, DevMem2D_<unsigned int>& minSSD_buf);
-
-            extern "C" void set_to_without_mask (const DevMem2D& mat, const double * scalar, int depth, int channels);
-            extern "C" void set_to_with_mask    (const DevMem2D& mat, const double * scalar, const DevMem2D& mask, int depth, int channels);
         }
     }
 }

-#ifdef __CUDACC__
-    #define cudaSafeCall(expr) { cudaError_t err = expr; if( cudaSuccess != err) cv::gpu::error(cudaGetErrorString(err), __FILE__, __LINE__); }
+#if defined(__GNUC__)
+    #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__);
+#else /* defined(__CUDACC__) || defined(__MSVC__) */
+    #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)
 #endif

+    static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
+    {
+        if( cudaSuccess != err)
+            cv::gpu::error(cudaGetErrorString(err), file, line, func);
+    }
+
 #endif /* __OPENCV_CUDA_SHARED_HPP__ */
diff --git a/modules/gpu/cuda/stereobm.cu b/modules/gpu/src/cuda/stereobm.cu
similarity index 100%
rename from modules/gpu/cuda/stereobm.cu
rename to modules/gpu/src/cuda/stereobm.cu
diff --git a/modules/gpu/src/cudastream.cpp b/modules/gpu/src/cudastream.cpp
index 818dff0..77c2900 100644
--- a/modules/gpu/src/cudastream.cpp
+++ b/modules/gpu/src/cudastream.cpp
@@ -41,57 +41,119 @@
 //M*/

 #include "precomp.hpp"
-//#include "opencv2/gpu/stream_access.hpp"

 using namespace cv;
 using namespace cv::gpu;

-cv::gpu::CudaStream::CudaStream() //: impl( (Impl*)fastMalloc(sizeof(Impl)) )
+#if !defined (HAVE_CUDA)
+
+void cv::gpu::CudaStream::create() { throw_nogpu(); }
+void cv::gpu::CudaStream::release() { throw_nogpu(); }
+cv::gpu::CudaStream::CudaStream() : impl(0) { throw_nogpu(); }
+cv::gpu::CudaStream::~CudaStream() { throw_nogpu(); }
+cv::gpu::CudaStream::CudaStream(const CudaStream& stream) { throw_nogpu(); }
+CudaStream& cv::gpu::CudaStream::operator=(const CudaStream& stream) { throw_nogpu(); return *this; }
+bool cv::gpu::CudaStream::queryIfComplete() { throw_nogpu(); return true; }
+void cv::gpu::CudaStream::waitForCompletion() { throw_nogpu(); }
+void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, Mat& dst) { throw_nogpu(); }
+void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, MatPL& dst) { throw_nogpu(); }
+void cv::gpu::CudaStream::enqueueUpload(const MatPL& src, GpuMat& dst) { throw_nogpu(); }
+void cv::gpu::CudaStream::enqueueUpload(const Mat& src, GpuMat& dst) { throw_nogpu(); }
+void cv::gpu::CudaStream::enqueueCopy(const GpuMat& src, GpuMat& dst) { throw_nogpu(); }
+void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val) { throw_nogpu(); }
+void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask) { throw_nogpu(); }
+void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int type, double a, double b) { throw_nogpu(); }
+
+#else /* !defined (HAVE_CUDA) */
+
+#include "opencv2/gpu/stream_accessor.hpp"
+
+struct CudaStream::Impl
 {
-    //cudaSafeCall( cudaStreamCreate( &impl->stream) );
-}
-cv::gpu::CudaStream::~CudaStream()
+    cudaStream_t stream;
+    int ref_counter;
+};

 namespace
 {
-    if (impl)
+    template <typename S, typename D> void devcopy(const S& src, D& dst, cudaStream_t s, cudaMemcpyKind k)
     {
-        cudaSafeCall( cudaStreamDestroy( *(cudaStream_t*)impl ) );
-        cv::fastFree( impl );
-    }
+        dst.create(src.size(), src.type());
+        size_t bwidth = src.cols * src.elemSize();
+        cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, k, s) );
+    };
 }

-bool cv::gpu::CudaStream::queryIfComplete()
+CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const CudaStream& stream) { return stream.impl->stream; };
+
+void cv::gpu::CudaStream::create()
 {
-    //cudaError_t err = cudaStreamQuery( *(cudaStream_t*)impl );
+    if (impl)
+        release();

-    //if (err == cudaSuccess)
-    //    return true;
+    cudaStream_t stream;
+    cudaSafeCall( cudaStreamCreate( &stream ) );

-    //if (err == cudaErrorNotReady)
-    //    return false;
+    impl = (CudaStream::Impl*)fastMalloc(sizeof(CudaStream::Impl));

-    ////cudaErrorInvalidResourceHandle
-    //cudaSafeCall( err );
-    return true;
+    impl->stream = stream;
+    impl->ref_counter = 1;
 }
-void cv::gpu::CudaStream::waitForCompletion()
+
+void cv::gpu::CudaStream::release()
 {
-    cudaSafeCall( cudaStreamSynchronize( *(cudaStream_t*)impl ) );
+    if( impl && CV_XADD(&impl->ref_counter, -1) == 1 )
+    {
+        cudaSafeCall( cudaStreamDestroy( impl->stream ) );
+        cv::fastFree( impl );
+    }
 }

-void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, Mat& dst)
+cv::gpu::CudaStream::CudaStream() : impl(0) { create(); }
+cv::gpu::CudaStream::~CudaStream() { release(); }
+
+cv::gpu::CudaStream::CudaStream(const CudaStream& stream) : impl(stream.impl)
 {
-//  cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost,
+    if( impl )
+        CV_XADD(&impl->ref_counter, 1);
 }

-void cv::gpu::CudaStream::enqueueUpload(const Mat& src, GpuMat& dst)
+CudaStream& cv::gpu::CudaStream::operator=(const CudaStream& stream)
 {
-    CV_Assert(!"Not implemented");
+    if( this != &stream )
+    {
+        if( stream.impl )
+            CV_XADD(&stream.impl->ref_counter, 1);
+
+        release();
+        impl = stream.impl;
+    }
+    return *this;
 }

-void cv::gpu::CudaStream::enqueueCopy(const GpuMat& src, GpuMat& dst)
+
+bool cv::gpu::CudaStream::queryIfComplete()
 {
-    CV_Assert(!"Not implemented");
+    cudaError_t err = cudaStreamQuery( impl->stream );
+
+    if (err == cudaErrorNotReady || err == cudaSuccess)
+        return err == cudaSuccess;
+
+    cudaSafeCall(err);
+    return false; // unreachable: cudaSafeCall throws for any other status
 }

+void cv::gpu::CudaStream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( impl->stream ) ); }
+
+void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, Mat& dst)
+{
+    // if dst does not already match, allocation will be done, but then dst will not point to page-locked memory
+    CV_Assert(src.cols == dst.cols && src.rows == dst.rows && src.type() == dst.type() );
+    devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost);
+}
+void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, MatPL& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); }
+
+void cv::gpu::CudaStream::enqueueUpload(const MatPL& src, GpuMat& dst){ devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }
+void cv::gpu::CudaStream::enqueueUpload(const Mat& src, GpuMat& dst)  { devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }
+void cv::gpu::CudaStream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToDevice); }
+
 void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val)
 {
     CV_Assert(!"Not implemented");
@@ -102,11 +164,10 @@ void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val, const Gpu
     CV_Assert(!"Not implemented");
 }

-void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int type)
+void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int type, double a, double b)
 {
     CV_Assert(!"Not implemented");
 }

-//struct
cudaStream_t& cv::gpu::CudaStream::getStream() { return stream; } - +#endif /* !defined (HAVE_CUDA) */ \ No newline at end of file diff --git a/modules/gpu/src/initialization.cpp b/modules/gpu/src/initialization.cpp index e88fbb9..416d3d4 100644 --- a/modules/gpu/src/initialization.cpp +++ b/modules/gpu/src/initialization.cpp @@ -45,15 +45,18 @@ using namespace cv; using namespace cv::gpu; -#ifndef HAVE_CUDA + +#if !defined (HAVE_CUDA) CV_EXPORTS int cv::gpu::getCudaEnabledDeviceCount() { return 0; } -CV_EXPORTS string cv::gpu::getDeviceName(int /*device*/) { cudaSafeCall(0); return 0; } -CV_EXPORTS void cv::gpu::setDevice(int /*device*/) { cudaSafeCall(0); } -CV_EXPORTS void cv::gpu::getComputeCapability(int /*device*/, int* /*major*/, int* /*minor*/) { cudaSafeCall(0); } -CV_EXPORTS int cv::gpu::getNumberOfSMs(int /*device*/) { cudaSafeCall(0); return 0; } +CV_EXPORTS string cv::gpu::getDeviceName(int /*device*/) { throw_nogpu(); return 0; } +CV_EXPORTS void cv::gpu::setDevice(int /*device*/) { throw_nogpu(); } +CV_EXPORTS int cv::gpu::getDevice() { throw_nogpu(); return 0; } +CV_EXPORTS void cv::gpu::getComputeCapability(int /*device*/, int* /*major*/, int* /*minor*/) { throw_nogpu(); } +CV_EXPORTS int cv::gpu::getNumberOfSMs(int /*device*/) { throw_nogpu(); return 0; } + -#else +#else /* !defined (HAVE_CUDA) */ CV_EXPORTS int cv::gpu::getCudaEnabledDeviceCount() { @@ -73,6 +76,12 @@ CV_EXPORTS void cv::gpu::setDevice(int device) { cudaSafeCall( cudaSetDevice( device ) ); } +CV_EXPORTS int cv::gpu::getDevice() +{ + int device; + cudaSafeCall( cudaGetDevice( &device ) ); + return device; +} CV_EXPORTS void cv::gpu::getComputeCapability(int device, int* major, int* minor) { @@ -90,4 +99,5 @@ CV_EXPORTS int cv::gpu::getNumberOfSMs(int device) return prop.multiProcessorCount; } -#endif \ No newline at end of file +#endif + diff --git a/modules/gpu/src/gpumat.cpp b/modules/gpu/src/matrix_operations.cpp similarity index 56% rename from modules/gpu/src/gpumat.cpp rename to modules/gpu/src/matrix_operations.cpp index 2849868..1d27afb 100644 --- a/modules/gpu/src/gpumat.cpp +++ b/modules/gpu/src/matrix_operations.cpp @@ -45,23 +45,53 @@ using namespace cv; using namespace cv::gpu; +//////////////////////////////////////////////////////////////////////// //////////////////////////////// GpuMat //////////////////////////////// +//////////////////////////////////////////////////////////////////////// -void GpuMat::upload(const Mat& m) + +#if !defined (HAVE_CUDA) + +namespace cv +{ + namespace gpu + { + void GpuMat::upload(const Mat& /*m*/) { throw_nogpu(); } + void GpuMat::download(cv::Mat& /*m*/) const { throw_nogpu(); } + void GpuMat::copyTo( GpuMat& /*m*/ ) const { throw_nogpu(); } + void GpuMat::copyTo( GpuMat& /*m*/, const GpuMat&/* mask */) const { throw_nogpu(); } + void GpuMat::convertTo( GpuMat& /*m*/, int /*rtype*/, double /*alpha*/, double /*beta*/ ) const { throw_nogpu(); } + GpuMat& GpuMat::operator = (const Scalar& /*s*/) { throw_nogpu(); return *this; } + GpuMat& GpuMat::setTo(const Scalar& /*s*/, const GpuMat& /*mask*/) { throw_nogpu(); return *this; } + GpuMat GpuMat::reshape(int /*new_cn*/, int /*new_rows*/) const { throw_nogpu(); return GpuMat(); } + void GpuMat::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); } + void GpuMat::release() { throw_nogpu(); } + + void MatPL::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); } + void MatPL::release() { throw_nogpu(); } + } + +} + + +#else /* !defined (HAVE_CUDA) */ + + +void 
cv::gpu::GpuMat::upload(const Mat& m) { CV_DbgAssert(!m.empty()); create(m.size(), m.type()); cudaSafeCall( cudaMemcpy2D(data, step, m.data, m.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) ); } -void GpuMat::download(cv::Mat& m) const +void cv::gpu::GpuMat::download(cv::Mat& m) const { CV_DbgAssert(!this->empty()); m.create(size(), type()); cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) ); } -void GpuMat::copyTo( GpuMat& m ) const +void cv::gpu::GpuMat::copyTo( GpuMat& m ) const { CV_DbgAssert(!this->empty()); m.create(size(), type()); @@ -69,45 +99,30 @@ void GpuMat::copyTo( GpuMat& m ) const cudaSafeCall( cudaThreadSynchronize() ); } -void GpuMat::copyTo( GpuMat& /*m*/, const GpuMat&/* mask */) const -{ +void cv::gpu::GpuMat::copyTo( GpuMat& /*m*/, const GpuMat&/* mask */) const +{ CV_Assert(!"Not implemented"); } -void GpuMat::convertTo( GpuMat& /*m*/, int /*rtype*/, double /*alpha*/, double /*beta*/ ) const +void cv::gpu::GpuMat::convertTo( GpuMat& /*m*/, int /*rtype*/, double /*alpha*/, double /*beta*/ ) const { CV_Assert(!"Not implemented"); } -GpuMat& GpuMat::operator = (const Scalar& s) +GpuMat& cv::gpu::GpuMat::operator = (const Scalar& /*s*/) { - cv::gpu::impl::set_to_without_mask(*this, s.val, this->depth(), this->channels()); + CV_Assert(!"Not implemented"); return *this; } -GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask) +GpuMat& cv::gpu::GpuMat::setTo(const Scalar& /*s*/, const GpuMat& /*mask*/) { - CV_Assert(mask.type() == CV_32F); - - CV_DbgAssert(!this->empty()); - - this->channels(); - this->depth(); - - if (mask.empty()) - { - cv::gpu::impl::set_to_without_mask(*this, s.val, this->depth(), this->channels()); - } - else - { - cv::gpu::impl::set_to_with_mask(*this, s.val, mask, this->depth(), this->channels()); - } - + CV_Assert(!"Not implemented"); return *this; } -GpuMat GpuMat::reshape(int new_cn, int new_rows) const +GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const { GpuMat hdr = *this; @@ -148,7 +163,7 @@ GpuMat GpuMat::reshape(int new_cn, int new_rows) const return hdr; } -void GpuMat::create(int _rows, int _cols, int _type) +void cv::gpu::GpuMat::create(int _rows, int _cols, int _type) { _type &= TYPE_MASK; if( rows == _rows && cols == _cols && type() == _type && data ) @@ -162,7 +177,7 @@ void GpuMat::create(int _rows, int _cols, int _type) rows = _rows; cols = _cols; - size_t esz = elemSize(); + size_t esz = elemSize(); void *dev_ptr; cudaSafeCall( cudaMallocPitch(&dev_ptr, &step, esz * cols, rows) ); @@ -174,19 +189,19 @@ void GpuMat::create(int _rows, int _cols, int _type) size_t nettosize = (size_t)_nettosize; datastart = data = (uchar*)dev_ptr; - dataend = data + nettosize; + dataend = data + nettosize; refcount = (int*)fastMalloc(sizeof(*refcount)); *refcount = 1; } } -void GpuMat::release() +void cv::gpu::GpuMat::release() { if( refcount && CV_XADD(refcount, -1) == 1 ) { fastFree(refcount); - cudaSafeCall( cudaFree(datastart) ); + cudaSafeCall( cudaFree(datastart) ); } data = datastart = dataend = 0; step = rows = cols = 0; @@ -194,7 +209,52 @@ void GpuMat::release() } +/////////////////////////////////////////////////////////////////////// +//////////////////////////////// MatPL //////////////////////////////// +/////////////////////////////////////////////////////////////////////// + +void cv::gpu::MatPL::create(int _rows, int _cols, int _type) +{ + _type &= TYPE_MASK; + if( rows == _rows && cols == _cols && type() == _type && data ) + return; + if( data ) + 
+        release();
+    CV_DbgAssert( _rows >= 0 && _cols >= 0 );
+    if( _rows > 0 && _cols > 0 )
+    {
+        flags = Mat::MAGIC_VAL + Mat::CONTINUOUS_FLAG + _type;
+        rows = _rows;
+        cols = _cols;
+        step = elemSize()*cols;
+        int64 _nettosize = (int64)step*rows;
+        size_t nettosize = (size_t)_nettosize;
+        if( _nettosize != (int64)nettosize )
+            CV_Error(CV_StsNoMem, "Too big buffer is allocated");
+        size_t datasize = alignSize(nettosize, (int)sizeof(*refcount));
+
+        //datastart = data = (uchar*)fastMalloc(datasize + sizeof(*refcount));
+        void *ptr;
+        cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) );
+        datastart = data = (uchar*)ptr;
+        dataend = data + nettosize;
+        refcount = (int*)cv::fastMalloc(sizeof(*refcount));
+        *refcount = 1;
+    }
+}
+
+void cv::gpu::MatPL::release()
+{
+    if( refcount && CV_XADD(refcount, -1) == 1 )
+    {
+        cudaSafeCall( cudaFreeHost(datastart ) );
+        fastFree(refcount);
+    }
+    data = datastart = dataend = 0;
+    step = rows = cols = 0;
+    refcount = 0;
+}
+
+#endif /* !defined (HAVE_CUDA) */
\ No newline at end of file
diff --git a/modules/gpu/src/precomp.cpp b/modules/gpu/src/precomp.cpp
index c4a61d9..d0216db 100644
--- a/modules/gpu/src/precomp.cpp
+++ b/modules/gpu/src/precomp.cpp
@@ -44,7 +44,13 @@

 /* End of file. */

-extern "C" void cv::gpu::error( const char *error_string, const char *file, const int line, const char *func)
-{
-    cv::error( cv::Exception(CV_GpuApiCallError, error_string, func, file, line) );
-}
+namespace cv
+{
+    namespace gpu
+    {
+        extern "C" void error(const char *error_string, const char *file, const int line, const char *func)
+        {
+            cv::error( cv::Exception(CV_GpuApiCallError, error_string, func, file, line) );
+        }
+    }
+}
diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp
index ca87d9a..a632af0 100644
--- a/modules/gpu/src/precomp.hpp
+++ b/modules/gpu/src/precomp.hpp
@@ -53,30 +53,17 @@
 #include
 #include "opencv2/gpu/gpu.hpp"
-#include "cuda_shared.hpp"

-#ifndef HAVE_CUDA
-    #define cudaSafeCall(expr) CV_Error(CV_GpuNotFound, "The library is compilled with no GPU support")
-    #define cudaCallerSafeCall(expr) CV_Error(CV_GpuNotFound, "The library is compilled with no GPU support")
+#if defined(HAVE_CUDA)

-#else /* HAVE_CUDA */
+    #include "cuda_shared.hpp"
+    #include "cuda_runtime_api.h"

-#if _MSC_VER >= 1200
-    #pragma warning (disable : 4100 4211 4201 4408)
-#endif
-
-#include "cuda_runtime_api.h"
-
-#ifdef __GNUC__
-    #define cudaSafeCall(expr) { cudaError_t err = expr; if(cudaSuccess != err) cv::gpu::error(cudaGetErrorString(err), __FILE__, __LINE__, __func__); }
-#else
-    #define cudaSafeCall(expr) { cudaError_t err = expr; if(cudaSuccess != err) cv::gpu::error(cudaGetErrorString(err), __FILE__, __LINE__); }
-#endif
+#else /* defined(HAVE_CUDA) */

-#define cudaCallerSafeCall(expr) expr;
+    static inline void throw_nogpu() { CV_Error(CV_GpuNotFound, "The library is compiled with no GPU support"); }
+
+#endif /* defined(HAVE_CUDA) */

-
-#endif /* HAVE_CUDA */
-
-#endif
+#endif /* __OPENCV_PRECOMP_H__ */
diff --git a/modules/gpu/src/stereobm_gpu.cpp b/modules/gpu/src/stereobm_gpu.cpp
index fb15631..ae96700 100644
--- a/modules/gpu/src/stereobm_gpu.cpp
+++ b/modules/gpu/src/stereobm_gpu.cpp
@@ -44,15 +44,45 @@

 using namespace cv;
 using namespace cv::gpu;

+
+#if !defined (HAVE_CUDA)
+
+cv::gpu::StereoBM_GPU::StereoBM_GPU() { throw_nogpu(); }
+cv::gpu::StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_) { throw_nogpu(); }
+
+bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable() { throw_nogpu(); return false; }
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity) { throw_nogpu(); }
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream) { throw_nogpu(); }
+
+
+#else /* !defined (HAVE_CUDA) */

-StereoBM_GPU::StereoBM_GPU() : preset(BASIC_PRESET), ndisp(64) {}
-StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_) : preset(preset_), ndisp(ndisparities_)
+cv::gpu::StereoBM_GPU::StereoBM_GPU() : preset(BASIC_PRESET), ndisp(64) {}
+cv::gpu::StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_) : preset(preset_), ndisp(ndisparities_)
 {
     const int max_supported_ndisp = 1 << (sizeof(unsigned char) * 8);
     CV_Assert(ndisp <= max_supported_ndisp);
+    CV_Assert(ndisp % 8 == 0);
+}
+
+bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable()
+{
+    if (0 == getCudaEnabledDeviceCount())
+        return false;
+
+    int device = getDevice();
+
+    int minor, major;
+    getComputeCapability(device, &major, &minor);
+    int numSM = getNumberOfSMs(device);
+
+    if (major > 1 || numSM > 16)
+        return true;
+
+    return false;
 }

-void StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity) const
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity)
 {
     CV_DbgAssert(left.rows == right.rows && left.cols == right.cols);
     CV_DbgAssert(left.type() == CV_8UC1);
@@ -67,6 +97,13 @@ void StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat&
     }

     DevMem2D disp = disparity;
-    DevMem2D_<unsigned int> mssd = minSSD;
-    cudaCallerSafeCall( impl::stereoBM_GPU(left, right, disp, ndisp, mssd) );
+    DevMem2D_<unsigned int> mssd = minSSD;
+    impl::stereoBM_GPU(left, right, disp, ndisp, mssd);
 }
+
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream)
+{
+    CV_Assert(!"Not implemented");
+}
+
+#endif /* !defined (HAVE_CUDA) */
\ No newline at end of file
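---

Usage sketch: how the new page-locked MatPL and the reworked CudaStream above are meant to be combined for asynchronous transfers. This is a minimal example, not part of the patch; it assumes a CUDA-enabled build (HAVE_CUDA) and a hypothetical input file "frame.png", and uses only the interfaces declared in gpu.hpp above.

    #include "opencv2/gpu/gpu.hpp"
    #include "opencv2/highgui/highgui.hpp"

    using namespace cv;

    int main()
    {
        // the only call that never throws when the library is built without CUDA
        if (gpu::getCudaEnabledDeviceCount() == 0)
            return 0;

        Mat frame = imread("frame.png", 0);              // hypothetical 8-bit grayscale input

        gpu::MatPL host_src(frame);                      // deep copy into page-locked memory
        gpu::MatPL host_dst(frame.size(), frame.type());
        gpu::GpuMat dev_src, dev_dst;

        gpu::CudaStream stream;                          // create() runs in the constructor
        stream.enqueueUpload(host_src, dev_src);         // async: source must be page-locked
        stream.enqueueCopy(dev_src, dev_dst);            // device-to-device copy on the same stream
        stream.enqueueDownload(dev_dst, host_dst);       // async download into page-locked memory

        stream.waitForCompletion();                      // block until all queued work finishes

        Mat result = host_dst;                           // header over the same data, no copy
        return 0;
    }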
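A similar sketch for the reworked StereoBM_GPU entry points, using the new static checkIfGpuCallReasonable() heuristic to choose between the GPU and CPU block matchers. The rectified pair file names are hypothetical, and BASIC_PRESET is assumed to be an enum member of each class, as the constructors above suggest.

    #include "opencv2/gpu/gpu.hpp"
    #include "opencv2/highgui/highgui.hpp"
    #include "opencv2/calib3d/calib3d.hpp"

    using namespace cv;

    int main()
    {
        Mat left  = imread("left.png", 0);   // hypothetical rectified pair, CV_8UC1
        Mat right = imread("right.png", 0);
        Mat disparity;

        if (gpu::StereoBM_GPU::checkIfGpuCallReasonable())
        {
            gpu::StereoBM_GPU bm(gpu::StereoBM_GPU::BASIC_PRESET, 64); // ndisp must be a multiple of 8
            gpu::GpuMat d_left(left), d_right(right), d_disp;          // blocking uploads
            bm(d_left, d_right, d_disp);    // blocking version; the async overload takes a CudaStream
            d_disp.download(disparity);     // output disparity has CV_8U type
        }
        else
        {
            StereoBM bm(StereoBM::BASIC_PRESET, 64);   // fall back to the CPU block matcher
            bm(left, right, disparity);
        }
        return 0;
    }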
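Finally, a sketch of what the new stream_accessor.hpp enables: pulling the raw cudaStream_t out of a CudaStream so user-written kernels can be queued on the same stream. The launcher my_kernel_launcher is hypothetical (it stands for any nvcc-compiled entry point); including this header requires the CUDA Toolkit, as its comment notes.

    #include "opencv2/gpu/gpu.hpp"
    #include "opencv2/gpu/stream_accessor.hpp" // the one public header that needs the CUDA Toolkit

    // hypothetical user kernel launcher, compiled by nvcc in a separate translation unit
    extern "C" void my_kernel_launcher(unsigned char* data, size_t step, int rows, int cols, cudaStream_t s);

    void process(cv::gpu::GpuMat& img, cv::gpu::CudaStream& stream)
    {
        // extract the raw stream so the user kernel runs on the same queue
        // as the enqueue* operations already submitted to this CudaStream
        cudaStream_t s = cv::gpu::StreamAccessor::getStream(stream);
        my_kernel_launcher(img.data, img.step, img.rows, img.cols, s);
    }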