-set(name "gpu")\r
-set(DEPS "opencv_core")\r
\r
+set(name "gpu") \r
+set(DEPS "opencv_core")\r
+ \r
\r
set(the_target "opencv_${name}")\r
\r
add_definitions(-DCVAPI_EXPORTS)\r
\r
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include"\r
- "${CMAKE_CURRENT_SOURCE_DIR}/cuda"\r
+ "${CMAKE_CURRENT_SOURCE_DIR}/src/cuda"\r
"${CMAKE_CURRENT_SOURCE_DIR}/src"\r
"${CMAKE_CURRENT_BINARY_DIR}")\r
\r
foreach(d ${DEPS})\r
- if(${d} MATCHES "opencv_")\r
+ if(${d} MATCHES "opencv_") \r
string(REPLACE "opencv_" "${CMAKE_CURRENT_SOURCE_DIR}/../" d_dir ${d})\r
- include_directories("${d_dir}/include")\r
+ include_directories("${d_dir}/include") \r
endif()\r
-endforeach()\r
+endforeach() \r
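+# e.g. the DEPS entry "opencv_core" yields d_dir "${CMAKE_CURRENT_SOURCE_DIR}/../core",\r
+# so the sibling module's "include" directory lands on the header search path\r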
\r
file(GLOB lib_srcs "src/*.cpp")\r
file(GLOB lib_int_hdrs "src/*.h*")\r
-file(GLOB lib_cuda "cuda/*.cu")\r
-file(GLOB lib_cuda_hdrs "cuda/*.h*")\r
+file(GLOB lib_cuda "src/cuda/*.cu")\r
+file(GLOB lib_cuda_hdrs "src/cuda/*.h*") \r
source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})\r
source_group("Cuda" FILES ${lib_cuda} ${lib_cuda_hdrs})\r
\r
-file(GLOB lib_hdrs "include/opencv2/${name}/*.h*")\r
+file(GLOB lib_hdrs "include/opencv2/${name}/*.h*") \r
source_group("Include" FILES ${lib_hdrs})\r
\r
if (HAVE_CUDA)\r
link_directories(${CUDA_LIBRARIES})\r
\r
if (UNIX OR APPLE)\r
- set (CUDA_NVCC_FLAGS "-Xcompiler;-fPIC")\r
- endif()\r
-\r
+ set (CUDA_NVCC_FLAGS "-Xcompiler;-fPIC;")\r
+ endif() \r
+ \r
CUDA_COMPILE(cuda_objs ${lib_cuda})\r
#CUDA_BUILD_CLEAN_TARGET()\r
endif()\r
-\r
+ \r
\r
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${cuda_objs})\r
\r
if(${CMAKE_GENERATOR} MATCHES "Visual*" OR ${CMAKE_GENERATOR} MATCHES "Xcode*")\r
if(${CMAKE_GENERATOR} MATCHES "Visual*")\r
set(${the_target}_pch "src/precomp.cpp")\r
- endif()\r
+ endif() \r
add_native_precompiled_header(${the_target} ${pch_header})\r
elseif(CMAKE_COMPILER_IS_GNUCXX AND ${CMAKE_GENERATOR} MATCHES ".*Makefiles")\r
add_precompiled_header(${the_target} ${pch_header})\r
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include <stddef.h>
-#include "cuda_shared.hpp"
-#include "cuda_runtime.h"
-
-__constant__ float scalar_d[4];
-
-namespace mat_operators
-{
-
- template <typename T, int channels, int count = channels>
- struct unroll
- {
- __device__ static void unroll_set(T * mat, size_t i)
- {
- mat[i] = static_cast<T>(scalar_d[i % channels]);
- unroll<T, channels, count - 1>::unroll_set(mat, i+1);
- }
-
- __device__ static void unroll_set_with_mask(T * mat, float mask, size_t i)
- {
- mat[i] = mask * static_cast<T>(scalar_d[i % channels]);
- unroll<T, channels, count - 1>::unroll_set_with_mask(mat, mask, i+1);
- }
- };
-
- template <typename T, int channels>
- struct unroll<T,channels,0>
- {
- __device__ static void unroll_set(T * , size_t){}
- __device__ static void unroll_set_with_mask(T * , float, size_t){}
- };
-
- template <typename T, int channels>
- __global__ void kernel_set_to_without_mask(T * mat)
- {
- size_t i = (blockIdx.x * blockDim.x + threadIdx.x) * sizeof(T);
- unroll<T, channels>::unroll_set(mat, i);
- }
-
- template <typename T, int channels>
- __global__ void kernel_set_to_with_mask(T * mat, const float * mask)
- {
- size_t i = (blockIdx.x * blockDim.x + threadIdx.x) * sizeof(T);
- unroll<T, channels>::unroll_set_with_mask(mat, i, mask[i]);
- }
-}
-
-
-extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const double * scalar, const DevMem2D& mask, int depth, int channels)
-{
- scalar_d[0] = scalar[0];
- scalar_d[1] = scalar[1];
- scalar_d[2] = scalar[2];
- scalar_d[3] = scalar[3];
-
- int numBlocks = mat.rows * mat.step / 256;
-
- dim3 threadsPerBlock(256);
-
- if (channels == 1)
- {
- if (depth == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char, 1><<<numBlocks,threadsPerBlock>>>(mat.ptr, (float *)mask.ptr);
- if (depth == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 1><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (float *)mask.ptr);
- if (depth == 4) ::mat_operators::kernel_set_to_with_mask<unsigned int, 1><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr, (float *)mask.ptr);
- }
- if (channels == 2)
- {
- if (depth == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char, 2><<<numBlocks,threadsPerBlock>>>(mat.ptr, (float *)mask.ptr);
- if (depth == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 2><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (float *)mask.ptr);
- if (depth == 4) ::mat_operators::kernel_set_to_with_mask<unsigned int, 2><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr, (float *)mask.ptr);
- }
- if (channels == 3)
- {
- if (depth == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char, 3><<<numBlocks,threadsPerBlock>>>(mat.ptr, (float *)mask.ptr);
- if (depth == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 3><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (float *)mask.ptr);
- if (depth == 4) ::mat_operators::kernel_set_to_with_mask<unsigned int, 3><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr, (float *)mask.ptr);
- }
-}
-
-extern "C" void cv::gpu::impl::set_to_without_mask(const DevMem2D& mat, const double * scalar, int depth, int channels)
-{
- scalar_d[0] = scalar[0];
- scalar_d[1] = scalar[1];
- scalar_d[2] = scalar[2];
- scalar_d[3] = scalar[3];
-
- int numBlocks = mat.rows * mat.step / 256;
-
- dim3 threadsPerBlock(256);
-
- if (channels == 1)
- {
- if (depth == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char, 1><<<numBlocks,threadsPerBlock>>>(mat.ptr);
- if (depth == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 1><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr);
- if (depth == 4) ::mat_operators::kernel_set_to_without_mask<unsigned int, 1><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr);
- }
- if (channels == 2)
- {
- if (depth == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char, 2><<<numBlocks,threadsPerBlock>>>(mat.ptr);
- if (depth == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 2><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr);
- if (depth == 4) ::mat_operators::kernel_set_to_without_mask<unsigned int, 2><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr);
- }
- if (channels == 3)
- {
- if (depth == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char, 3><<<numBlocks,threadsPerBlock>>>(mat.ptr);
- if (depth == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 3><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr);
- if (depth == 4) ::mat_operators::kernel_set_to_without_mask<unsigned int, 3><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr);
- }
-}
namespace gpu\r
{\r
// Simple lightweight structure that encapsulates image ptr on device, its pitch and its sizes.\r
- // It is intended to pass to nvcc-compiled code.\r
+ // It is intended to be passed to nvcc-compiled code, since GpuMat depends on headers that nvcc can't compile.\r
\r
template<typename T = unsigned char>\r
struct DevMem2D_\r
{\r
- enum { elem_size = sizeof(T) };\r
+ typedef T elem_t;\r
+ enum { elem_size = sizeof(elem_t) };\r
\r
int cols;\r
int rows;\r
{ \r
//////////////////////////////// Initialization ////////////////////////\r
\r
+ //! This is the only function that does not throw exceptions if the library is compiled without Cuda.\r
CV_EXPORTS int getCudaEnabledDeviceCount();\r
+\r
+ //! Functions below throw cv::Exception if the library is compiled without Cuda.\r
CV_EXPORTS string getDeviceName(int device);\r
CV_EXPORTS void setDevice(int device); \r
+ CV_EXPORTS int getDevice(); \r
\r
CV_EXPORTS void getComputeCapability(int device, int* major, int* minor);\r
CV_EXPORTS int getNumberOfSMs(int device);\r
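\r
    // A minimal usage sketch (assumes the library was built with Cuda and\r
    // at least one device is present):\r
    //\r
    //   int count = cv::gpu::getCudaEnabledDeviceCount();\r
    //   for (int i = 0; i < count; ++i)\r
    //   {\r
    //       int major, minor;\r
    //       cv::gpu::getComputeCapability(i, &major, &minor);\r
    //       std::cout << cv::gpu::getDeviceName(i) << " (sm_" << major << minor\r
    //                 << ", " << cv::gpu::getNumberOfSMs(i) << " SMs)" << std::endl;\r
    //   }\r
    //   cv::gpu::setDevice(0);\r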
\r
//////////////////////////////// GpuMat ////////////////////////////////\r
\r
+ //! Smart pointer for GPU memory with reference counting. Its interface is mostly similar to cv::Mat.\r
class CV_EXPORTS GpuMat\r
{\r
public:\r
GpuMat(const GpuMat& m, const Rect& roi);\r
\r
//! builds GpuMat from Mat. Performs blocking upload to device.\r
- GpuMat (const Mat& m);\r
+ explicit GpuMat (const Mat& m);\r
\r
//! destructor - calls release()\r
~GpuMat();\r
uchar* dataend;\r
};\r
\r
+ //////////////////////////////// MatPL ////////////////////////////////\r
+ // MatPL is a limited cv::Mat with page-locked memory allocation.\r
+ // Page-locked memory is only needed for asynchronous and faster copying to the GPU.\r
+ // It is convertible to a cv::Mat header without reference counting,\r
+ // so you can use it with other OpenCV functions.\r
+ \r
+ class CV_EXPORTS MatPL\r
+ {\r
+ public: \r
+\r
+ // Not supported yet. Current behaviour is the same as ALLOC_DEFAULT.\r
+ //enum { ALLOC_DEFAULT = 0, ALLOC_PORTABLE = 1, ALLOC_WRITE_COMBINED = 4 }\r
+\r
+ MatPL(); \r
+ MatPL(const MatPL& m); \r
+\r
+ MatPL(int _rows, int _cols, int _type);\r
+ MatPL(Size _size, int _type); \r
+\r
+ //! creates from cv::Mat, copying the data\r
+ explicit MatPL(const Mat& m);\r
+ \r
+ ~MatPL(); \r
+\r
+ MatPL& operator = (const MatPL& m);\r
+ \r
+ //! returns deep copy of the matrix, i.e. the data is copied\r
+ MatPL clone() const;\r
+ \r
+ //! allocates new matrix data unless the matrix already has specified size and type. \r
+ void create(int _rows, int _cols, int _type);\r
+ void create(Size _size, int _type); \r
+\r
+ //! decrements the reference counter and releases the memory if needed.\r
+ void release();\r
+\r
+ //! returns a matrix header with reference counting disabled for the MatPL data.\r
+ Mat createMatHeader() const;\r
+ operator Mat() const;\r
+ \r
+ // Please see cv::Mat for descriptions\r
+ bool isContinuous() const; \r
+ size_t elemSize() const; \r
+ size_t elemSize1() const; \r
+ int type() const; \r
+ int depth() const; \r
+ int channels() const; \r
+ size_t step1() const; \r
+ Size size() const; \r
+ bool empty() const;\r
+ \r
+ // Please see cv::Mat for descriptions\r
+ int flags; \r
+ int rows, cols; \r
+ size_t step;\r
+\r
+ uchar* data; \r
+ int* refcount; \r
+\r
+ uchar* datastart;\r
+ uchar* dataend;\r
+ };\r
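+\r
+ // A minimal sketch of the intended use (sizes/types are hypothetical):\r
+ //\r
+ //   MatPL host_frame(480, 640, CV_8UC1); // page-locked allocation\r
+ //   Mat header = host_frame;             // plain Mat header over the same data, no copy\r
+ //   ... fill `header` with ordinary OpenCV calls, then hand host_frame\r
+ //   to CudaStream::enqueueUpload (declared below) for async transfer.\r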
+\r
//////////////////////////////// CudaStream ////////////////////////////////\r
+ // Encapsulates a Cuda stream. Provides an interface for asynchronous copying.\r
+ // Passed to each function that supports asynchronous kernel execution.\r
+ // Reference counting is enabled.\r
\r
- class CudaStream\r
+ class CV_EXPORTS CudaStream\r
{\r
public:\r
-\r
- static CudaStream empty();\r
-\r
CudaStream(); \r
~CudaStream();\r
\r
+ CudaStream(const CudaStream&); \r
+ CudaStream& operator=(const CudaStream&);\r
+\r
bool queryIfComplete();\r
- void waitForCompletion(); \r
+ void waitForCompletion(); \r
\r
- //calls cudaMemcpyAsync\r
+ //! downloads asynchronously.\r
+ // Warning! cv::Mat must point to page-locked memory (i.e. to MatPL data or to its subMat)\r
+ void enqueueDownload(const GpuMat& src, MatPL& dst);\r
void enqueueDownload(const GpuMat& src, Mat& dst);\r
- void enqueueUpload(const Mat& src, GpuMat& dst);\r
- void enqueueCopy(const GpuMat& src, GpuMat& dst);\r
\r
- // calls cudaMemset2D asynchronous for single channel. Invoke kernel for some multichannel.\r
- void enqueueMemSet(const GpuMat& src, Scalar val);\r
+ //! uploads asynchronously.\r
+ // Warning! cv::Mat must point to page-locked memory (i.e. to MatPL data or to its ROI)\r
+ void enqueueUpload(const MatPL& src, GpuMat& dst); \r
+ void enqueueUpload(const Mat& src, GpuMat& dst);\r
\r
- // invoke kernel asynchronous because of mask\r
+ void enqueueCopy(const GpuMat& src, GpuMat& dst);\r
+ \r
+ void enqueueMemSet(const GpuMat& src, Scalar val); \r
void enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask);\r
\r
// converts matrix type, e.g. from float to uchar, depending on type\r
- void enqueueConvert(const GpuMat& src, GpuMat& dst, int type); \r
- \r
- struct Impl;\r
- const Impl& getImpl() const;\r
+ void enqueueConvert(const GpuMat& src, GpuMat& dst, int type, double a = 1, double b = 0); \r
private:\r
- \r
- Impl *impl; \r
-\r
- \r
- \r
- CudaStream(const CudaStream&); \r
- CudaStream& operator=(const CudaStream&);\r
+ void create();\r
+ void release();\r
+ struct Impl;\r
+ Impl *impl; \r
+ friend struct StreamAccessor;\r
};\r
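\r
    // A minimal async pipeline sketch (sizes/types are hypothetical):\r
    //\r
    //   MatPL host_src(480, 640, CV_8UC1), host_dst;\r
    //   GpuMat dev_src, dev_dst;\r
    //   CudaStream stream;\r
    //   stream.enqueueUpload(host_src, dev_src);   // H->D, asynchronous\r
    //   stream.enqueueCopy(dev_src, dev_dst);      // D->D, asynchronous\r
    //   stream.enqueueDownload(dev_dst, host_dst); // D->H, asynchronous\r
    //   stream.waitForCompletion();                // block until the stream drains\r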
\r
//////////////////////////////// StereoBM_GPU ////////////////////////////////\r
StereoBM_GPU(int preset, int ndisparities=0);\r
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair\r
//! Output disparity has CV_8U type.\r
- void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity) const; \r
+ void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity);\r
+\r
+ //! Async version\r
+ void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream);\r
+\r
+ //! Some heuristics that try to estimate\r
+ // whether the current GPU will be faster than the CPU for this algorithm.\r
+ // It queries the currently active device.\r
+ static bool checkIfGpuCallReasonable();\r
private:\r
- mutable GpuMat minSSD;\r
+ GpuMat minSSD;\r
int preset;\r
int ndisp;\r
};\r
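\r
    // Typical call sequence (sketch; assumes `left`/`right` are CV_8UC1 GpuMats\r
    // and that BASIC_PRESET is the preset constant used by the default ctor):\r
    //\r
    //   if (StereoBM_GPU::checkIfGpuCallReasonable())\r
    //   {\r
    //       StereoBM_GPU bm(StereoBM_GPU::BASIC_PRESET, 64);\r
    //       GpuMat disparity;\r
    //       bm(left, right, disparity); // disparity gets CV_8U type\r
    //   }\r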
}\r
}\r
-\r
-\r
-\r
-#include "opencv2/gpu/gpumat.hpp"\r
+#include "opencv2/gpu/matrix_operations.hpp"\r
\r
#endif /* __OPENCV_GPU_HPP__ */
\ No newline at end of file
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////\r
-//\r
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
-//\r
-// By downloading, copying, installing or using the software you agree to this license.\r
-// If you do not agree to this license, do not download, install,\r
-// copy or use the software.\r
-//\r
-//\r
-// License Agreement\r
-// For Open Source Computer Vision Library\r
-//\r
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
-// Third party copyrights are property of their respective owners.\r
-//\r
-// Redistribution and use in source and binary forms, with or without modification,\r
-// are permitted provided that the following conditions are met:\r
-//\r
-// * Redistribution's of source code must retain the above copyright notice,\r
-// this list of conditions and the following disclaimer.\r
-//\r
-// * Redistribution's in binary form must reproduce the above copyright notice,\r
-// this list of conditions and the following disclaimer in the documentation\r
-// and/or other GpuMaterials provided with the distribution.\r
-//\r
-// * The name of the copyright holders may not be used to endorse or promote products\r
-// derived from this software without specific prior written permission.\r
-//\r
-// This software is provided by the copyright holders and contributors "as is" and\r
-// any express or implied warranties, including, but not limited to, the implied\r
-// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
-// In no event shall the Intel Corporation or contributors be liable for any direct,\r
-// indirect, incidental, special, exemplary, or consequential damages\r
-// (including, but not limited to, procurement of substitute goods or services;\r
-// loss of use, data, or profits; or business interruption) however caused\r
-// and on any theory of liability, whether in contract, strict liability,\r
-// or tort (including negligence or otherwise) arising in any way out of\r
-// the use of this software, even if advised of the possibility of such damage.\r
-//\r
-//M*/\r
-\r
-#ifndef __OPENCV_GPU_MATPL_HPP__\r
-#define __OPENCV_GPU_MATPL_HPP__\r
-\r
-#include "opencv2/core/core.hpp"\r
-\r
-namespace cv\r
-{\r
- namespace gpu\r
- { \r
-\r
- //////////////////////////////// MatPL ////////////////////////////////\r
-\r
- //class CV_EXPORTS MatPL : private Mat\r
- //{\r
- //public: \r
- // MatPL() {} \r
- // MatPL(int _rows, int _cols, int _type) : Mat(_rows, _cols, _type) {}\r
- // MatPL(Size _size, int _type) : Mat(_size, _type) {}\r
- // \r
- // Mat(int _rows, int _cols, int _type, const Scalar& _s) : Mat\r
- // MatPL(Size _size, int _type, const Scalar& _s);\r
- // //! copy constructor\r
- // MatPL(const Mat& m);\r
- // //! constructor for matrix headers pointing to user-allocated data\r
- // MatPL(int _rows, int _cols, int _type, void* _data, size_t _step=AUTO_STEP);\r
- // MatPL(Size _size, int _type, void* _data, size_t _step=AUTO_STEP);\r
- // //! creates a matrix header for a part of the bigger matrix\r
- // MatPL(const Mat& m, const Range& rowRange, const Range& colRange);\r
- // MatPL(const Mat& m, const Rect& roi);\r
- // //! converts old-style CvMat to the new matrix; the data is not copied by default\r
- // Mat(const CvMat* m, bool copyData=false);\r
- // MatPL converts old-style IplImage to the new matrix; the data is not copied by default\r
- // MatPL(const IplImage* img, bool copyData=false);\r
- // //! builds matrix from std::vector with or without copying the data\r
- // template<typename _Tp> explicit Mat(const vector<_Tp>& vec, bool copyData=false);\r
- // //! builds matrix from cv::Vec; the data is copied by default\r
- // template<typename _Tp, int n> explicit Mat(const Vec<_Tp, n>& vec,\r
- // bool copyData=true);\r
- // //! builds matrix from cv::Matx; the data is copied by default\r
- // template<typename _Tp, int m, int n> explicit Mat(const Matx<_Tp, m, n>& mtx,\r
- // bool copyData=true);\r
- // //! builds matrix from a 2D point\r
- // template<typename _Tp> explicit Mat(const Point_<_Tp>& pt);\r
- // //! builds matrix from a 3D point\r
- // template<typename _Tp> explicit Mat(const Point3_<_Tp>& pt);\r
- // //! builds matrix from comma initializer\r
- // template<typename _Tp> explicit Mat(const MatCommaInitializer_<_Tp>& commaInitializer);\r
- // //! helper constructor to compile matrix expressions\r
- // Mat(const MatExpr_Base& expr);\r
- // //! destructor - calls release()\r
- // ~Mat();\r
- // //! assignment operators\r
- // Mat& operator = (const Mat& m);\r
- // Mat& operator = (const MatExpr_Base& expr);\r
-\r
- // operator MatExpr_<Mat, Mat>() const;\r
-\r
- // //! returns a new matrix header for the specified row\r
- // Mat row(int y) const;\r
- // //! returns a new matrix header for the specified column\r
- // Mat col(int x) const;\r
- // //! ... for the specified row span\r
- // Mat rowRange(int startrow, int endrow) const;\r
- // Mat rowRange(const Range& r) const;\r
- // //! ... for the specified column span\r
- // Mat colRange(int startcol, int endcol) const;\r
- // Mat colRange(const Range& r) const;\r
- // //! ... for the specified diagonal\r
- // // (d=0 - the main diagonal,\r
- // // >0 - a diagonal from the lower half,\r
- // // <0 - a diagonal from the upper half)\r
- // Mat diag(int d=0) const;\r
- // //! constructs a square diagonal matrix which main diagonal is vector "d"\r
- // static Mat diag(const Mat& d);\r
-\r
- // //! returns deep copy of the matrix, i.e. the data is copied\r
- // Mat clone() const;\r
- // //! copies the matrix content to "m".\r
- // // It calls m.create(this->size(), this->type()).\r
- // void copyTo( Mat& m ) const;\r
- // //! copies those matrix elements to "m" that are marked with non-zero mask elements.\r
- // void copyTo( Mat& m, const Mat& mask ) const;\r
- // //! converts matrix to another datatype with optional scalng. See cvConvertScale.\r
- // void convertTo( Mat& m, int rtype, double alpha=1, double beta=0 ) const;\r
-\r
- // void assignTo( Mat& m, int type=-1 ) const;\r
-\r
- // //! sets every matrix element to s\r
- // Mat& operator = (const Scalar& s);\r
- // //! sets some of the matrix elements to s, according to the mask\r
- // Mat& setTo(const Scalar& s, const Mat& mask=Mat());\r
- // //! creates alternative matrix header for the same data, with different\r
- // // number of channels and/or different number of rows. see cvReshape.\r
- // Mat reshape(int _cn, int _rows=0) const;\r
-\r
- // //! matrix transposition by means of matrix expressions\r
- // MatExpr_<MatExpr_Op2_<Mat, double, Mat, MatOp_T_<Mat> >, Mat>\r
- // t() const;\r
- // //! matrix inversion by means of matrix expressions\r
- // MatExpr_<MatExpr_Op2_<Mat, int, Mat, MatOp_Inv_<Mat> >, Mat>\r
- // inv(int method=DECOMP_LU) const;\r
- // MatExpr_<MatExpr_Op4_<Mat, Mat, double, char, Mat, MatOp_MulDiv_<Mat> >, Mat>\r
- // //! per-element matrix multiplication by means of matrix expressions\r
- // mul(const Mat& m, double scale=1) const;\r
- // MatExpr_<MatExpr_Op4_<Mat, Mat, double, char, Mat, MatOp_MulDiv_<Mat> >, Mat>\r
- // mul(const MatExpr_<MatExpr_Op2_<Mat, double, Mat, MatOp_Scale_<Mat> >, Mat>& m, double scale=1) const;\r
- // MatExpr_<MatExpr_Op4_<Mat, Mat, double, char, Mat, MatOp_MulDiv_<Mat> >, Mat> \r
- // mul(const MatExpr_<MatExpr_Op2_<Mat, double, Mat, MatOp_DivRS_<Mat> >, Mat>& m, double scale=1) const;\r
-\r
- // //! computes cross-product of 2 3D vectors\r
- // Mat cross(const Mat& m) const;\r
- // //! computes dot-product\r
- // double dot(const Mat& m) const;\r
-\r
- // //! Matlab-style matrix initialization\r
- // static MatExpr_Initializer zeros(int rows, int cols, int type);\r
- // static MatExpr_Initializer zeros(Size size, int type);\r
- // static MatExpr_Initializer ones(int rows, int cols, int type);\r
- // static MatExpr_Initializer ones(Size size, int type);\r
- // static MatExpr_Initializer eye(int rows, int cols, int type);\r
- // static MatExpr_Initializer eye(Size size, int type);\r
-\r
- // //! allocates new matrix data unless the matrix already has specified size and type.\r
- // // previous data is unreferenced if needed.\r
- // void create(int _rows, int _cols, int _type);\r
- // void create(Size _size, int _type);\r
- // //! increases the reference counter; use with care to avoid memleaks\r
- // void addref();\r
- // //! decreases reference counter;\r
- // // deallocate the data when reference counter reaches 0.\r
- // void release();\r
-\r
- // //! locates matrix header within a parent matrix. See below\r
- // void locateROI( Size& wholeSize, Point& ofs ) const;\r
- // //! moves/resizes the current matrix ROI inside the parent matrix.\r
- // Mat& adjustROI( int dtop, int dbottom, int dleft, int dright );\r
- // //! extracts a rectangular sub-matrix\r
- // // (this is a generalized form of row, rowRange etc.)\r
- // Mat operator()( Range rowRange, Range colRange ) const;\r
- // Mat operator()( const Rect& roi ) const;\r
-\r
- // //! converts header to CvMat; no data is copied\r
- // operator CvMat() const;\r
- // //! converts header to IplImage; no data is copied\r
- // operator IplImage() const;\r
-\r
- // //! returns true iff the matrix data is continuous\r
- // // (i.e. when there are no gaps between successive rows).\r
- // // similar to CV_IS_MAT_CONT(cvmat->type)\r
- // bool isContinuous() const;\r
- // //! returns element size in bytes,\r
- // // similar to CV_ELEM_SIZE(cvmat->type)\r
- // size_t elemSize() const;\r
- // //! returns the size of element channel in bytes.\r
- // size_t elemSize1() const;\r
- // //! returns element type, similar to CV_MAT_TYPE(cvmat->type)\r
- // int type() const;\r
- // //! returns element type, similar to CV_MAT_DEPTH(cvmat->type)\r
- // int depth() const;\r
- // //! returns element type, similar to CV_MAT_CN(cvmat->type)\r
- // int channels() const;\r
- // //! returns step/elemSize1()\r
- // size_t step1() const;\r
- // //! returns matrix size:\r
- // // width == number of columns, height == number of rows\r
- // Size size() const;\r
- // //! returns true if matrix data is NULL\r
- // bool empty() const;\r
-\r
- // //! returns pointer to y-th row\r
- // uchar* ptr(int y=0);\r
- // const uchar* ptr(int y=0) const;\r
-\r
- // //! template version of the above method\r
- // template<typename _Tp> _Tp* ptr(int y=0);\r
- // template<typename _Tp> const _Tp* ptr(int y=0) const;\r
-\r
- // //! template methods for read-write or read-only element access.\r
- // // note that _Tp must match the actual matrix type -\r
- // // the functions do not do any on-fly type conversion\r
- // template<typename _Tp> _Tp& at(int y, int x);\r
- // template<typename _Tp> _Tp& at(Point pt);\r
- // template<typename _Tp> const _Tp& at(int y, int x) const;\r
- // template<typename _Tp> const _Tp& at(Point pt) const;\r
- // template<typename _Tp> _Tp& at(int i);\r
- // template<typename _Tp> const _Tp& at(int i) const;\r
-\r
- // //! template methods for iteration over matrix elements.\r
- // // the iterators take care of skipping gaps in the end of rows (if any)\r
- // template<typename _Tp> MatIterator_<_Tp> begin();\r
- // template<typename _Tp> MatIterator_<_Tp> end();\r
- // template<typename _Tp> MatConstIterator_<_Tp> begin() const;\r
- // template<typename _Tp> MatConstIterator_<_Tp> end() const;\r
-\r
- // enum { MAGIC_VAL=0x42FF0000, AUTO_STEP=0, CONTINUOUS_FLAG=CV_MAT_CONT_FLAG };\r
-\r
- // /*! includes several bit-fields:\r
- // - the magic signature\r
- // - continuity flag\r
- // - depth\r
- // - number of channels\r
- // */\r
- // int flags;\r
- // //! the number of rows and columns\r
- // int rows, cols;\r
- // //! a distance between successive rows in bytes; includes the gap if any\r
- // size_t step;\r
- // //! pointer to the data\r
- // uchar* data;\r
-\r
- // //! pointer to the reference counter;\r
- // // when matrix points to user-allocated data, the pointer is NULL\r
- // int* refcount;\r
-\r
- // //! helper fields used in locateROI and adjustROI\r
- // uchar* datastart;\r
- // uchar* dataend;\r
- //};\r
- }\r
-}\r
-\r
-\r
-#endif /* __OPENCV_GPU_MATPL_HPP__ */
\ No newline at end of file
#ifndef __OPENCV_GPU_MATRIX_OPERATIONS_HPP__\r
#define __OPENCV_GPU_MATRIX_OPERATIONS_HPP__\r
\r
-\r
namespace cv\r
{\r
\r
namespace gpu\r
{\r
\r
+////////////////////////////////////////////////////////////////////////\r
//////////////////////////////// GpuMat ////////////////////////////////\r
+////////////////////////////////////////////////////////////////////////\r
\r
-inline GpuMat::GpuMat()\r
- : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) {}\r
+inline GpuMat::GpuMat() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) {}\r
\r
-inline GpuMat::GpuMat(int _rows, int _cols, int _type)\r
- : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)\r
+inline GpuMat::GpuMat(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)\r
{\r
if( _rows > 0 && _cols > 0 )\r
create( _rows, _cols, _type );\r
}\r
\r
-inline GpuMat::GpuMat(Size _size, int _type)\r
- : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)\r
+inline GpuMat::GpuMat(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)\r
{\r
if( _size.height > 0 && _size.width > 0 )\r
create( _size.height, _size.width, _type );\r
\r
//CPP GpuMat& GpuMat::operator = (const Scalar& s);\r
//CPP GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask=GpuMat());\r
-\r
//CPP GpuMat GpuMat::reshape(int _cn, int _rows=0) const;\r
-\r
-//CPP void GpuMat::create(int _rows, int _cols, int _type);\r
inline void GpuMat::create(Size _size, int _type) { create(_size.height, _size.width, _type); }\r
-\r
+//CPP void GpuMat::create(int _rows, int _cols, int _type);\r
//CPP void GpuMat::release();\r
\r
inline void GpuMat::swap(GpuMat& b) \r
static inline void swap( GpuMat& a, GpuMat& b ) { a.swap(b); }\r
\r
\r
+///////////////////////////////////////////////////////////////////////\r
+//////////////////////////////// MatPL ////////////////////////////////\r
+///////////////////////////////////////////////////////////////////////\r
+\r
+inline MatPL::MatPL() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) {}\r
+inline MatPL::MatPL(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)\r
+{\r
+ if( _rows > 0 && _cols > 0 )\r
+ create( _rows, _cols, _type );\r
+}\r
+\r
+inline MatPL::MatPL(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)\r
+{\r
+ if( _size.height > 0 && _size.width > 0 )\r
+ create( _size.height, _size.width, _type );\r
+}\r
+\r
+inline MatPL::MatPL(const MatPL& m) : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend)\r
+{\r
+ if( refcount )\r
+ CV_XADD(refcount, 1);\r
+\r
+}\r
+\r
+inline MatPL::MatPL(const Mat& m) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)\r
+{\r
+ if( m.rows > 0 && m.cols > 0 )\r
+ create( m.size(), m.type() );\r
+\r
+ Mat tmp = createMatHeader();\r
+ m.copyTo(tmp);\r
+}\r
+\r
+inline MatPL::~MatPL()\r
+{\r
+ release();\r
+}\r
+inline MatPL& MatPL::operator = (const MatPL& m)\r
+{\r
+ if( this != &m )\r
+ {\r
+ if( m.refcount )\r
+ CV_XADD(m.refcount, 1);\r
+ release();\r
+ flags = m.flags;\r
+ rows = m.rows; cols = m.cols;\r
+ step = m.step; data = m.data; \r
+ datastart = m.datastart;\r
+ dataend = m.dataend;\r
+ refcount = m.refcount;\r
+ }\r
+ return *this;\r
+}\r
+\r
+inline MatPL MatPL::clone() const\r
+{\r
+ MatPL m(size(), type()); \r
+ Mat to = m;\r
+ Mat from = *this;\r
+ from.copyTo(to);\r
+ return m;\r
+}\r
+\r
+inline void MatPL::create(Size _size, int _type) { create(_size.height, _size.width, _type); } \r
+//CCP void MatPL::create(int _rows, int _cols, int _type); \r
+//CPP void MatPL::release();\r
+\r
+inline Mat MatPL::createMatHeader() const { return Mat(size(), type(), data); }\r
+inline MatPL::operator Mat() const { return createMatHeader(); }\r
+\r
+inline bool MatPL::isContinuous() const { return (flags & Mat::CONTINUOUS_FLAG) != 0; }\r
+inline size_t MatPL::elemSize() const { return CV_ELEM_SIZE(flags); }\r
+inline size_t MatPL::elemSize1() const { return CV_ELEM_SIZE1(flags); }\r
+inline int MatPL::type() const { return CV_MAT_TYPE(flags); }\r
+inline int MatPL::depth() const { return CV_MAT_DEPTH(flags); }\r
+inline int MatPL::channels() const { return CV_MAT_CN(flags); }\r
+inline size_t MatPL::step1() const { return step/elemSize1(); }\r
+inline Size MatPL::size() const { return Size(cols, rows); }\r
+inline bool MatPL::empty() const { return data == 0; } \r
+\r
+\r
} /* end of namespace gpu */\r
\r
} /* end of namespace cv */\r
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////\r
+//\r
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
+//\r
+// By downloading, copying, installing or using the software you agree to this license.\r
+// If you do not agree to this license, do not download, install,\r
+// copy or use the software.\r
+//\r
+//\r
+// License Agreement\r
+// For Open Source Computer Vision Library\r
+//\r
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
+// Third party copyrights are property of their respective owners.\r
+//\r
+// Redistribution and use in source and binary forms, with or without modification,\r
+// are permitted provided that the following conditions are met:\r
+//\r
+// * Redistribution's of source code must retain the above copyright notice,\r
+// this list of conditions and the following disclaimer.\r
+//\r
+// * Redistribution's in binary form must reproduce the above copyright notice,\r
+// this list of conditions and the following disclaimer in the documentation\r
+// and/or other GpuMaterials provided with the distribution.\r
+//\r
+// * The name of the copyright holders may not be used to endorse or promote products\r
+// derived from this software without specific prior written permission.\r
+//\r
+// This software is provided by the copyright holders and contributors "as is" and\r
+// any express or implied warranties, including, but not limited to, the implied\r
+// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
+// In no event shall the Intel Corporation or contributors be liable for any direct,\r
+// indirect, incidental, special, exemplary, or consequential damages\r
+// (including, but not limited to, procurement of substitute goods or services;\r
+// loss of use, data, or profits; or business interruption) however caused\r
+// and on any theory of liability, whether in contract, strict liability,\r
+// or tort (including negligence or otherwise) arising in any way out of\r
+// the use of this software, even if advised of the possibility of such damage.\r
+//\r
+//M*/\r
+\r
+#ifndef __OPENCV_GPU_STREAM_ACCESSOR_HPP__\r
+#define __OPENCV_GPU_STREAM_ACCESSOR_HPP__\r
+\r
+#include "opencv2/gpu/gpu.hpp"\r
+#include "cuda_runtime_api.h"\r
+\r
+namespace cv\r
+{\r
+ namespace gpu\r
+ {\r
+ // This is the only header file that depends on Cuda; all other headers are independent.\r
+ // So if you use OpenCV binaries you do not need to install the Cuda Toolkit.\r
+ // But if you want to use the GPU yourself, you may get the cuda stream instance using the class below.\r
+ // In this case you have to install the Cuda Toolkit.\r
+ struct StreamAccessor\r
+ {\r
+ CV_EXPORTS static cudaStream_t getStream(const CudaStream& stream);\r
+ };\r
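+\r
+ // Sketch: obtaining the raw stream for a custom kernel launch\r
+ // (my_kernel, grid and block are placeholders for user code):\r
+ //\r
+ //   cudaStream_t s = cv::gpu::StreamAccessor::getStream(stream);\r
+ //   my_kernel<<<grid, block, 0, s>>>(...);\r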
+ }\r
+}\r
+\r
+#endif /* __OPENCV_GPU_STREAM_ACCESSOR_HPP__ */
\ No newline at end of file
#define __OPENCV_CUDA_SHARED_HPP__\r
\r
#include "opencv2/gpu/devmem2d.hpp"\r
+#include "cuda_runtime_api.h" \r
\r
namespace cv\r
{\r
namespace gpu\r
- {\r
+ { \r
typedef unsigned char uchar;\r
typedef unsigned short ushort;\r
- typedef unsigned int uint;\r
+ typedef unsigned int uint; \r
\r
extern "C" void error( const char *error_string, const char *file, const int line, const char *func = "");\r
\r
namespace impl\r
- {\r
+ { \r
static inline int divUp(int a, int b) { return (a % b == 0) ? a/b : a/b + 1; }\r
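            // e.g. divUp(1000, 256) == 4: enough 256-thread blocks to cover 1000 items,\r
            // as in dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));\r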
\r
extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, DevMem2D& disp, int maxdisp, DevMem2D_<uint>& minSSD_buf);\r
-\r
- extern "C" void set_to_without_mask (const DevMem2D& mat, const double * scalar, int depth, int channels);\r
- extern "C" void set_to_with_mask (const DevMem2D& mat, const double * scalar, const DevMem2D& mask, int depth, int channels);\r
}\r
}\r
}\r
\r
-#ifdef __CUDACC__\r
- #define cudaSafeCall(expr) { cudaError_t err = expr; if( cudaSuccess != err) cv::gpu::error(cudaGetErrorString(err), __FILE__, __LINE__); }\r
+#if defined(__GNUC__)\r
+ #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)\r
+#else /* defined(__CUDACC__) || defined(__MSVC__) */\r
+ #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__) \r
#endif\r
\r
+ static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")\r
+ {\r
+ if( cudaSuccess != err)\r
+ cv::gpu::error(cudaGetErrorString(err), file, line, func); // report the caller's location, not this header's\r
+ }\r
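+\r
+ // Usage sketch: wrap any Cuda runtime call so a failure raises cv::Exception\r
+ // carrying the caller's file/line, e.g.\r
+ //\r
+ //   cudaSafeCall( cudaMemset2D(dev_ptr, pitch, 0, width_bytes, rows) );\r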
+\r
#endif /* __OPENCV_CUDA_SHARED_HPP__ */\r
//M*/\r
\r
#include "precomp.hpp"\r
-//#include "opencv2/gpu/stream_access.hpp"\r
\r
using namespace cv;\r
using namespace cv::gpu;\r
\r
\r
-cv::gpu::CudaStream::CudaStream() //: impl( (Impl*)fastMalloc(sizeof(Impl)) )\r
+#if !defined (HAVE_CUDA)\r
+\r
+void cv::gpu::CudaStream::create() { throw_nogpu(); }\r
+void cv::gpu::CudaStream::release() { throw_nogpu(); }\r
+cv::gpu::CudaStream::CudaStream() : impl(0) { throw_nogpu(); }\r
+cv::gpu::CudaStream::~CudaStream() { throw_nogpu(); }\r
+cv::gpu::CudaStream::CudaStream(const CudaStream& stream) { throw_nogpu(); }\r
+CudaStream& cv::gpu::CudaStream::operator=(const CudaStream& stream) { throw_nogpu(); return *this; }\r
+bool cv::gpu::CudaStream::queryIfComplete() { throw_nogpu(); return true; }\r
+void cv::gpu::CudaStream::waitForCompletion() { throw_nogpu(); }\r
+void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, Mat& dst) { throw_nogpu(); }\r
+void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, MatPL& dst) { throw_nogpu(); }\r
+void cv::gpu::CudaStream::enqueueUpload(const MatPL& src, GpuMat& dst) { throw_nogpu(); }\r
+void cv::gpu::CudaStream::enqueueUpload(const Mat& src, GpuMat& dst) { throw_nogpu(); }\r
+void cv::gpu::CudaStream::enqueueCopy(const GpuMat& src, GpuMat& dst) { throw_nogpu(); }\r
+void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val) { throw_nogpu(); }\r
+void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask) { throw_nogpu(); }\r
+void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int type, double a, double b) { throw_nogpu(); }\r
+\r
+#else /* !defined (HAVE_CUDA) */\r
+\r
+#include "opencv2/gpu/stream_accessor.hpp"\r
+\r
+struct CudaStream::Impl\r
{\r
- //cudaSafeCall( cudaStreamCreate( &impl->stream) );\r
-}\r
-cv::gpu::CudaStream::~CudaStream()\r
+ cudaStream_t stream;\r
+ int ref_counter;\r
+};\r
+namespace \r
{\r
- if (impl)\r
+ template<class S, class D> void devcopy(const S& src, D& dst, cudaStream_t s, cudaMemcpyKind k)\r
{\r
- cudaSafeCall( cudaStreamDestroy( *(cudaStream_t*)impl ) );\r
- cv::fastFree( impl );\r
- }\r
+ dst.create(src.size(), src.type());\r
+ size_t bwidth = src.cols * src.elemSize();\r
+ cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, k, s) ); \r
+ };\r
}\r
\r
-bool cv::gpu::CudaStream::queryIfComplete()\r
+CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const CudaStream& stream) { return stream.impl->stream; };\r
+\r
+void cv::gpu::CudaStream::create()\r
{\r
- //cudaError_t err = cudaStreamQuery( *(cudaStream_t*)impl );\r
+ if (impl)\r
+ release();\r
\r
- //if (err == cudaSuccess)\r
- // return true;\r
+ cudaStream_t stream;\r
+ cudaSafeCall( cudaStreamCreate( &stream ) );\r
\r
- //if (err == cudaErrorNotReady)\r
- // return false;\r
+ impl = (CudaStream::Impl*)fastMalloc(sizeof(CudaStream::Impl));\r
\r
- ////cudaErrorInvalidResourceHandle\r
- //cudaSafeCall( err );\r
- return true;\r
+ impl->stream = stream;\r
+ impl->ref_counter = 1; \r
}\r
-void cv::gpu::CudaStream::waitForCompletion()\r
+\r
+void cv::gpu::CudaStream::release()\r
{\r
- cudaSafeCall( cudaStreamSynchronize( *(cudaStream_t*)impl ) );\r
+ if( impl && CV_XADD(&impl->ref_counter, -1) == 1 )\r
+ {\r
+ cudaSafeCall( cudaStreamDestroy( impl->stream ) );\r
+ cv::fastFree( impl );\r
+ }\r
}\r
\r
-void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, Mat& dst)\r
+cv::gpu::CudaStream::CudaStream() : impl(0) { create(); }\r
+cv::gpu::CudaStream::~CudaStream() { release(); }\r
+\r
+cv::gpu::CudaStream::CudaStream(const CudaStream& stream) : impl(stream.impl)\r
{\r
-// cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost,\r
+ if( impl )\r
+ CV_XADD(&impl->ref_counter, 1);\r
}\r
-void cv::gpu::CudaStream::enqueueUpload(const Mat& src, GpuMat& dst)\r
+CudaStream& cv::gpu::CudaStream::operator=(const CudaStream& stream)\r
{\r
- CV_Assert(!"Not implemented");\r
+ if( this != &stream )\r
+ {\r
+ if( stream.impl )\r
+ CV_XADD(&stream.impl->ref_counter, 1);\r
+\r
+ release();\r
+ impl = stream.impl; \r
+ }\r
+ return *this;\r
}\r
-void cv::gpu::CudaStream::enqueueCopy(const GpuMat& src, GpuMat& dst)\r
+\r
+bool cv::gpu::CudaStream::queryIfComplete()\r
{\r
- CV_Assert(!"Not implemented");\r
+ cudaError_t err = cudaStreamQuery( impl->stream );\r
+\r
+ if (err == cudaErrorNotReady || err == cudaSuccess)\r
+ return err == cudaSuccess;\r
+\r
+ cudaSafeCall(err); // throws via cv::gpu::error\r
+ return false;\r
}\r
\r
+void cv::gpu::CudaStream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( impl->stream ) ); }\r
+\r
+void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, Mat& dst) \r
+{ \r
+ // sizes and type must already match; otherwise dst would be reallocated\r
+ // and would no longer point to page-locked memory\r
+ CV_Assert(src.cols == dst.cols && src.rows == dst.rows && src.type() == dst.type() );\r
+ devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost);\r
+}\r
+void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, MatPL& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); }\r
+\r
+void cv::gpu::CudaStream::enqueueUpload(const MatPL& src, GpuMat& dst){ devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }\r
+void cv::gpu::CudaStream::enqueueUpload(const Mat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); } \r
+void cv::gpu::CudaStream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToDevice); }\r
+\r
void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val)\r
{\r
CV_Assert(!"Not implemented");\r
CV_Assert(!"Not implemented");\r
}\r
\r
-void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int type)\r
+void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int type, double a, double b)\r
{\r
CV_Assert(!"Not implemented");\r
}\r
\r
-//struct cudaStream_t& cv::gpu::CudaStream::getStream() { return stream; }\r
-\r
\r
+#endif /* !defined (HAVE_CUDA) */
\ No newline at end of file
using namespace cv;\r
using namespace cv::gpu;\r
\r
-#ifndef HAVE_CUDA\r
+\r
+#if !defined (HAVE_CUDA)\r
\r
CV_EXPORTS int cv::gpu::getCudaEnabledDeviceCount() { return 0; }\r
-CV_EXPORTS string cv::gpu::getDeviceName(int /*device*/) { cudaSafeCall(0); return 0; } \r
-CV_EXPORTS void cv::gpu::setDevice(int /*device*/) { cudaSafeCall(0); } \r
-CV_EXPORTS void cv::gpu::getComputeCapability(int /*device*/, int* /*major*/, int* /*minor*/) { cudaSafeCall(0); } \r
-CV_EXPORTS int cv::gpu::getNumberOfSMs(int /*device*/) { cudaSafeCall(0); return 0; } \r
+CV_EXPORTS string cv::gpu::getDeviceName(int /*device*/) { throw_nogpu(); return 0; } \r
+CV_EXPORTS void cv::gpu::setDevice(int /*device*/) { throw_nogpu(); } \r
+CV_EXPORTS int cv::gpu::getDevice() { throw_nogpu(); return 0; } \r
+CV_EXPORTS void cv::gpu::getComputeCapability(int /*device*/, int* /*major*/, int* /*minor*/) { throw_nogpu(); } \r
+CV_EXPORTS int cv::gpu::getNumberOfSMs(int /*device*/) { throw_nogpu(); return 0; } \r
+\r
\r
-#else\r
+#else /* !defined (HAVE_CUDA) */\r
\r
CV_EXPORTS int cv::gpu::getCudaEnabledDeviceCount()\r
{\r
{\r
cudaSafeCall( cudaSetDevice( device ) );\r
}\r
+CV_EXPORTS int cv::gpu::getDevice()\r
+{\r
+ int device; \r
+ cudaSafeCall( cudaGetDevice( &device ) );\r
+ return device;\r
+}\r
\r
CV_EXPORTS void cv::gpu::getComputeCapability(int device, int* major, int* minor)\r
{\r
return prop.multiProcessorCount;\r
}\r
\r
-#endif
\ No newline at end of file
+#endif\r
+\r
using namespace cv;\r
using namespace cv::gpu;\r
\r
+////////////////////////////////////////////////////////////////////////\r
//////////////////////////////// GpuMat ////////////////////////////////\r
+////////////////////////////////////////////////////////////////////////\r
\r
-void GpuMat::upload(const Mat& m)\r
+\r
+#if !defined (HAVE_CUDA)\r
+\r
+namespace cv\r
+{\r
+ namespace gpu\r
+ {\r
+ void GpuMat::upload(const Mat& /*m*/) { throw_nogpu(); }\r
+ void GpuMat::download(cv::Mat& /*m*/) const { throw_nogpu(); }\r
+ void GpuMat::copyTo( GpuMat& /*m*/ ) const { throw_nogpu(); }\r
+ void GpuMat::copyTo( GpuMat& /*m*/, const GpuMat&/* mask */) const { throw_nogpu(); }\r
+ void GpuMat::convertTo( GpuMat& /*m*/, int /*rtype*/, double /*alpha*/, double /*beta*/ ) const { throw_nogpu(); }\r
+ GpuMat& GpuMat::operator = (const Scalar& /*s*/) { throw_nogpu(); return *this; }\r
+ GpuMat& GpuMat::setTo(const Scalar& /*s*/, const GpuMat& /*mask*/) { throw_nogpu(); return *this; }\r
+ GpuMat GpuMat::reshape(int /*new_cn*/, int /*new_rows*/) const { throw_nogpu(); return GpuMat(); }\r
+ void GpuMat::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); }\r
+ void GpuMat::release() { throw_nogpu(); }\r
+\r
+ void MatPL::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); }\r
+ void MatPL::release() { throw_nogpu(); }\r
+ }\r
+\r
+}\r
+\r
+\r
+#else /* !defined (HAVE_CUDA) */\r
+\r
+\r
+void cv::gpu::GpuMat::upload(const Mat& m)\r
{\r
CV_DbgAssert(!m.empty());\r
create(m.size(), m.type());\r
cudaSafeCall( cudaMemcpy2D(data, step, m.data, m.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) );\r
}\r
\r
-void GpuMat::download(cv::Mat& m) const\r
+void cv::gpu::GpuMat::download(cv::Mat& m) const\r
{\r
CV_DbgAssert(!this->empty());\r
m.create(size(), type());\r
cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) );\r
}\r
\r
-void GpuMat::copyTo( GpuMat& m ) const\r
+void cv::gpu::GpuMat::copyTo( GpuMat& m ) const\r
{\r
CV_DbgAssert(!this->empty());\r
m.create(size(), type());\r
cudaSafeCall( cudaThreadSynchronize() );\r
}\r
\r
-void GpuMat::copyTo( GpuMat& /*m*/, const GpuMat&/* mask */) const\r
-{\r
+void cv::gpu::GpuMat::copyTo( GpuMat& /*m*/, const GpuMat&/* mask */) const\r
+{ \r
CV_Assert(!"Not implemented");\r
}\r
\r
-void GpuMat::convertTo( GpuMat& /*m*/, int /*rtype*/, double /*alpha*/, double /*beta*/ ) const\r
+void cv::gpu::GpuMat::convertTo( GpuMat& /*m*/, int /*rtype*/, double /*alpha*/, double /*beta*/ ) const\r
{\r
CV_Assert(!"Not implemented");\r
}\r
\r
-GpuMat& GpuMat::operator = (const Scalar& s)\r
+GpuMat& cv::gpu::GpuMat::operator = (const Scalar& /*s*/)\r
{\r
- cv::gpu::impl::set_to_without_mask(*this, s.val, this->depth(), this->channels());\r
+ CV_Assert(!"Not implemented"); \r
return *this;\r
}\r
\r
-GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)\r
+GpuMat& cv::gpu::GpuMat::setTo(const Scalar& /*s*/, const GpuMat& /*mask*/)\r
{\r
- CV_Assert(mask.type() == CV_32F);\r
-\r
- CV_DbgAssert(!this->empty());\r
-\r
- this->channels();\r
- this->depth();\r
-\r
- if (mask.empty())\r
- {\r
- cv::gpu::impl::set_to_without_mask(*this, s.val, this->depth(), this->channels());\r
- }\r
- else\r
- {\r
- cv::gpu::impl::set_to_with_mask(*this, s.val, mask, this->depth(), this->channels());\r
- }\r
-\r
+ CV_Assert(!"Not implemented"); \r
return *this;\r
}\r
\r
\r
-GpuMat GpuMat::reshape(int new_cn, int new_rows) const\r
+GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const\r
{\r
GpuMat hdr = *this;\r
\r
return hdr;\r
}\r
\r
-void GpuMat::create(int _rows, int _cols, int _type)\r
+void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)\r
{\r
_type &= TYPE_MASK;\r
if( rows == _rows && cols == _cols && type() == _type && data )\r
rows = _rows;\r
cols = _cols;\r
\r
- size_t esz = elemSize();\r
+ size_t esz = elemSize(); \r
\r
void *dev_ptr;\r
cudaSafeCall( cudaMallocPitch(&dev_ptr, &step, esz * cols, rows) );\r
size_t nettosize = (size_t)_nettosize;\r
\r
datastart = data = (uchar*)dev_ptr;\r
- dataend = data + nettosize;\r
+ dataend = data + nettosize; \r
\r
refcount = (int*)fastMalloc(sizeof(*refcount));\r
*refcount = 1;\r
}\r
}\r
\r
-void GpuMat::release()\r
+void cv::gpu::GpuMat::release()\r
{\r
if( refcount && CV_XADD(refcount, -1) == 1 )\r
{\r
fastFree(refcount);\r
- cudaSafeCall( cudaFree(datastart) );\r
+ cudaSafeCall( cudaFree(datastart) ); \r
}\r
data = datastart = dataend = 0;\r
step = rows = cols = 0;\r
}\r
\r
\r
+///////////////////////////////////////////////////////////////////////\r
+//////////////////////////////// MatPL ////////////////////////////////\r
+///////////////////////////////////////////////////////////////////////\r
+\r
+void cv::gpu::MatPL::create(int _rows, int _cols, int _type)\r
+{\r
+ _type &= TYPE_MASK;\r
+ if( rows == _rows && cols == _cols && type() == _type && data )\r
+ return;\r
+ if( data )\r
+ release();\r
+ CV_DbgAssert( _rows >= 0 && _cols >= 0 );\r
+ if( _rows > 0 && _cols > 0 )\r
+ {\r
+ flags = Mat::MAGIC_VAL + Mat::CONTINUOUS_FLAG + _type;\r
+ rows = _rows;\r
+ cols = _cols;\r
+ step = elemSize()*cols;\r
+ int64 _nettosize = (int64)step*rows;\r
+ size_t nettosize = (size_t)_nettosize;\r
+ if( _nettosize != (int64)nettosize )\r
+ CV_Error(CV_StsNoMem, "Too big buffer is allocated");\r
+ size_t datasize = alignSize(nettosize, (int)sizeof(*refcount));\r
+\r
+ //datastart = data = (uchar*)fastMalloc(datasize + sizeof(*refcount)); \r
+ void *ptr;\r
+ cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) );\r
\r
+ datastart = data = (uchar*)ptr; \r
+ dataend = data + nettosize; \r
\r
+ refcount = (int*)cv::fastMalloc(sizeof(*refcount));\r
+ *refcount = 1;\r
+ }\r
+}\r
\r
+void cv::gpu::MatPL::release()\r
+{\r
+ if( refcount && CV_XADD(refcount, -1) == 1 )\r
+ {\r
+ cudaSafeCall( cudaFreeHost(datastart ) );\r
+ fastFree(refcount);\r
+ }\r
+ data = datastart = dataend = 0;\r
+ step = rows = cols = 0;\r
+ refcount = 0;\r
+}\r
\r
+#endif /* !defined (HAVE_CUDA) */
\ No newline at end of file
/* End of file. */\r
\r
\r
-extern "C" void cv::gpu::error( const char *error_string, const char *file, const int line, const char *func)\r
-{ \r
- cv::error( cv::Exception(CV_GpuApiCallError, error_string, func, file, line) );\r
-} \r
+namespace cv\r
+{\r
+ namespace gpu\r
+ {\r
+ extern "C" void error(const char *error_string, const char *file, const int line, const char *func)\r
+ { \r
+ cv::error( cv::Exception(CV_GpuApiCallError, error_string, func, file, line) );\r
+ }\r
+ }\r
+}\r
#include <iostream>\r
\r
#include "opencv2/gpu/gpu.hpp"\r
-#include "cuda_shared.hpp"\r
\r
-#ifndef HAVE_CUDA\r
\r
- #define cudaSafeCall(expr) CV_Error(CV_GpuNotFound, "The library is compilled with no GPU support")\r
- #define cudaCallerSafeCall(expr) CV_Error(CV_GpuNotFound, "The library is compilled with no GPU support")\r
+#if defined(HAVE_CUDA)\r
\r
-#else /* HAVE_CUDA */\r
+ #include "cuda_shared.hpp"\r
+ #include "cuda_runtime_api.h" \r
\r
-#if _MSC_VER >= 1200 \r
- #pragma warning (disable : 4100 4211 4201 4408) \r
-#endif\r
-\r
-#include "cuda_runtime_api.h"\r
-\r
-#ifdef __GNUC__ \r
- #define cudaSafeCall(expr) { cudaError_t err = expr; if(cudaSuccess != err) cv::gpu::error(cudaGetErrorString(err), __FILE__, __LINE__, __func__); } \r
-#else \r
- #define cudaSafeCall(expr) { cudaError_t err = expr; if(cudaSuccess != err) cv::gpu::error(cudaGetErrorString(err), __FILE__, __LINE__); } \r
-#endif\r
+#else /* defined(HAVE_CUDA) */\r
\r
-#define cudaCallerSafeCall(expr) expr;\r
+ static inline void throw_nogpu() { CV_Error(CV_GpuNotFound, "The library is compiled with no GPU support"); }\r
+ \r
+#endif /* defined(HAVE_CUDA) */\r
\r
-\r
-#endif /* HAVE_CUDA */\r
-\r
-#endif\r
+#endif /* __OPENCV_PRECOMP_H__ */ \r
\r
using namespace cv;\r
using namespace cv::gpu;\r
+\r
+#if !defined (HAVE_CUDA)\r
+\r
+cv::gpu::StereoBM_GPU::StereoBM_GPU() { throw_nogpu(); }\r
+cv::gpu::StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_) { throw_nogpu(); }\r
+\r
+bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable() { throw_nogpu(); return false; }\r
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity) { throw_nogpu(); }\r
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream) { throw_nogpu(); }\r
+\r
+\r
+#else /* !defined (HAVE_CUDA) */\r
\r
-StereoBM_GPU::StereoBM_GPU() : preset(BASIC_PRESET), ndisp(64) {}\r
-StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_) : preset(preset_), ndisp(ndisparities_) \r
+cv::gpu::StereoBM_GPU::StereoBM_GPU() : preset(BASIC_PRESET), ndisp(64) {}\r
+cv::gpu::StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_) : preset(preset_), ndisp(ndisparities_) \r
{\r
const int max_supported_ndisp = 1 << (sizeof(unsigned char) * 8);\r
CV_Assert(ndisp <= max_supported_ndisp);\r
+ CV_Assert(ndisp % 8 == 0);\r
+}\r
+\r
+bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable()\r
+{\r
+ if (0 == getCudaEnabledDeviceCount())\r
+ return false;\r
+\r
+ int device = getDevice();\r
+\r
+ int minor, major;\r
+ getComputeCapability(device, &major, &minor);\r
+ int numSM = getNumberOfSMs(device);\r
+\r
+ if (major > 1 || numSM > 16)\r
+ return true; \r
+\r
+ return false;\r
}\r
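\r
// For example, a compute capability 1.3 device with 30 multiprocessors passes the\r
// numSM > 16 test, while an 8-SM compute capability 1.1 device returns false.\r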
\r
-void StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity) const\r
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity)\r
{\r
CV_DbgAssert(left.rows == right.rows && left.cols == right.cols);\r
CV_DbgAssert(left.type() == CV_8UC1);\r
} \r
\r
DevMem2D disp = disparity;\r
- DevMem2D_<uint> mssd = minSSD; \r
- cudaCallerSafeCall( impl::stereoBM_GPU(left, right, disp, ndisp, mssd) ); \r
+ DevMem2D_<unsigned int> mssd = minSSD; \r
+ impl::stereoBM_GPU(left, right, disp, ndisp, mssd); \r
}\r
+\r
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream)\r
+{\r
+ CV_Assert(!"Not implemented");\r
+}\r
+\r
+#endif /* !defined (HAVE_CUDA) */
\ No newline at end of file