# ====================================================================\r
# Link libraries: e.g. opencv_core220.so, opencv_imgproc220d.lib, etc...\r
# ====================================================================\r
-set(OPENCV_LIB_COMPONENTS opencv_core opencv_imgproc opencv_features2d opencv_calib3d opencv_objdetect opencv_video opencv_highgui opencv_ml opencv_legacy opencv_contrib)\r
+set(OPENCV_LIB_COMPONENTS opencv_core opencv_imgproc opencv_features2d opencv_gpu opencv_calib3d opencv_objdetect opencv_video opencv_highgui opencv_ml opencv_legacy opencv_contrib)\r
SET(OpenCV_LIBS "")\r
foreach(__CVLIB ${OPENCV_LIB_COMPONENTS})\r
# CMake>=2.6 supports the notation "debug XXd optimized XX"\r
add_subdirectory(traincascade)
-#add_subdirectory(gpu)
+add_subdirectory(gpu)
CV_EXPORTS void getGpuMemInfo(size_t *free, size_t* total);\r
\r
//////////////////////////////// GpuMat ////////////////////////////////\r
- class CudaStream;\r
+ class Stream;\r
class MatPL;\r
\r
//! Smart pointer for GPU memory with reference counting. Its interface is mostly similar with cv::Mat.\r
\r
//! pefroms blocking upload data to GpuMat. .\r
void upload(const cv::Mat& m);\r
- void upload(const MatPL& m, CudaStream& stream);\r
+ void upload(const MatPL& m, Stream& stream);\r
\r
//! Downloads data from device to host memory. Blocking calls.\r
operator Mat() const;\r
void download(cv::Mat& m) const;\r
- void download(MatPL& m, CudaStream& stream) const;\r
+ void download(MatPL& m, Stream& stream) const;\r
\r
//! returns a new GpuMatrix header for the specified row\r
GpuMat row(int y) const;\r
// Passed to each function that supports async kernel execution.\r
// Reference counting is enabled\r
\r
- class CV_EXPORTS CudaStream\r
+ class CV_EXPORTS Stream\r
{\r
public:\r
- CudaStream();\r
- ~CudaStream();\r
+ Stream();\r
+ ~Stream();\r
\r
- CudaStream(const CudaStream&);\r
- CudaStream& operator=(const CudaStream&);\r
+ Stream(const Stream&);\r
+ Stream& operator=(const Stream&);\r
\r
bool queryIfComplete();\r
void waitForCompletion();\r
void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity);\r
\r
//! Acync version\r
- void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream & stream);\r
+ void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const Stream & stream);\r
\r
//! Some heuristics that tries to estmate\r
// if current GPU will be faster then CPU in this algorithm.\r
enum { DEFAULT_LEVELS = 5 };\r
\r
//! the default constructor\r
- explicit StereoBeliefPropagation_GPU(int ndisp_ = DEFAULT_NDISP, \r
- int iters_ = DEFAULT_ITERS, \r
- int levels_ = DEFAULT_LEVELS,\r
- int msg_type_ = MSG_TYPE_AUTO,\r
+ explicit StereoBeliefPropagation_GPU(int ndisp = DEFAULT_NDISP, \r
+ int iters = DEFAULT_ITERS, \r
+ int levels = DEFAULT_LEVELS,\r
+ int msg_type = MSG_TYPE_AUTO,\r
float msg_scale = 1.0f);\r
//! the full constructor taking the number of disparities, number of BP iterations on each level,\r
//! number of levels, truncation of data cost, data weight, \r
//! truncation of discontinuity cost and discontinuity single jump\r
- StereoBeliefPropagation_GPU(int ndisp_, int iters_, int levels_, \r
- float max_data_term_, float data_weight_,\r
- float max_disc_term_, float disc_single_jump_,\r
- int msg_type_ = MSG_TYPE_AUTO,\r
+ StereoBeliefPropagation_GPU(int ndisp, int iters, int levels, \r
+ float max_data_term, float data_weight,\r
+ float max_disc_term, float disc_single_jump,\r
+ int msg_type = MSG_TYPE_AUTO,\r
float msg_scale = 1.0f);\r
\r
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,\r
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity);\r
\r
//! Acync version\r
- void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream);\r
+ void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, const Stream& stream);\r
\r
//! Some heuristics that tries to estmate\r
//! if current GPU will be faster then CPU in this algorithm.\r
// In this case you have to install Cuda Toolkit.\r
struct StreamAccessor\r
{\r
- CV_EXPORTS static cudaStream_t getStream(const CudaStream& stream);\r
+ CV_EXPORTS static cudaStream_t getStream(const Stream& stream);\r
};\r
}\r
}\r
cv::gpu::StereoBeliefPropagation_GPU::StereoBeliefPropagation_GPU(int, int, int, float, float, float, float, int, float) { throw_nogpu(); }\r
\r
void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }\r
-void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&, const CudaStream&) { throw_nogpu(); }\r
+void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }\r
\r
bool cv::gpu::StereoBeliefPropagation_GPU::checkIfGpuCallReasonable() { throw_nogpu(); return false; }\r
\r
::stereo_bp_gpu_operator(ndisp, iters, levels, max_data_term, data_weight, max_disc_term, disc_single_jump, msg_type, msg_scale, u, d, l, r, u2, d2, l2, r2, datas, out, left, right, disp, 0);\r
}\r
\r
-void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, const CudaStream& stream)\r
+void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, const Stream& stream)\r
{\r
::stereo_bp_gpu_operator(ndisp, iters, levels, max_data_term, data_weight, max_disc_term, disc_single_jump, msg_type, msg_scale, u, d, l, r, u2, d2, l2, r2, datas, out, left, right, disp, StreamAccessor::getStream(stream));\r
}\r
\r
using namespace cv::gpu;\r
\r
+\r
+/////////////////////////////////// Remap ///////////////////////////////////////////////\r
namespace imgproc\r
{\r
- texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex1;\r
+ texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_remap;\r
\r
__global__ void kernel_remap(const float *mapx, const float *mapy, size_t map_step, unsigned char* out, size_t out_step, int width, int height)\r
{ \r
\r
float xcoo = mapx[idx];\r
float ycoo = mapy[idx];\r
- \r
- out[y * out_step + x] = (unsigned char)(255.f * tex2D(tex1, xcoo, ycoo)); \r
+\r
+ out[y * out_step + x] = (unsigned char)(255.f * tex2D(tex_remap, xcoo, ycoo)); \r
}\r
}\r
\r
- texture< uchar4, 2, cudaReadModeElementType > tex_meanshift;\r
+}\r
+\r
+namespace cv { namespace gpu { namespace impl \r
+{\r
+ extern "C" void remap_gpu(const DevMem2D& src, const DevMem2D_<float>& xmap, const DevMem2D_<float>& ymap, DevMem2D dst)\r
+ {\r
+ dim3 block(16, 16, 1);\r
+ dim3 grid(1, 1, 1);\r
+ grid.x = divUp(dst.cols, block.x);\r
+ grid.y = divUp(dst.rows, block.y);\r
+\r
+ imgproc::tex_remap.filterMode = cudaFilterModeLinear; \r
+ imgproc::tex_remap.addressMode[0] = imgproc::tex_remap.addressMode[1] = cudaAddressModeWrap;\r
+ cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();\r
+ cudaSafeCall( cudaBindTexture2D(0, imgproc::tex_remap, src.ptr, desc, dst.cols, dst.rows, src.step) );\r
+\r
+ imgproc::kernel_remap<<<grid, block>>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);\r
+\r
+ cudaSafeCall( cudaThreadSynchronize() ); \r
+ cudaSafeCall( cudaUnbindTexture(imgproc::tex_remap) );\r
+ }\r
+}}}\r
+\r
+\r
+/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////\r
+\r
+namespace imgproc\r
+{\r
+ texture<uchar4, 2> tex_meanshift;\r
\r
extern "C" __global__ void meanshift_kernel( unsigned char* out, int out_step, int cols, int rows, int sp, int sr, int maxIter, float eps )\r
{\r
\r
if( x0 < cols && y0 < rows )\r
{\r
-\r
int isr2 = sr*sr;\r
- uchar4 c = tex2D( tex_meanshift, x0, y0 );\r
+ uchar4 c = tex2D(tex_meanshift, x0, y0 );\r
// iterate meanshift procedure\r
for( int iter = 0; iter < maxIter; iter++ )\r
{\r
\r
namespace cv { namespace gpu { namespace impl \r
{\r
- using namespace imgproc;\r
-\r
- extern "C" void remap_gpu(const DevMem2D& src, const DevMem2D_<float>& xmap, const DevMem2D_<float>& ymap, DevMem2D dst)\r
- {\r
- dim3 block(16, 16, 1);\r
- dim3 grid(1, 1, 1);\r
- grid.x = divUp(dst.cols, block.x);\r
- grid.y = divUp(dst.rows, block.y);\r
-\r
- tex1.filterMode = cudaFilterModeLinear; \r
- tex1.addressMode[0] = tex1.addressMode[1] = cudaAddressModeWrap;\r
- cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();\r
- cudaSafeCall( cudaBindTexture2D(0, tex1, src.ptr, desc, dst.cols, dst.rows, src.step) );\r
-\r
- kernel_remap<<<grid, block>>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);\r
-\r
- cudaSafeCall( cudaThreadSynchronize() ); \r
- cudaSafeCall( cudaUnbindTexture(tex1) );\r
- }\r
-\r
extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, float sp, float sr, int maxIter, float eps)\r
{ \r
dim3 grid(1, 1, 1);\r
grid.y = divUp(src.rows, threads.y);\r
\r
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();\r
- cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );\r
+ cudaSafeCall( cudaBindTexture2D( 0, imgproc::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );\r
\r
- meanshift_kernel<<< grid, threads >>>( dst.ptr, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );\r
+ imgproc::meanshift_kernel<<< grid, threads >>>( dst.ptr, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );\r
cudaSafeCall( cudaThreadSynchronize() );\r
- cudaSafeCall( cudaUnbindTexture( tex_meanshift ) ); \r
+ cudaSafeCall( cudaUnbindTexture( imgproc::tex_meanshift ) ); \r
}\r
}}}\r
\r
\r
#if !defined (HAVE_CUDA)\r
\r
-void cv::gpu::CudaStream::create() { throw_nogpu(); }\r
-void cv::gpu::CudaStream::release() { throw_nogpu(); }\r
-cv::gpu::CudaStream::CudaStream() : impl(0) { throw_nogpu(); }\r
-cv::gpu::CudaStream::~CudaStream() { throw_nogpu(); }\r
-cv::gpu::CudaStream::CudaStream(const CudaStream& /*stream*/) { throw_nogpu(); }\r
-CudaStream& cv::gpu::CudaStream::operator=(const CudaStream& /*stream*/) { throw_nogpu(); return *this; }\r
-bool cv::gpu::CudaStream::queryIfComplete() { throw_nogpu(); return true; }\r
-void cv::gpu::CudaStream::waitForCompletion() { throw_nogpu(); }\r
-void cv::gpu::CudaStream::enqueueDownload(const GpuMat& /*src*/, Mat& /*dst*/) { throw_nogpu(); }\r
-void cv::gpu::CudaStream::enqueueDownload(const GpuMat& /*src*/, MatPL& /*dst*/) { throw_nogpu(); }\r
-void cv::gpu::CudaStream::enqueueUpload(const MatPL& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }\r
-void cv::gpu::CudaStream::enqueueUpload(const Mat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }\r
-void cv::gpu::CudaStream::enqueueCopy(const GpuMat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }\r
-void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& /*src*/, Scalar /*val*/) { throw_nogpu(); }\r
-void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& /*src*/, Scalar /*val*/, const GpuMat& /*mask*/) { throw_nogpu(); }\r
-void cv::gpu::CudaStream::enqueueConvert(const GpuMat& /*src*/, GpuMat& /*dst*/, int /*type*/, double /*a*/, double /*b*/) { throw_nogpu(); }\r
+void cv::gpu::Stream::create() { throw_nogpu(); }\r
+void cv::gpu::Stream::release() { throw_nogpu(); }\r
+cv::gpu::Stream::Stream() : impl(0) { throw_nogpu(); }\r
+cv::gpu::Stream::~Stream() { throw_nogpu(); }\r
+cv::gpu::Stream::Stream(const Stream& /*stream*/) { throw_nogpu(); }\r
+Stream& cv::gpu::Stream::operator=(const Stream& /*stream*/) { throw_nogpu(); return *this; }\r
+bool cv::gpu::Stream::queryIfComplete() { throw_nogpu(); return true; }\r
+void cv::gpu::Stream::waitForCompletion() { throw_nogpu(); }\r
+void cv::gpu::Stream::enqueueDownload(const GpuMat& /*src*/, Mat& /*dst*/) { throw_nogpu(); }\r
+void cv::gpu::Stream::enqueueDownload(const GpuMat& /*src*/, MatPL& /*dst*/) { throw_nogpu(); }\r
+void cv::gpu::Stream::enqueueUpload(const MatPL& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }\r
+void cv::gpu::Stream::enqueueUpload(const Mat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }\r
+void cv::gpu::Stream::enqueueCopy(const GpuMat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }\r
+void cv::gpu::Stream::enqueueMemSet(const GpuMat& /*src*/, Scalar /*val*/) { throw_nogpu(); }\r
+void cv::gpu::Stream::enqueueMemSet(const GpuMat& /*src*/, Scalar /*val*/, const GpuMat& /*mask*/) { throw_nogpu(); }\r
+void cv::gpu::Stream::enqueueConvert(const GpuMat& /*src*/, GpuMat& /*dst*/, int /*type*/, double /*a*/, double /*b*/) { throw_nogpu(); }\r
\r
#else /* !defined (HAVE_CUDA) */\r
\r
#include "opencv2/gpu/stream_accessor.hpp"\r
\r
-struct CudaStream::Impl\r
+struct Stream::Impl\r
{\r
cudaStream_t stream;\r
int ref_counter;\r
};\r
}\r
\r
-CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const CudaStream& stream) { return stream.impl->stream; };\r
+CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream) { return stream.impl->stream; };\r
\r
-void cv::gpu::CudaStream::create()\r
+void cv::gpu::Stream::create()\r
{\r
if (impl)\r
release();\r
cudaStream_t stream;\r
cudaSafeCall( cudaStreamCreate( &stream ) );\r
\r
- impl = (CudaStream::Impl*)fastMalloc(sizeof(CudaStream::Impl));\r
+ impl = (Stream::Impl*)fastMalloc(sizeof(Stream::Impl));\r
\r
impl->stream = stream;\r
impl->ref_counter = 1;\r
}\r
\r
-void cv::gpu::CudaStream::release()\r
+void cv::gpu::Stream::release()\r
{\r
if( impl && CV_XADD(&impl->ref_counter, -1) == 1 )\r
{\r
}\r
}\r
\r
-cv::gpu::CudaStream::CudaStream() : impl(0) { create(); }\r
-cv::gpu::CudaStream::~CudaStream() { release(); }\r
+cv::gpu::Stream::Stream() : impl(0) { create(); }\r
+cv::gpu::Stream::~Stream() { release(); }\r
\r
-cv::gpu::CudaStream::CudaStream(const CudaStream& stream) : impl(stream.impl)\r
+cv::gpu::Stream::Stream(const Stream& stream) : impl(stream.impl)\r
{\r
if( impl )\r
CV_XADD(&impl->ref_counter, 1);\r
}\r
-CudaStream& cv::gpu::CudaStream::operator=(const CudaStream& stream)\r
+Stream& cv::gpu::Stream::operator=(const Stream& stream)\r
{\r
if( this != &stream )\r
{\r
return *this;\r
}\r
\r
-bool cv::gpu::CudaStream::queryIfComplete()\r
+bool cv::gpu::Stream::queryIfComplete()\r
{\r
cudaError_t err = cudaStreamQuery( impl->stream );\r
\r
return false;\r
}\r
\r
-void cv::gpu::CudaStream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( impl->stream ) ); }\r
+void cv::gpu::Stream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( impl->stream ) ); }\r
\r
-void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, Mat& dst)\r
+void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)\r
{\r
// if not -> allocation will be done, but after that dst will not point to page locked memory\r
CV_Assert(src.cols == dst.cols && src.rows == dst.rows && src.type() == dst.type() )\r
devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost);\r
}\r
-void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, MatPL& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); }\r
+void cv::gpu::Stream::enqueueDownload(const GpuMat& src, MatPL& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); }\r
\r
-void cv::gpu::CudaStream::enqueueUpload(const MatPL& src, GpuMat& dst){ devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }\r
-void cv::gpu::CudaStream::enqueueUpload(const Mat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }\r
-void cv::gpu::CudaStream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToDevice); }\r
+void cv::gpu::Stream::enqueueUpload(const MatPL& src, GpuMat& dst){ devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }\r
+void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }\r
+void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToDevice); }\r
\r
-void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val)\r
+void cv::gpu::Stream::enqueueMemSet(const GpuMat& src, Scalar val)\r
{\r
impl::set_to_without_mask(src, src.depth(), val.val, src.channels(), impl->stream);\r
}\r
\r
-void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask)\r
+void cv::gpu::Stream::enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask)\r
{\r
impl::set_to_with_mask(src, src.depth(), val.val, mask, src.channels(), impl->stream);\r
}\r
\r
-void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta)\r
+void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta)\r
{\r
bool noScale = fabs(alpha-1) < std::numeric_limits<double>::epsilon() && fabs(beta) < std::numeric_limits<double>::epsilon();\r
\r
cudaSafeCall( cudaMemcpy2D(data, step, m.data, m.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) );\r
}\r
\r
-void cv::gpu::GpuMat::upload(const MatPL& m, CudaStream& stream)\r
+void cv::gpu::GpuMat::upload(const MatPL& m, Stream& stream)\r
{\r
CV_DbgAssert(!m.empty());\r
stream.enqueueUpload(m, *this);\r
cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) );\r
}\r
\r
-void cv::gpu::GpuMat::download(MatPL& m, CudaStream& stream) const\r
+void cv::gpu::GpuMat::download(MatPL& m, Stream& stream) const\r
{\r
CV_DbgAssert(!m.empty());\r
stream.enqueueDownload(*this, m);\r
\r
bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable() { throw_nogpu(); return false; }\r
void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }\r
-void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&, const CudaStream&) { throw_nogpu(); }\r
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }\r
\r
#else /* !defined (HAVE_CUDA) */\r
\r
::stereo_bm_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, 0);\r
}\r
\r
-void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream)\r
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const Stream& stream)\r
{\r
::stereo_bm_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, StreamAccessor::getStream(stream));\r
}\r
//int64 time = getTickCount();
- CudaStream stream;
+ Stream stream;
stream.enqueueCopy(gmat0, gmat1);
stream.enqueueCopy(gmat0, gmat2);
stream.enqueueCopy(gmat0, gmat3);