-set(name "gpu")\r
-set(DEPS "opencv_core")\r
\r
+set(name "gpu") \r
+set(DEPS "opencv_core")\r
+ \r
\r
set(the_target "opencv_${name}")\r
\r
add_definitions(-DCVAPI_EXPORTS)\r
\r
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include"\r
- "${CMAKE_CURRENT_SOURCE_DIR}/cuda"\r
+ "${CMAKE_CURRENT_SOURCE_DIR}/src/cuda"\r
"${CMAKE_CURRENT_SOURCE_DIR}/src"\r
"${CMAKE_CURRENT_BINARY_DIR}")\r
\r
foreach(d ${DEPS})\r
- if(${d} MATCHES "opencv_")\r
+ if(${d} MATCHES "opencv_") \r
string(REPLACE "opencv_" "${CMAKE_CURRENT_SOURCE_DIR}/../" d_dir ${d})\r
- include_directories("${d_dir}/include")\r
+ include_directories("${d_dir}/include") \r
endif()\r
-endforeach()\r
+endforeach() \r
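+# e.g. the DEPS entry "opencv_core" yields d_dir "${CMAKE_CURRENT_SOURCE_DIR}/../core",\r
+# so the sibling module's "include" directory lands on the header search path\r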
\r
file(GLOB lib_srcs "src/*.cpp")\r
file(GLOB lib_int_hdrs "src/*.h*")\r
-file(GLOB lib_cuda "cuda/*.cu")\r
-file(GLOB lib_cuda_hdrs "cuda/*.h*")\r
+file(GLOB lib_cuda "src/cuda/*.cu")\r
+file(GLOB lib_cuda_hdrs "src/cuda/*.h*") \r
source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})\r
source_group("Cuda" FILES ${lib_cuda} ${lib_cuda_hdrs})\r
\r
-file(GLOB lib_hdrs "include/opencv2/${name}/*.h*")\r
+file(GLOB lib_hdrs "include/opencv2/${name}/*.h*") \r
source_group("Include" FILES ${lib_hdrs})\r
\r
if (HAVE_CUDA)\r
link_directories(${CUDA_LIBRARIES})\r
\r
if (UNIX OR APPLE)\r
- set (CUDA_NVCC_FLAGS "-Xcompiler;-fPIC")\r
- endif()\r
-\r
+ set (CUDA_NVCC_FLAGS "-Xcompiler;-fPIC;")\r
+ endif() \r
+ \r
CUDA_COMPILE(cuda_objs ${lib_cuda})\r
#CUDA_BUILD_CLEAN_TARGET()\r
endif()\r
-\r
+ \r
\r
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${cuda_objs})\r
\r
if(${CMAKE_GENERATOR} MATCHES "Visual*" OR ${CMAKE_GENERATOR} MATCHES "Xcode*")\r
if(${CMAKE_GENERATOR} MATCHES "Visual*")\r
set(${the_target}_pch "src/precomp.cpp")\r
- endif()\r
+ endif() \r
add_native_precompiled_header(${the_target} ${pch_header})\r
elseif(CMAKE_COMPILER_IS_GNUCXX AND ${CMAKE_GENERATOR} MATCHES ".*Makefiles")\r
add_precompiled_header(${the_target} ${pch_header})\r
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////
-//
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
-//
-// By downloading, copying, installing or using the software you agree to this license.
-// If you do not agree to this license, do not download, install,
-// copy or use the software.
-//
-//
-// License Agreement
-// For Open Source Computer Vision Library
-//
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
-// Third party copyrights are property of their respective owners.
-//
-// Redistribution and use in source and binary forms, with or without modification,
-// are permitted provided that the following conditions are met:
-//
-// * Redistribution's of source code must retain the above copyright notice,
-// this list of conditions and the following disclaimer.
-//
-// * Redistribution's in binary form must reproduce the above copyright notice,
-// this list of conditions and the following disclaimer in the documentation
-// and/or other materials provided with the distribution.
-//
-// * The name of the copyright holders may not be used to endorse or promote products
-// derived from this software without specific prior written permission.
-//
-// This software is provided by the copyright holders and contributors "as is" and
-// any express or implied warranties, including, but not limited to, the implied
-// warranties of merchantability and fitness for a particular purpose are disclaimed.
-// In no event shall the Intel Corporation or contributors be liable for any direct,
-// indirect, incidental, special, exemplary, or consequential damages
-// (including, but not limited to, procurement of substitute goods or services;
-// loss of use, data, or profits; or business interruption) however caused
-// and on any theory of liability, whether in contract, strict liability,
-// or tort (including negligence or otherwise) arising in any way out of
-// the use of this software, even if advised of the possibility of such damage.
-//
-//M*/
-
-#include <stddef.h>
-#include "cuda_shared.hpp"
-#include "cuda_runtime.h"
-
-__constant__ float scalar_d[4];
-
-namespace mat_operators
-{
-
- template <typename T, int channels, int count = channels>
- struct unroll
- {
- __device__ static void unroll_set(T * mat, size_t i)
- {
- mat[i] = static_cast<T>(scalar_d[i % channels]);
- unroll<T, channels, count - 1>::unroll_set(mat, i+1);
- }
-
- __device__ static void unroll_set_with_mask(T * mat, float mask, size_t i)
- {
- mat[i] = mask * static_cast<T>(scalar_d[i % channels]);
- unroll<T, channels, count - 1>::unroll_set_with_mask(mat, mask, i+1);
- }
- };
-
- template <typename T, int channels>
- struct unroll<T,channels,0>
- {
- __device__ static void unroll_set(T * , size_t){}
- __device__ static void unroll_set_with_mask(T * , float, size_t){}
- };
-
- template <typename T, int channels>
- __global__ void kernel_set_to_without_mask(T * mat)
- {
- size_t i = (blockIdx.x * blockDim.x + threadIdx.x) * sizeof(T);
- unroll<T, channels>::unroll_set(mat, i);
- }
-
- template <typename T, int channels>
- __global__ void kernel_set_to_with_mask(T * mat, const float * mask)
- {
- size_t i = (blockIdx.x * blockDim.x + threadIdx.x) * sizeof(T);
- unroll<T, channels>::unroll_set_with_mask(mat, i, mask[i]);
- }
-}
-
-
-extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const double * scalar, const DevMem2D& mask, int depth, int channels)
-{
- scalar_d[0] = scalar[0];
- scalar_d[1] = scalar[1];
- scalar_d[2] = scalar[2];
- scalar_d[3] = scalar[3];
-
- int numBlocks = mat.rows * mat.step / 256;
-
- dim3 threadsPerBlock(256);
-
- if (channels == 1)
- {
- if (depth == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char, 1><<<numBlocks,threadsPerBlock>>>(mat.ptr, (float *)mask.ptr);
- if (depth == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 1><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (float *)mask.ptr);
- if (depth == 4) ::mat_operators::kernel_set_to_with_mask<unsigned int, 1><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr, (float *)mask.ptr);
- }
- if (channels == 2)
- {
- if (depth == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char, 2><<<numBlocks,threadsPerBlock>>>(mat.ptr, (float *)mask.ptr);
- if (depth == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 2><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (float *)mask.ptr);
- if (depth == 4) ::mat_operators::kernel_set_to_with_mask<unsigned int, 2><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr, (float *)mask.ptr);
- }
- if (channels == 3)
- {
- if (depth == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char, 3><<<numBlocks,threadsPerBlock>>>(mat.ptr, (float *)mask.ptr);
- if (depth == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 3><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (float *)mask.ptr);
- if (depth == 4) ::mat_operators::kernel_set_to_with_mask<unsigned int, 3><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr, (float *)mask.ptr);
- }
-}
-
-extern "C" void cv::gpu::impl::set_to_without_mask(const DevMem2D& mat, const double * scalar, int depth, int channels)
-{
- scalar_d[0] = scalar[0];
- scalar_d[1] = scalar[1];
- scalar_d[2] = scalar[2];
- scalar_d[3] = scalar[3];
-
- int numBlocks = mat.rows * mat.step / 256;
-
- dim3 threadsPerBlock(256);
-
- if (channels == 1)
- {
- if (depth == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char, 1><<<numBlocks,threadsPerBlock>>>(mat.ptr);
- if (depth == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 1><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr);
- if (depth == 4) ::mat_operators::kernel_set_to_without_mask<unsigned int, 1><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr);
- }
- if (channels == 2)
- {
- if (depth == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char, 2><<<numBlocks,threadsPerBlock>>>(mat.ptr);
- if (depth == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 2><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr);
- if (depth == 4) ::mat_operators::kernel_set_to_without_mask<unsigned int, 2><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr);
- }
- if (channels == 3)
- {
- if (depth == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char, 3><<<numBlocks,threadsPerBlock>>>(mat.ptr);
- if (depth == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 3><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr);
- if (depth == 4) ::mat_operators::kernel_set_to_without_mask<unsigned int, 3><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr);
- }
-}
namespace gpu\r
{\r
// Simple lightweight structure that encapsulates image ptr on device, its pitch and its sizes.\r
- // It is intended to pass to nvcc-compiled code.\r
+ // It is intended to be passed to nvcc-compiled code, since GpuMat depends on headers that nvcc can't compile.\r
\r
template<typename T = unsigned char>\r
struct DevMem2D_\r
{\r
- enum { elem_size = sizeof(T) };\r
+ typedef T elem_t;\r
+ enum { elem_size = sizeof(elem_t) };\r
\r
int cols;\r
int rows;\r
{ \r
//////////////////////////////// Initialization ////////////////////////\r
\r
+ //! This is the only function that does not throw exceptions if the library is compiled without Cuda.\r
CV_EXPORTS int getCudaEnabledDeviceCount();\r
+\r
+ //! Functions below throw cv::Exception if the library is compiled without Cuda.\r
CV_EXPORTS string getDeviceName(int device);\r
CV_EXPORTS void setDevice(int device); \r
+ CV_EXPORTS int getDevice(); \r
\r
CV_EXPORTS void getComputeCapability(int device, int* major, int* minor);\r
CV_EXPORTS int getNumberOfSMs(int device);\r
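\r
    // A minimal usage sketch (assumes the library was built with Cuda and\r
    // at least one device is present):\r
    //\r
    //   int count = cv::gpu::getCudaEnabledDeviceCount();\r
    //   for (int i = 0; i < count; ++i)\r
    //   {\r
    //       int major, minor;\r
    //       cv::gpu::getComputeCapability(i, &major, &minor);\r
    //       std::cout << cv::gpu::getDeviceName(i) << " (sm_" << major << minor\r
    //                 << ", " << cv::gpu::getNumberOfSMs(i) << " SMs)" << std::endl;\r
    //   }\r
    //   cv::gpu::setDevice(0);\r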
\r
//////////////////////////////// GpuMat ////////////////////////////////\r
\r
+ //! Smart pointer for GPU memory with reference counting. Its interface is mostly similar to cv::Mat.\r
class CV_EXPORTS GpuMat\r
{\r
public:\r
GpuMat(const GpuMat& m, const Rect& roi);\r
\r
//! builds GpuMat from Mat. Performs blocking upload to device.\r
- GpuMat (const Mat& m);\r
+ explicit GpuMat (const Mat& m);\r
\r
//! destructor - calls release()\r
~GpuMat();\r
uchar* dataend;\r
};\r
\r
+ //////////////////////////////// MatPL ////////////////////////////////\r
+ // MatPL is a limited cv::Mat with page-locked memory allocation.\r
+ // Page-locked memory is only needed for asynchronous and faster copying to the GPU.\r
+ // It is convertible to a cv::Mat header without reference counting,\r
+ // so you can use it with other OpenCV functions.\r
+ \r
+ class CV_EXPORTS MatPL\r
+ {\r
+ public: \r
+\r
+ // Not supported yet. Current behaviour is the same as ALLOC_DEFAULT.\r
+ //enum { ALLOC_DEFAULT = 0, ALLOC_PORTABLE = 1, ALLOC_WRITE_COMBINED = 4 }\r
+\r
+ MatPL(); \r
+ MatPL(const MatPL& m); \r
+\r
+ MatPL(int _rows, int _cols, int _type);\r
+ MatPL(Size _size, int _type); \r
+\r
+ //! creates from cv::Mat, copying the data\r
+ explicit MatPL(const Mat& m);\r
+ \r
+ ~MatPL(); \r
+\r
+ MatPL& operator = (const MatPL& m);\r
+ \r
+ //! returns deep copy of the matrix, i.e. the data is copied\r
+ MatPL clone() const;\r
+ \r
+ //! allocates new matrix data unless the matrix already has specified size and type. \r
+ void create(int _rows, int _cols, int _type);\r
+ void create(Size _size, int _type); \r
+\r
+ //! decrements the reference counter and releases the memory if needed.\r
+ void release();\r
+\r
+ //! returns a matrix header with reference counting disabled for the MatPL data.\r
+ Mat createMatHeader() const;\r
+ operator Mat() const;\r
+ \r
+ // Please see cv::Mat for descriptions\r
+ bool isContinuous() const; \r
+ size_t elemSize() const; \r
+ size_t elemSize1() const; \r
+ int type() const; \r
+ int depth() const; \r
+ int channels() const; \r
+ size_t step1() const; \r
+ Size size() const; \r
+ bool empty() const;\r
+ \r
+ // Please see cv::Mat for descriptions\r
+ int flags; \r
+ int rows, cols; \r
+ size_t step;\r
+\r
+ uchar* data; \r
+ int* refcount; \r
+\r
+ uchar* datastart;\r
+ uchar* dataend;\r
+ };\r
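+\r
+ // A minimal sketch of the intended use (sizes/types are hypothetical):\r
+ //\r
+ //   MatPL host_frame(480, 640, CV_8UC1); // page-locked allocation\r
+ //   Mat header = host_frame;             // plain Mat header over the same data, no copy\r
+ //   ... fill `header` with ordinary OpenCV calls, then hand host_frame\r
+ //   to CudaStream::enqueueUpload (declared below) for async transfer.\r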
+\r
//////////////////////////////// CudaStream ////////////////////////////////\r
+ // Encapsulates a Cuda stream. Provides an interface for asynchronous copying.\r
+ // Passed to each function that supports asynchronous kernel execution.\r
+ // Reference counting is enabled.\r
\r
- class CudaStream\r
+ class CV_EXPORTS CudaStream\r
{\r
public:\r
-\r
- static CudaStream empty();\r
-\r
CudaStream(); \r
~CudaStream();\r
\r
+ CudaStream(const CudaStream&); \r
+ CudaStream& operator=(const CudaStream&);\r
+\r
bool queryIfComplete();\r
- void waitForCompletion(); \r
+ void waitForCompletion(); \r
\r
- //calls cudaMemcpyAsync\r
+ //! downloads asynchronously.\r
+ // Warning! cv::Mat must point to page-locked memory (i.e. to MatPL data or to its subMat)\r
+ void enqueueDownload(const GpuMat& src, MatPL& dst);\r
void enqueueDownload(const GpuMat& src, Mat& dst);\r
- void enqueueUpload(const Mat& src, GpuMat& dst);\r
- void enqueueCopy(const GpuMat& src, GpuMat& dst);\r
\r
- // calls cudaMemset2D asynchronous for single channel. Invoke kernel for some multichannel.\r
- void enqueueMemSet(const GpuMat& src, Scalar val);\r
+ //! uploads asynchronously.\r
+ // Warning! cv::Mat must point to page-locked memory (i.e. to MatPL data or to its ROI)\r
+ void enqueueUpload(const MatPL& src, GpuMat& dst); \r
+ void enqueueUpload(const Mat& src, GpuMat& dst);\r
\r
- // invoke kernel asynchronous because of mask\r
+ void enqueueCopy(const GpuMat& src, GpuMat& dst);\r
+ \r
+ void enqueueMemSet(const GpuMat& src, Scalar val); \r
void enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask);\r
\r
// converts matrix type, e.g. from float to uchar, depending on type\r
- void enqueueConvert(const GpuMat& src, GpuMat& dst, int type); \r
- \r
- struct Impl;\r
- const Impl& getImpl() const;\r
+ void enqueueConvert(const GpuMat& src, GpuMat& dst, int type, double a = 1, double b = 0); \r
private:\r
- \r
- Impl *impl; \r
-\r
- \r
- \r
- CudaStream(const CudaStream&); \r
- CudaStream& operator=(const CudaStream&);\r
+ void create();\r
+ void release();\r
+ struct Impl;\r
+ Impl *impl; \r
+ friend struct StreamAccessor;\r
};\r
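\r
    // A minimal async pipeline sketch (sizes/types are hypothetical):\r
    //\r
    //   MatPL host_src(480, 640, CV_8UC1), host_dst;\r
    //   GpuMat dev_src, dev_dst;\r
    //   CudaStream stream;\r
    //   stream.enqueueUpload(host_src, dev_src);   // H->D, asynchronous\r
    //   stream.enqueueCopy(dev_src, dev_dst);      // D->D, asynchronous\r
    //   stream.enqueueDownload(dev_dst, host_dst); // D->H, asynchronous\r
    //   stream.waitForCompletion();                // block until the stream drains\r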
\r
//////////////////////////////// StereoBM_GPU ////////////////////////////////\r
StereoBM_GPU(int preset, int ndisparities=0);\r
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair\r
//! Output disparity has CV_8U type.\r
- void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity) const; \r
+ void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity);\r
+\r
+ //! Async version\r
+ void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream);\r
+\r
+ //! Some heuristics that try to estimate\r
+ // whether the current GPU will be faster than the CPU for this algorithm.\r
+ // It queries the currently active device.\r
+ static bool checkIfGpuCallReasonable();\r
private:\r
- mutable GpuMat minSSD;\r
+ GpuMat minSSD;\r
int preset;\r
int ndisp;\r
};\r
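\r
    // Typical call sequence (sketch; assumes `left`/`right` are CV_8UC1 GpuMats\r
    // and that BASIC_PRESET is the preset constant used by the default ctor):\r
    //\r
    //   if (StereoBM_GPU::checkIfGpuCallReasonable())\r
    //   {\r
    //       StereoBM_GPU bm(StereoBM_GPU::BASIC_PRESET, 64);\r
    //       GpuMat disparity;\r
    //       bm(left, right, disparity); // disparity gets CV_8U type\r
    //   }\r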
}\r
}\r
-\r
-\r
-\r
-#include "opencv2/gpu/gpumat.hpp"\r
+#include "opencv2/gpu/matrix_operations.hpp"\r
\r
#endif /* __OPENCV_GPU_HPP__ */
\ No newline at end of file
+++ /dev/null
-/*M///////////////////////////////////////////////////////////////////////////////////////\r
-//\r
-// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
-//\r
-// By downloading, copying, installing or using the software you agree to this license.\r
-// If you do not agree to this license, do not download, install,\r
-// copy or use the software.\r
-//\r
-//\r
-// License Agreement\r
-// For Open Source Computer Vision Library\r
-//\r
-// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
-// Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
-// Third party copyrights are property of their respective owners.\r
-//\r
-// Redistribution and use in source and binary forms, with or without modification,\r
-// are permitted provided that the following conditions are met:\r
-//\r
-// * Redistribution's of source code must retain the above copyright notice,\r
-// this list of conditions and the following disclaimer.\r
-//\r
-// * Redistribution's in binary form must reproduce the above copyright notice,\r
-// this list of conditions and the following disclaimer in the documentation\r
-// and/or other GpuMaterials provided with the distribution.\r
-//\r
-// * The name of the copyright holders may not be used to endorse or promote products\r
-// derived from this software without specific prior written permission.\r
-//\r
-// This software is provided by the copyright holders and contributors "as is" and\r
-// any express or implied warranties, including, but not limited to, the implied\r
-// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
-// In no event shall the Intel Corporation or contributors be liable for any direct,\r
-// indirect, incidental, special, exemplary, or consequential damages\r
-// (including, but not limited to, procurement of substitute goods or services;\r
-// loss of use, data, or profits; or business interruption) however caused\r
-// and on any theory of liability, whether in contract, strict liability,\r
-// or tort (including negligence or otherwise) arising in any way out of\r
-// the use of this software, even if advised of the possibility of such damage.\r
-//\r
-//M*/\r
-\r
-#ifndef __OPENCV_GPU_MATPL_HPP__\r
-#define __OPENCV_GPU_MATPL_HPP__\r
-\r
-#include "opencv2/core/core.hpp"\r
-\r
-namespace cv\r
-{\r
- namespace gpu\r
- { \r
-\r
- //////////////////////////////// MatPL ////////////////////////////////\r
-\r
- //class CV_EXPORTS MatPL : private Mat\r
- //{\r
- //public: \r
- // MatPL() {} \r
- // MatPL(int _rows, int _cols, int _type) : Mat(_rows, _cols, _type) {}\r
- // MatPL(Size _size, int _type) : Mat(_size, _type) {}\r
- // \r
- // Mat(int _rows, int _cols, int _type, const Scalar& _s) : Mat\r
- // MatPL(Size _size, int _type, const Scalar& _s);\r
- // //! copy constructor\r
- // MatPL(const Mat& m);\r
- // //! constructor for matrix headers pointing to user-allocated data\r
- // MatPL(int _rows, int _cols, int _type, void* _data, size_t _step=AUTO_STEP);\r
- // MatPL(Size _size, int _type, void* _data, size_t _step=AUTO_STEP);\r
- // //! creates a matrix header for a part of the bigger matrix\r
- // MatPL(const Mat& m, const Range& rowRange, const Range& colRange);\r
- // MatPL(const Mat& m, const Rect& roi);\r
- // //! converts old-style CvMat to the new matrix; the data is not copied by default\r
- // Mat(const CvMat* m, bool copyData=false);\r
- // MatPL converts old-style IplImage to the new matrix; the data is not copied by default\r
- // MatPL(const IplImage* img, bool copyData=false);\r
- // //! builds matrix from std::vector with or without copying the data\r
- // template<typename _Tp> explicit Mat(const vector<_Tp>& vec, bool copyData=false);\r
- // //! builds matrix from cv::Vec; the data is copied by default\r
- // template<typename _Tp, int n> explicit Mat(const Vec<_Tp, n>& vec,\r
- // bool copyData=true);\r
- // //! builds matrix from cv::Matx; the data is copied by default\r
- // template<typename _Tp, int m, int n> explicit Mat(const Matx<_Tp, m, n>& mtx,\r
- // bool copyData=true);\r
- // //! builds matrix from a 2D point\r
- // template<typename _Tp> explicit Mat(const Point_<_Tp>& pt);\r
- // //! builds matrix from a 3D point\r
- // template<typename _Tp> explicit Mat(const Point3_<_Tp>& pt);\r
- // //! builds matrix from comma initializer\r
- // template<typename _Tp> explicit Mat(const MatCommaInitializer_<_Tp>& commaInitializer);\r
- // //! helper constructor to compile matrix expressions\r
- // Mat(const MatExpr_Base& expr);\r
- // //! destructor - calls release()\r
- // ~Mat();\r
- // //! assignment operators\r
- // Mat& operator = (const Mat& m);\r
- // Mat& operator = (const MatExpr_Base& expr);\r
-\r
- // operator MatExpr_<Mat, Mat>() const;\r
-\r
- // //! returns a new matrix header for the specified row\r
- // Mat row(int y) const;\r
- // //! returns a new matrix header for the specified column\r
- // Mat col(int x) const;\r
- // //! ... for the specified row span\r
- // Mat rowRange(int startrow, int endrow) const;\r
- // Mat rowRange(const Range& r) const;\r
- // //! ... for the specified column span\r
- // Mat colRange(int startcol, int endcol) const;\r
- // Mat colRange(const Range& r) const;\r
- // //! ... for the specified diagonal\r
- // // (d=0 - the main diagonal,\r
- // // >0 - a diagonal from the lower half,\r
- // // <0 - a diagonal from the upper half)\r
- // Mat diag(int d=0) const;\r
- // //! constructs a square diagonal matrix which main diagonal is vector "d"\r
- // static Mat diag(const Mat& d);\r
-\r
- // //! returns deep copy of the matrix, i.e. the data is copied\r
- // Mat clone() const;\r
- // //! copies the matrix content to "m".\r
- // // It calls m.create(this->size(), this->type()).\r
- // void copyTo( Mat& m ) const;\r
- // //! copies those matrix elements to "m" that are marked with non-zero mask elements.\r
- // void copyTo( Mat& m, const Mat& mask ) const;\r
- // //! converts matrix to another datatype with optional scalng. See cvConvertScale.\r
- // void convertTo( Mat& m, int rtype, double alpha=1, double beta=0 ) const;\r
-\r
- // void assignTo( Mat& m, int type=-1 ) const;\r
-\r
- // //! sets every matrix element to s\r
- // Mat& operator = (const Scalar& s);\r
- // //! sets some of the matrix elements to s, according to the mask\r
- // Mat& setTo(const Scalar& s, const Mat& mask=Mat());\r
- // //! creates alternative matrix header for the same data, with different\r
- // // number of channels and/or different number of rows. see cvReshape.\r
- // Mat reshape(int _cn, int _rows=0) const;\r
-\r
- // //! matrix transposition by means of matrix expressions\r
- // MatExpr_<MatExpr_Op2_<Mat, double, Mat, MatOp_T_<Mat> >, Mat>\r
- // t() const;\r
- // //! matrix inversion by means of matrix expressions\r
- // MatExpr_<MatExpr_Op2_<Mat, int, Mat, MatOp_Inv_<Mat> >, Mat>\r
- // inv(int method=DECOMP_LU) const;\r
- // MatExpr_<MatExpr_Op4_<Mat, Mat, double, char, Mat, MatOp_MulDiv_<Mat> >, Mat>\r
- // //! per-element matrix multiplication by means of matrix expressions\r
- // mul(const Mat& m, double scale=1) const;\r
- // MatExpr_<MatExpr_Op4_<Mat, Mat, double, char, Mat, MatOp_MulDiv_<Mat> >, Mat>\r
- // mul(const MatExpr_<MatExpr_Op2_<Mat, double, Mat, MatOp_Scale_<Mat> >, Mat>& m, double scale=1) const;\r
- // MatExpr_<MatExpr_Op4_<Mat, Mat, double, char, Mat, MatOp_MulDiv_<Mat> >, Mat> \r
- // mul(const MatExpr_<MatExpr_Op2_<Mat, double, Mat, MatOp_DivRS_<Mat> >, Mat>& m, double scale=1) const;\r
-\r
- // //! computes cross-product of 2 3D vectors\r
- // Mat cross(const Mat& m) const;\r
- // //! computes dot-product\r
- // double dot(const Mat& m) const;\r
-\r
- // //! Matlab-style matrix initialization\r
- // static MatExpr_Initializer zeros(int rows, int cols, int type);\r
- // static MatExpr_Initializer zeros(Size size, int type);\r
- // static MatExpr_Initializer ones(int rows, int cols, int type);\r
- // static MatExpr_Initializer ones(Size size, int type);\r
- // static MatExpr_Initializer eye(int rows, int cols, int type);\r
- // static MatExpr_Initializer eye(Size size, int type);\r
-\r
- // //! allocates new matrix data unless the matrix already has specified size and type.\r
- // // previous data is unreferenced if needed.\r
- // void create(int _rows, int _cols, int _type);\r
- // void create(Size _size, int _type);\r
- // //! increases the reference counter; use with care to avoid memleaks\r
- // void addref();\r
- // //! decreases reference counter;\r
- // // deallocate the data when reference counter reaches 0.\r
- // void release();\r
-\r
- // //! locates matrix header within a parent matrix. See below\r
- // void locateROI( Size& wholeSize, Point& ofs ) const;\r
- // //! moves/resizes the current matrix ROI inside the parent matrix.\r
- // Mat& adjustROI( int dtop, int dbottom, int dleft, int dright );\r
- // //! extracts a rectangular sub-matrix\r
- // // (this is a generalized form of row, rowRange etc.)\r
- // Mat operator()( Range rowRange, Range colRange ) const;\r
- // Mat operator()( const Rect& roi ) const;\r
-\r
- // //! converts header to CvMat; no data is copied\r
- // operator CvMat() const;\r
- // //! converts header to IplImage; no data is copied\r
- // operator IplImage() const;\r
-\r
- // //! returns true iff the matrix data is continuous\r
- // // (i.e. when there are no gaps between successive rows).\r
- // // similar to CV_IS_MAT_CONT(cvmat->type)\r
- // bool isContinuous() const;\r
- // //! returns element size in bytes,\r
- // // similar to CV_ELEM_SIZE(cvmat->type)\r
- // size_t elemSize() const;\r
- // //! returns the size of element channel in bytes.\r
- // size_t elemSize1() const;\r
- // //! returns element type, similar to CV_MAT_TYPE(cvmat->type)\r
- // int type() const;\r
- // //! returns element type, similar to CV_MAT_DEPTH(cvmat->type)\r
- // int depth() const;\r
- // //! returns element type, similar to CV_MAT_CN(cvmat->type)\r
- // int channels() const;\r
- // //! returns step/elemSize1()\r
- // size_t step1() const;\r
- // //! returns matrix size:\r
- // // width == number of columns, height == number of rows\r
- // Size size() const;\r
- // //! returns true if matrix data is NULL\r
- // bool empty() const;\r
-\r
- // //! returns pointer to y-th row\r
- // uchar* ptr(int y=0);\r
- // const uchar* ptr(int y=0) const;\r
-\r
- // //! template version of the above method\r
- // template<typename _Tp> _Tp* ptr(int y=0);\r
- // template<typename _Tp> const _Tp* ptr(int y=0) const;\r
-\r
- // //! template methods for read-write or read-only element access.\r
- // // note that _Tp must match the actual matrix type -\r
- // // the functions do not do any on-fly type conversion\r
- // template<typename _Tp> _Tp& at(int y, int x);\r
- // template<typename _Tp> _Tp& at(Point pt);\r
- // template<typename _Tp> const _Tp& at(int y, int x) const;\r
- // template<typename _Tp> const _Tp& at(Point pt) const;\r
- // template<typename _Tp> _Tp& at(int i);\r
- // template<typename _Tp> const _Tp& at(int i) const;\r
-\r
- // //! template methods for iteration over matrix elements.\r
- // // the iterators take care of skipping gaps in the end of rows (if any)\r
- // template<typename _Tp> MatIterator_<_Tp> begin();\r
- // template<typename _Tp> MatIterator_<_Tp> end();\r
- // template<typename _Tp> MatConstIterator_<_Tp> begin() const;\r
- // template<typename _Tp> MatConstIterator_<_Tp> end() const;\r
-\r
- // enum { MAGIC_VAL=0x42FF0000, AUTO_STEP=0, CONTINUOUS_FLAG=CV_MAT_CONT_FLAG };\r
-\r
- // /*! includes several bit-fields:\r
- // - the magic signature\r
- // - continuity flag\r
- // - depth\r
- // - number of channels\r
- // */\r
- // int flags;\r
- // //! the number of rows and columns\r
- // int rows, cols;\r
- // //! a distance between successive rows in bytes; includes the gap if any\r
- // size_t step;\r
- // //! pointer to the data\r
- // uchar* data;\r
-\r
- // //! pointer to the reference counter;\r
- // // when matrix points to user-allocated data, the pointer is NULL\r
- // int* refcount;\r
-\r
- // //! helper fields used in locateROI and adjustROI\r
- // uchar* datastart;\r
- // uchar* dataend;\r
- //};\r
- }\r
-}\r
-\r
-\r
-#endif /* __OPENCV_GPU_MATPL_HPP__ */
\ No newline at end of file
#ifndef __OPENCV_GPU_MATRIX_OPERATIONS_HPP__\r
#define __OPENCV_GPU_MATRIX_OPERATIONS_HPP__\r
\r
-\r
namespace cv\r
{\r
\r
namespace gpu\r
{\r
\r
+////////////////////////////////////////////////////////////////////////\r
//////////////////////////////// GpuMat ////////////////////////////////\r
+////////////////////////////////////////////////////////////////////////\r
\r
-inline GpuMat::GpuMat()\r
- : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) {}\r
+inline GpuMat::GpuMat() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) {}\r
\r
-inline GpuMat::GpuMat(int _rows, int _cols, int _type)\r
- : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)\r
+inline GpuMat::GpuMat(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)\r
{\r
if( _rows > 0 && _cols > 0 )\r
create( _rows, _cols, _type );\r
}\r
\r
-inline GpuMat::GpuMat(Size _size, int _type)\r
- : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)\r
+inline GpuMat::GpuMat(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)\r
{\r
if( _size.height > 0 && _size.width > 0 )\r
create( _size.height, _size.width, _type );\r
\r
//CPP GpuMat& GpuMat::operator = (const Scalar& s);\r
//CPP GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask=GpuMat());\r
-\r
//CPP GpuMat GpuMat::reshape(int _cn, int _rows=0) const;\r
-\r
-//CPP void GpuMat::create(int _rows, int _cols, int _type);\r
inline void GpuMat::create(Size _size, int _type) { create(_size.height, _size.width, _type); }\r
-\r
+//CPP void GpuMat::create(int _rows, int _cols, int _type);\r
//CPP void GpuMat::release();\r
\r
inline void GpuMat::swap(GpuMat& b) \r
static inline void swap( GpuMat& a, GpuMat& b ) { a.swap(b); }\r
\r
\r
+///////////////////////////////////////////////////////////////////////\r
+//////////////////////////////// MatPL ////////////////////////////////\r
+///////////////////////////////////////////////////////////////////////\r
+\r
+inline MatPL::MatPL() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) {}\r
+inline MatPL::MatPL(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)\r
+{\r
+ if( _rows > 0 && _cols > 0 )\r
+ create( _rows, _cols, _type );\r
+}\r
+\r
+inline MatPL::MatPL(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)\r
+{\r
+ if( _size.height > 0 && _size.width > 0 )\r
+ create( _size.height, _size.width, _type );\r
+}\r
+\r
+inline MatPL::MatPL(const MatPL& m) : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend)\r
+{\r
+ if( refcount )\r
+ CV_XADD(refcount, 1);\r
+\r
+}\r
+\r
+inline MatPL::MatPL(const Mat& m) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)\r
+{\r
+ if( m.rows > 0 && m.cols > 0 )\r
+ create( m.size(), m.type() );\r
+\r
+ Mat tmp = createMatHeader();\r
+ m.copyTo(tmp);\r
+}\r
+\r
+inline MatPL::~MatPL()\r
+{\r
+ release();\r
+}\r
+inline MatPL& MatPL::operator = (const MatPL& m)\r
+{\r
+ if( this != &m )\r
+ {\r
+ if( m.refcount )\r
+ CV_XADD(m.refcount, 1);\r
+ release();\r
+ flags = m.flags;\r
+ rows = m.rows; cols = m.cols;\r
+ step = m.step; data = m.data; \r
+ datastart = m.datastart;\r
+ dataend = m.dataend;\r
+ refcount = m.refcount;\r
+ }\r
+ return *this;\r
+}\r
+\r
+inline MatPL MatPL::clone() const\r
+{\r
+ MatPL m(size(), type()); \r
+ Mat to = m;\r
+ Mat from = *this;\r
+ from.copyTo(to);\r
+ return m;\r
+}\r
+\r
+inline void MatPL::create(Size _size, int _type) { create(_size.height, _size.width, _type); } \r
+//CCP void MatPL::create(int _rows, int _cols, int _type); \r
+//CPP void MatPL::release();\r
+\r
+inline Mat MatPL::createMatHeader() const { return Mat(size(), type(), data); }\r
+inline MatPL::operator Mat() const { return createMatHeader(); }\r
+\r
+inline bool MatPL::isContinuous() const { return (flags & Mat::CONTINUOUS_FLAG) != 0; }\r
+inline size_t MatPL::elemSize() const { return CV_ELEM_SIZE(flags); }\r
+inline size_t MatPL::elemSize1() const { return CV_ELEM_SIZE1(flags); }\r
+inline int MatPL::type() const { return CV_MAT_TYPE(flags); }\r
+inline int MatPL::depth() const { return CV_MAT_DEPTH(flags); }\r
+inline int MatPL::channels() const { return CV_MAT_CN(flags); }\r
+inline size_t MatPL::step1() const { return step/elemSize1(); }\r
+inline Size MatPL::size() const { return Size(cols, rows); }\r
+inline bool MatPL::empty() const { return data == 0; } \r
+\r
+\r
} /* end of namespace gpu */\r
\r
} /* end of namespace cv */\r
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////\r
+//\r
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.\r
+//\r
+// By downloading, copying, installing or using the software you agree to this license.\r
+// If you do not agree to this license, do not download, install,\r
+// copy or use the software.\r
+//\r
+//\r
+// License Agreement\r
+// For Open Source Computer Vision Library\r
+//\r
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.\r
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.\r
+// Third party copyrights are property of their respective owners.\r
+//\r
+// Redistribution and use in source and binary forms, with or without modification,\r
+// are permitted provided that the following conditions are met:\r
+//\r
+// * Redistribution's of source code must retain the above copyright notice,\r
+// this list of conditions and the following disclaimer.\r
+//\r
+// * Redistribution's in binary form must reproduce the above copyright notice,\r
+// this list of conditions and the following disclaimer in the documentation\r
+// and/or other GpuMaterials provided with the distribution.\r
+//\r
+// * The name of the copyright holders may not be used to endorse or promote products\r
+// derived from this software without specific prior written permission.\r
+//\r
+// This software is provided by the copyright holders and contributors "as is" and\r
+// any express or implied warranties, including, but not limited to, the implied\r
+// warranties of merchantability and fitness for a particular purpose are disclaimed.\r
+// In no event shall the Intel Corporation or contributors be liable for any direct,\r
+// indirect, incidental, special, exemplary, or consequential damages\r
+// (including, but not limited to, procurement of substitute goods or services;\r
+// loss of use, data, or profits; or business interruption) however caused\r
+// and on any theory of liability, whether in contract, strict liability,\r
+// or tort (including negligence or otherwise) arising in any way out of\r
+// the use of this software, even if advised of the possibility of such damage.\r
+//\r
+//M*/\r
+\r
+#ifndef __OPENCV_GPU_STREAM_ACCESSOR_HPP__\r
+#define __OPENCV_GPU_STREAM_ACCESSOR_HPP__\r
+\r
+#include "opencv2/gpu/gpu.hpp"\r
+#include "cuda_runtime_api.h"\r
+\r
+namespace cv\r
+{\r
+ namespace gpu\r
+ {\r
+ // This is the only header file that depends on Cuda; all other headers are independent.\r
+ // So if you use OpenCV binaries you do not need to install the Cuda Toolkit.\r
+ // But if you want to use the GPU yourself, you may get the cuda stream instance using the class below.\r
+ // In this case you have to install the Cuda Toolkit.\r
+ struct StreamAccessor\r
+ {\r
+ CV_EXPORTS static cudaStream_t getStream(const CudaStream& stream);\r
+ };\r
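+\r
+ // Sketch: obtaining the raw stream for a custom kernel launch\r
+ // (my_kernel, grid and block are placeholders for user code):\r
+ //\r
+ //   cudaStream_t s = cv::gpu::StreamAccessor::getStream(stream);\r
+ //   my_kernel<<<grid, block, 0, s>>>(...);\r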
+ }\r
+}\r
+\r
+#endif /* __OPENCV_GPU_STREAM_ACCESSOR_HPP__ */
\ No newline at end of file
#define __OPENCV_CUDA_SHARED_HPP__\r
\r
#include "opencv2/gpu/devmem2d.hpp"\r
+#include "cuda_runtime_api.h" \r
\r
namespace cv\r
{\r
namespace gpu\r
- {\r
+ { \r
typedef unsigned char uchar;\r
typedef unsigned short ushort;\r
- typedef unsigned int uint;\r
+ typedef unsigned int uint; \r
\r
extern "C" void error( const char *error_string, const char *file, const int line, const char *func = "");\r
\r
namespace impl\r
- {\r
+ { \r
static inline int divUp(int a, int b) { return (a % b == 0) ? a/b : a/b + 1; }\r
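            // e.g. divUp(1000, 256) == 4: enough 256-thread blocks to cover 1000 items,\r
            // as in dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));\r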
\r
extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, DevMem2D& disp, int maxdisp, DevMem2D_<uint>& minSSD_buf);\r
-\r
- extern "C" void set_to_without_mask (const DevMem2D& mat, const double * scalar, int depth, int channels);\r
- extern "C" void set_to_with_mask (const DevMem2D& mat, const double * scalar, const DevMem2D& mask, int depth, int channels);\r
}\r
}\r
}\r
\r
-#ifdef __CUDACC__\r
- #define cudaSafeCall(expr) { cudaError_t err = expr; if( cudaSuccess != err) cv::gpu::error(cudaGetErrorString(err), __FILE__, __LINE__); }\r
+#if defined(__GNUC__)\r
+ #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)\r
+#else /* defined(__CUDACC__) || defined(__MSVC__) */\r
+ #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__) \r
#endif\r
\r
+ static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")\r
+ {\r
+ if( cudaSuccess != err)\r
+ cv::gpu::error(cudaGetErrorString(err), file, line, func); // report the caller's location, not this header's\r
+ }\r
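+\r
+ // Usage sketch: wrap any Cuda runtime call so a failure raises cv::Exception\r
+ // carrying the caller's file/line, e.g.\r
+ //\r
+ //   cudaSafeCall( cudaMemset2D(dev_ptr, pitch, 0, width_bytes, rows) );\r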
+\r
#endif /* __OPENCV_CUDA_SHARED_HPP__ */\r
//M*/\r
\r
#include "precomp.hpp"\r
-//#include "opencv2/gpu/stream_access.hpp"\r
\r
using namespace cv;\r
using namespace cv::gpu;\r
\r
\r
-cv::gpu::CudaStream::CudaStream() //: impl( (Impl*)fastMalloc(sizeof(Impl)) )\r
+#if !defined (HAVE_CUDA)\r
+\r
+void cv::gpu::CudaStream::create() { throw_nogpu(); }\r
+void cv::gpu::CudaStream::release() { throw_nogpu(); }\r
+cv::gpu::CudaStream::CudaStream() : impl(0) { throw_nogpu(); }\r
+cv::gpu::CudaStream::~CudaStream() { throw_nogpu(); }\r
+cv::gpu::CudaStream::CudaStream(const CudaStream& stream) { throw_nogpu(); }\r
+CudaStream& cv::gpu::CudaStream::operator=(const CudaStream& stream) { throw_nogpu(); return *this; }\r
+bool cv::gpu::CudaStream::queryIfComplete() { throw_nogpu(); return true; }\r
+void cv::gpu::CudaStream::waitForCompletion() { throw_nogpu(); }\r
+void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, Mat& dst) { throw_nogpu(); }\r
+void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, MatPL& dst) { throw_nogpu(); }\r
+void cv::gpu::CudaStream::enqueueUpload(const MatPL& src, GpuMat& dst) { throw_nogpu(); }\r
+void cv::gpu::CudaStream::enqueueUpload(const Mat& src, GpuMat& dst) { throw_nogpu(); }\r
+void cv::gpu::CudaStream::enqueueCopy(const GpuMat& src, GpuMat& dst) { throw_nogpu(); }\r
+void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val) { throw_nogpu(); }\r
+void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask) { throw_nogpu(); }\r
+void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int type, double a, double b) { throw_nogpu(); }\r
+\r
+#else /* !defined (HAVE_CUDA) */\r
+\r
+#include "opencv2/gpu/stream_accessor.hpp"\r
+\r
+struct CudaStream::Impl\r
{\r
- //cudaSafeCall( cudaStreamCreate( &impl->stream) );\r
-}\r
-cv::gpu::CudaStream::~CudaStream()\r
+ cudaStream_t stream;\r
+ int ref_counter;\r
+};\r
+namespace \r
{\r
- if (impl)\r
+ template<class S, class D> void devcopy(const S& src, D& dst, cudaStream_t s, cudaMemcpyKind k)\r
{\r
- cudaSafeCall( cudaStreamDestroy( *(cudaStream_t*)impl ) );\r
- cv::fastFree( impl );\r
- }\r
+ dst.create(src.size(), src.type());\r
+ size_t bwidth = src.cols * src.elemSize();\r
+ cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, k, s) ); \r
+ };\r
}\r
\r
-bool cv::gpu::CudaStream::queryIfComplete()\r
+CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const CudaStream& stream) { return stream.impl->stream; };\r
+\r
+void cv::gpu::CudaStream::create()\r
{\r
- //cudaError_t err = cudaStreamQuery( *(cudaStream_t*)impl );\r
+ if (impl)\r
+ release();\r
\r
- //if (err == cudaSuccess)\r
- // return true;\r
+ cudaStream_t stream;\r
+ cudaSafeCall( cudaStreamCreate( &stream ) );\r
\r
- //if (err == cudaErrorNotReady)\r
- // return false;\r
+ impl = (CudaStream::Impl*)fastMalloc(sizeof(CudaStream::Impl));\r
\r
- ////cudaErrorInvalidResourceHandle\r
- //cudaSafeCall( err );\r
- return true;\r
+ impl->stream = stream;\r
+ impl->ref_counter = 1; \r
}\r
-void cv::gpu::CudaStream::waitForCompletion()\r
+\r
+void cv::gpu::CudaStream::release()\r
{\r
- cudaSafeCall( cudaStreamSynchronize( *(cudaStream_t*)impl ) );\r
+ if( impl && CV_XADD(&impl->ref_counter, -1) == 1 )\r
+ {\r
+ cudaSafeCall( cudaStreamDestroy( impl->stream ) );\r
+ cv::fastFree( impl );\r
+ }\r
}\r
\r
-void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, Mat& dst)\r
+cv::gpu::CudaStream::CudaStream() : impl(0) { create(); }\r
+cv::gpu::CudaStream::~CudaStream() { release(); }\r
+\r
+cv::gpu::CudaStream::CudaStream(const CudaStream& stream) : impl(stream.impl)\r
{\r
-// cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost,\r
+ if( impl )\r
+ CV_XADD(&impl->ref_counter, 1);\r
}\r
-void cv::gpu::CudaStream::enqueueUpload(const Mat& src, GpuMat& dst)\r
+CudaStream& cv::gpu::CudaStream::operator=(const CudaStream& stream)\r
{\r
- CV_Assert(!"Not implemented");\r
+ if( this != &stream )\r
+ {\r
+ if( stream.impl )\r
+ CV_XADD(&stream.impl->ref_counter, 1);\r
+\r
+ release();\r
+ impl = stream.impl; \r
+ }\r
+ return *this;\r
}\r
-void cv::gpu::CudaStream::enqueueCopy(const GpuMat& src, GpuMat& dst)\r
+\r
+bool cv::gpu::CudaStream::queryIfComplete()\r
{\r
- CV_Assert(!"Not implemented");\r
+ cudaError_t err = cudaStreamQuery( impl->stream );\r
+\r
+ if (err == cudaErrorNotReady || err == cudaSuccess)\r
+ return err == cudaSuccess;\r
+\r
+ cudaSafeCall(err); // throws via cv::gpu::error\r
+ return false;\r
}\r
\r
+void cv::gpu::CudaStream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( impl->stream ) ); }\r
+\r
+void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, Mat& dst) \r
+{ \r
+ // sizes and type must already match; otherwise dst would be reallocated\r
+ // and would no longer point to page-locked memory\r
+ CV_Assert(src.cols == dst.cols && src.rows == dst.rows && src.type() == dst.type() );\r
+ devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost);\r
+}\r
+void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, MatPL& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); }\r
+\r
+void cv::gpu::CudaStream::enqueueUpload(const MatPL& src, GpuMat& dst){ devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }\r
+void cv::gpu::CudaStream::enqueueUpload(const Mat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); } \r
+void cv::gpu::CudaStream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToDevice); }\r
+\r
void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val)\r
{\r
CV_Assert(!"Not implemented");\r
CV_Assert(!"Not implemented");\r
}\r
\r
-void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int type)\r
+void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int type, double a, double b)\r
{\r
CV_Assert(!"Not implemented");\r
}\r
\r
-//struct cudaStream_t& cv::gpu::CudaStream::getStream() { return stream; }\r
-\r
\r
+#endif /* !defined (HAVE_CUDA) */
\ No newline at end of file
using namespace cv;\r
using namespace cv::gpu;\r
\r
-#ifndef HAVE_CUDA\r
+\r
+#if !defined (HAVE_CUDA)\r
\r
CV_EXPORTS int cv::gpu::getCudaEnabledDeviceCount() { return 0; }\r
-CV_EXPORTS string cv::gpu::getDeviceName(int /*device*/) { cudaSafeCall(0); return 0; } \r
-CV_EXPORTS void cv::gpu::setDevice(int /*device*/) { cudaSafeCall(0); } \r
-CV_EXPORTS void cv::gpu::getComputeCapability(int /*device*/, int* /*major*/, int* /*minor*/) { cudaSafeCall(0); } \r
-CV_EXPORTS int cv::gpu::getNumberOfSMs(int /*device*/) { cudaSafeCall(0); return 0; } \r
+CV_EXPORTS string cv::gpu::getDeviceName(int /*device*/) { throw_nogpu(); return 0; } \r
+CV_EXPORTS void cv::gpu::setDevice(int /*device*/) { throw_nogpu(); } \r
+CV_EXPORTS int cv::gpu::getDevice() { throw_nogpu(); return 0; } \r
+CV_EXPORTS void cv::gpu::getComputeCapability(int /*device*/, int* /*major*/, int* /*minor*/) { throw_nogpu(); } \r
+CV_EXPORTS int cv::gpu::getNumberOfSMs(int /*device*/) { throw_nogpu(); return 0; } \r
+\r
\r
-#else\r
+#else /* !defined (HAVE_CUDA) */\r
\r
CV_EXPORTS int cv::gpu::getCudaEnabledDeviceCount()\r
{\r
{\r
cudaSafeCall( cudaSetDevice( device ) );\r
}\r
+CV_EXPORTS int cv::gpu::getDevice()\r
+{\r
+ int device; \r
+ cudaSafeCall( cudaGetDevice( &device ) );\r
+ return device;\r
+}\r
\r
CV_EXPORTS void cv::gpu::getComputeCapability(int device, int* major, int* minor)\r
{\r
return prop.multiProcessorCount;\r
}\r
\r
-#endif
\ No newline at end of file
+#endif\r
+\r
using namespace cv;\r
using namespace cv::gpu;\r
\r
+////////////////////////////////////////////////////////////////////////\r
//////////////////////////////// GpuMat ////////////////////////////////\r
+////////////////////////////////////////////////////////////////////////\r
\r
-void GpuMat::upload(const Mat& m)\r
+\r
+#if !defined (HAVE_CUDA)\r
+\r
+namespace cv\r
+{\r
+ namespace gpu\r
+ {\r
+ void GpuMat::upload(const Mat& /*m*/) { throw_nogpu(); }\r
+ void GpuMat::download(cv::Mat& /*m*/) const { throw_nogpu(); }\r
+ void GpuMat::copyTo( GpuMat& /*m*/ ) const { throw_nogpu(); }\r
+ void GpuMat::copyTo( GpuMat& /*m*/, const GpuMat&/* mask */) const { throw_nogpu(); }\r
+ void GpuMat::convertTo( GpuMat& /*m*/, int /*rtype*/, double /*alpha*/, double /*beta*/ ) const { throw_nogpu(); }\r
+ GpuMat& GpuMat::operator = (const Scalar& /*s*/) { throw_nogpu(); return *this; }\r
+ GpuMat& GpuMat::setTo(const Scalar& /*s*/, const GpuMat& /*mask*/) { throw_nogpu(); return *this; }\r
+ GpuMat GpuMat::reshape(int /*new_cn*/, int /*new_rows*/) const { throw_nogpu(); return GpuMat(); }\r
+ void GpuMat::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); }\r
+ void GpuMat::release() { throw_nogpu(); }\r
+\r
+ void MatPL::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); }\r
+ void MatPL::release() { throw_nogpu(); }\r
+ }\r
+\r
+}\r
+\r
+\r
+#else /* !defined (HAVE_CUDA) */\r
+\r
+\r
+void cv::gpu::GpuMat::upload(const Mat& m)\r
{\r
CV_DbgAssert(!m.empty());\r
create(m.size(), m.type());\r
cudaSafeCall( cudaMemcpy2D(data, step, m.data, m.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) );\r
}\r
\r
-void GpuMat::download(cv::Mat& m) const\r
+void cv::gpu::GpuMat::download(cv::Mat& m) const\r
{\r
CV_DbgAssert(!this->empty());\r
m.create(size(), type());\r
cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) );\r
}\r
\r
-void GpuMat::copyTo( GpuMat& m ) const\r
+void cv::gpu::GpuMat::copyTo( GpuMat& m ) const\r
{\r
CV_DbgAssert(!this->empty());\r
m.create(size(), type());\r
cudaSafeCall( cudaThreadSynchronize() );\r
}\r
\r
-void GpuMat::copyTo( GpuMat& /*m*/, const GpuMat&/* mask */) const\r
-{\r
+void cv::gpu::GpuMat::copyTo( GpuMat& /*m*/, const GpuMat&/* mask */) const\r
+{ \r
CV_Assert(!"Not implemented");\r
}\r
\r
-void GpuMat::convertTo( GpuMat& /*m*/, int /*rtype*/, double /*alpha*/, double /*beta*/ ) const\r
+void cv::gpu::GpuMat::convertTo( GpuMat& /*m*/, int /*rtype*/, double /*alpha*/, double /*beta*/ ) const\r
{\r
CV_Assert(!"Not implemented");\r
}\r
\r
-GpuMat& GpuMat::operator = (const Scalar& s)\r
+GpuMat& cv::gpu::GpuMat::operator = (const Scalar& /*s*/)\r
{\r
- cv::gpu::impl::set_to_without_mask(*this, s.val, this->depth(), this->channels());\r
+ CV_Assert(!"Not implemented"); \r
return *this;\r
}\r
\r
-GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)\r
+GpuMat& cv::gpu::GpuMat::setTo(const Scalar& /*s*/, const GpuMat& /*mask*/)\r
{\r
- CV_Assert(mask.type() == CV_32F);\r
-\r
- CV_DbgAssert(!this->empty());\r
-\r
- this->channels();\r
- this->depth();\r
-\r
- if (mask.empty())\r
- {\r
- cv::gpu::impl::set_to_without_mask(*this, s.val, this->depth(), this->channels());\r
- }\r
- else\r
- {\r
- cv::gpu::impl::set_to_with_mask(*this, s.val, mask, this->depth(), this->channels());\r
- }\r
-\r
+ CV_Assert(!"Not implemented"); \r
return *this;\r
}\r
\r
\r
-GpuMat GpuMat::reshape(int new_cn, int new_rows) const\r
+GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const\r
{\r
GpuMat hdr = *this;\r
\r
return hdr;\r
}\r
\r
-void GpuMat::create(int _rows, int _cols, int _type)\r
+void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)\r
{\r
_type &= TYPE_MASK;\r
if( rows == _rows && cols == _cols && type() == _type && data )\r
rows = _rows;\r
cols = _cols;\r
\r
- size_t esz = elemSize();\r
+ size_t esz = elemSize(); \r
\r
void *dev_ptr;\r
cudaSafeCall( cudaMallocPitch(&dev_ptr, &step, esz * cols, rows) );\r
size_t nettosize = (size_t)_nettosize;\r
\r
datastart = data = (uchar*)dev_ptr;\r
- dataend = data + nettosize;\r
+ dataend = data + nettosize; \r
\r
refcount = (int*)fastMalloc(sizeof(*refcount));\r
*refcount = 1;\r
}\r
}\r
\r
-void GpuMat::release()\r
+void cv::gpu::GpuMat::release()\r
{\r
if( refcount && CV_XADD(refcount, -1) == 1 )\r
{\r
fastFree(refcount);\r
- cudaSafeCall( cudaFree(datastart) );\r
+ cudaSafeCall( cudaFree(datastart) ); \r
}\r
data = datastart = dataend = 0;\r
step = rows = cols = 0;\r
}\r
\r
\r
+///////////////////////////////////////////////////////////////////////\r
+//////////////////////////////// MatPL ////////////////////////////////\r
+///////////////////////////////////////////////////////////////////////\r
+\r
+void cv::gpu::MatPL::create(int _rows, int _cols, int _type)\r
+{\r
+ _type &= TYPE_MASK;\r
+ if( rows == _rows && cols == _cols && type() == _type && data )\r
+ return;\r
+ if( data )\r
+ release();\r
+ CV_DbgAssert( _rows >= 0 && _cols >= 0 );\r
+ if( _rows > 0 && _cols > 0 )\r
+ {\r
+ flags = Mat::MAGIC_VAL + Mat::CONTINUOUS_FLAG + _type;\r
+ rows = _rows;\r
+ cols = _cols;\r
+ step = elemSize()*cols;\r
+ int64 _nettosize = (int64)step*rows;\r
+ size_t nettosize = (size_t)_nettosize;\r
+ if( _nettosize != (int64)nettosize )\r
+ CV_Error(CV_StsNoMem, "Too big buffer is allocated");\r
+ size_t datasize = alignSize(nettosize, (int)sizeof(*refcount));\r
+\r
+ //datastart = data = (uchar*)fastMalloc(datasize + sizeof(*refcount)); \r
+ void *ptr;\r
+ cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) );\r
\r
+ datastart = data = (uchar*)ptr; \r
+ dataend = data + nettosize; \r
\r
+ refcount = (int*)cv::fastMalloc(sizeof(*refcount));\r
+ *refcount = 1;\r
+ }\r
+}\r
\r
+void cv::gpu::MatPL::release()\r
+{\r
+ if( refcount && CV_XADD(refcount, -1) == 1 )\r
+ {\r
+ cudaSafeCall( cudaFreeHost(datastart ) );\r
+ fastFree(refcount);\r
+ }\r
+ data = datastart = dataend = 0;\r
+ step = rows = cols = 0;\r
+ refcount = 0;\r
+}\r
\r
+#endif /* !defined (HAVE_CUDA) */
\ No newline at end of file
/* End of file. */\r
\r
\r
-extern "C" void cv::gpu::error( const char *error_string, const char *file, const int line, const char *func)\r
-{ \r
- cv::error( cv::Exception(CV_GpuApiCallError, error_string, func, file, line) );\r
-} \r
+namespace cv\r
+{\r
+ namespace gpu\r
+ {\r
+ extern "C" void error(const char *error_string, const char *file, const int line, const char *func)\r
+ { \r
+ cv::error( cv::Exception(CV_GpuApiCallError, error_string, func, file, line) );\r
+ }\r
+ }\r
+}\r
#include <iostream>\r
\r
#include "opencv2/gpu/gpu.hpp"\r
-#include "cuda_shared.hpp"\r
\r
-#ifndef HAVE_CUDA\r
\r
- #define cudaSafeCall(expr) CV_Error(CV_GpuNotFound, "The library is compilled with no GPU support")\r
- #define cudaCallerSafeCall(expr) CV_Error(CV_GpuNotFound, "The library is compilled with no GPU support")\r
+#if defined(HAVE_CUDA)\r
\r
-#else /* HAVE_CUDA */\r
+ #include "cuda_shared.hpp"\r
+ #include "cuda_runtime_api.h" \r
\r
-#if _MSC_VER >= 1200 \r
- #pragma warning (disable : 4100 4211 4201 4408) \r
-#endif\r
-\r
-#include "cuda_runtime_api.h"\r
-\r
-#ifdef __GNUC__ \r
- #define cudaSafeCall(expr) { cudaError_t err = expr; if(cudaSuccess != err) cv::gpu::error(cudaGetErrorString(err), __FILE__, __LINE__, __func__); } \r
-#else \r
- #define cudaSafeCall(expr) { cudaError_t err = expr; if(cudaSuccess != err) cv::gpu::error(cudaGetErrorString(err), __FILE__, __LINE__); } \r
-#endif\r
+#else /* defined(HAVE_CUDA) */\r
\r
-#define cudaCallerSafeCall(expr) expr;\r
+ static inline void throw_nogpu() { CV_Error(CV_GpuNotFound, "The library is compiled with no GPU support"); }\r
+ \r
+#endif /* defined(HAVE_CUDA) */\r
\r
-\r
-#endif /* HAVE_CUDA */\r
-\r
-#endif\r
+#endif /* __OPENCV_PRECOMP_H__ */ \r
\r
using namespace cv;\r
using namespace cv::gpu;\r
+\r
+#if !defined (HAVE_CUDA)\r
+\r
+cv::gpu::StereoBM_GPU::StereoBM_GPU() { throw_nogpu(); }\r
+cv::gpu::StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_) { throw_nogpu(); }\r
+\r
+bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable() { throw_nogpu(); return false; }\r
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity) { throw_nogpu(); }\r
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream) { throw_nogpu(); }\r
+\r
+\r
+#else /* !defined (HAVE_CUDA) */\r
\r
-StereoBM_GPU::StereoBM_GPU() : preset(BASIC_PRESET), ndisp(64) {}\r
-StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_) : preset(preset_), ndisp(ndisparities_) \r
+cv::gpu::StereoBM_GPU::StereoBM_GPU() : preset(BASIC_PRESET), ndisp(64) {}\r
+cv::gpu::StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_) : preset(preset_), ndisp(ndisparities_) \r
{\r
const int max_supported_ndisp = 1 << (sizeof(unsigned char) * 8);\r
CV_Assert(ndisp <= max_supported_ndisp);\r
+ CV_Assert(ndisp % 8 == 0);\r
+}\r
+\r
+bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable()\r
+{\r
+ if (0 == getCudaEnabledDeviceCount())\r
+ return false;\r
+\r
+ int device = getDevice();\r
+\r
+ int minor, major;\r
+ getComputeCapability(device, &major, &minor);\r
+ int numSM = getNumberOfSMs(device);\r
+\r
+ if (major > 1 || numSM > 16)\r
+ return true; \r
+\r
+ return false;\r
}\r
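\r
// For example, a compute capability 1.3 device with 30 multiprocessors passes the\r
// numSM > 16 test, while an 8-SM compute capability 1.1 device returns false.\r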
\r
-void StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity) const\r
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity)\r
{\r
CV_DbgAssert(left.rows == right.rows && left.cols == right.cols);\r
CV_DbgAssert(left.type() == CV_8UC1);\r
} \r
\r
DevMem2D disp = disparity;\r
- DevMem2D_<uint> mssd = minSSD; \r
- cudaCallerSafeCall( impl::stereoBM_GPU(left, right, disp, ndisp, mssd) ); \r
+ DevMem2D_<unsigned int> mssd = minSSD; \r
+ impl::stereoBM_GPU(left, right, disp, ndisp, mssd); \r
}\r
+\r
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream)\r
+{\r
+ CV_Assert(!"Not implemented");\r
+}\r
+\r
+#endif /* !defined (HAVE_CUDA) */
\ No newline at end of file