namespace cv { namespace gpu
{
+ //////////////////////////////// CudaMem ////////////////////////////////
+ // CudaMem is limited cv::Mat with page locked memory allocation.
+ // Page locked memory is only needed for async and faster coping to GPU.
+ // It is convertable to cv::Mat header without reference counting
+ // so you can use it with other opencv functions.
+
+ // Page-locks the matrix m memory and maps it for the device(s)
+ CV_EXPORTS void registerPageLocked(Mat& m);
+ // Unmaps the memory of matrix m, and makes it pageable again.
+ CV_EXPORTS void unregisterPageLocked(Mat& m);
+
+ class CV_EXPORTS CudaMem
+ {
+ public:
+ enum { ALLOC_PAGE_LOCKED = 1, ALLOC_ZEROCOPY = 2, ALLOC_WRITE_COMBINED = 4 };
+
+ CudaMem();
+ CudaMem(const CudaMem& m);
+
+ CudaMem(int rows, int cols, int type, int _alloc_type = ALLOC_PAGE_LOCKED);
+ CudaMem(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED);
+
+
+ //! creates from cv::Mat with coping data
+ explicit CudaMem(const Mat& m, int alloc_type = ALLOC_PAGE_LOCKED);
+
+ ~CudaMem();
+
+ CudaMem& operator = (const CudaMem& m);
+
+ //! returns deep copy of the matrix, i.e. the data is copied
+ CudaMem clone() const;
+
+ //! allocates new matrix data unless the matrix already has specified size and type.
+ void create(int rows, int cols, int type, int alloc_type = ALLOC_PAGE_LOCKED);
+ void create(Size size, int type, int alloc_type = ALLOC_PAGE_LOCKED);
+
+ //! decrements reference counter and released memory if needed.
+ void release();
+
+ //! returns matrix header with disabled reference counting for CudaMem data.
+ Mat createMatHeader() const;
+ operator Mat() const;
+
+ //! maps host memory into device address space and returns GpuMat header for it. Throws exception if not supported by hardware.
+ GpuMat createGpuMatHeader() const;
+ operator GpuMat() const;
+
+ //returns if host memory can be mapperd to gpu address space;
+ static bool canMapHostMemory();
+
+ // Please see cv::Mat for descriptions
+ bool isContinuous() const;
+ size_t elemSize() const;
+ size_t elemSize1() const;
+ int type() const;
+ int depth() const;
+ int channels() const;
+ size_t step1() const;
+ Size size() const;
+ bool empty() const;
+
+
+ // Please see cv::Mat for descriptions
+ int flags;
+ int rows, cols;
+ size_t step;
+
+ uchar* data;
+ int* refcount;
+
+ uchar* datastart;
+ uchar* dataend;
+
+ int alloc_type;
+ };
+
+
+ //////////////////////////////// CudaStream ////////////////////////////////
+ // Encapculates Cuda Stream. Provides interface for async coping.
+ // Passed to each function that supports async kernel execution.
+ // Reference counting is enabled
+
+ class CV_EXPORTS Stream
+ {
+ public:
+ Stream();
+ ~Stream();
+
+ Stream(const Stream&);
+ Stream& operator =(const Stream&);
+
+ bool queryIfComplete();
+ void waitForCompletion();
+
+ //! downloads asynchronously
+ // Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its subMat)
+ void enqueueDownload(const GpuMat& src, CudaMem& dst);
+ void enqueueDownload(const GpuMat& src, Mat& dst);
+
+ //! uploads asynchronously
+ // Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its ROI)
+ void enqueueUpload(const CudaMem& src, GpuMat& dst);
+ void enqueueUpload(const Mat& src, GpuMat& dst);
+
+ //! copy asynchronously
+ void enqueueCopy(const GpuMat& src, GpuMat& dst);
+
+ //! memory set asynchronously
+ void enqueueMemSet(GpuMat& src, Scalar val);
+ void enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask);
+
+ //! converts matrix type, ex from float to uchar depending on type
+ void enqueueConvert(const GpuMat& src, GpuMat& dst, int dtype, double a = 1, double b = 0);
+
+ //! adds a callback to be called on the host after all currently enqueued items in the stream have completed
+ typedef void (*StreamCallback)(Stream& stream, int status, void* userData);
+ void enqueueHostCallback(StreamCallback callback, void* userData);
+
+ static Stream& Null();
+
+ operator bool() const;
+
+ private:
+ struct Impl;
+
+ explicit Stream(Impl* impl);
+ void create();
+ void release();
+
+ Impl *impl;
+
+ friend struct StreamAccessor;
+ };
+
//////////////////////////////// Initialization & Info ////////////////////////
//! This is the only function that do not throw exceptions if the library is compiled without Cuda.
--- /dev/null
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other GpuMaterials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef __OPENCV_GPU_STREAM_ACCESSOR_HPP__
+#define __OPENCV_GPU_STREAM_ACCESSOR_HPP__
+
+#include "opencv2/gpu/gpu.hpp"
+#include "cuda_runtime_api.h"
+
+namespace cv
+{
+ namespace gpu
+ {
+ // This is only header file that depends on Cuda. All other headers are independent.
+ // So if you use OpenCV binaries you do noot need to install Cuda Toolkit.
+ // But of you wanna use GPU by yourself, may get cuda stream instance using the class below.
+ // In this case you have to install Cuda Toolkit.
+ struct StreamAccessor
+ {
+ CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
+ };
+ }
+}
+
+#endif /* __OPENCV_GPU_STREAM_ACCESSOR_HPP__ */
\ No newline at end of file
#include "opencv2/gpu/stream_accessor.hpp"
+namespace
+{
+#if defined(__GNUC__)
+ #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
+ #define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__)
+#else /* defined(__CUDACC__) || defined(__MSVC__) */
+ #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)
+ #define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__)
+#endif
+
+ inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
+ {
+ if (cudaSuccess != err)
+ cv::gpu::error(cudaGetErrorString(err), file, line, func);
+ }
+
+ inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
+ {
+ if (err < 0)
+ {
+ std::ostringstream msg;
+ msg << "NPP API Call Error: " << err;
+ cv::gpu::error(msg.str().c_str(), file, line, func);
+ }
+ }
+}
+
namespace cv { namespace gpu
{
void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream);
//M*/
#include "precomp.hpp"
+#include "opencv2/core/gpumat.hpp"
using namespace cv;
using namespace cv::gpu;
return data == 0;
}
-#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
+#if !defined (HAVE_CUDA)
void cv::gpu::registerPageLocked(Mat&) { throw_nogpu(); }
void cv::gpu::unregisterPageLocked(Mat&) { throw_nogpu(); }
GpuMat cv::gpu::CudaMem::createGpuMatHeader () const { throw_nogpu(); return GpuMat(); }
#else /* !defined (HAVE_CUDA) */
+#include <cuda_runtime_api.h>
+namespace
+{
+#if defined(__GNUC__)
+ #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__)
+ #define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__, __func__)
+#else /* defined(__CUDACC__) || defined(__MSVC__) */
+ #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)
+ #define nppSafeCall(expr) ___nppSafeCall(expr, __FILE__, __LINE__)
+#endif
+
+ inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
+ {
+ if (cudaSuccess != err)
+ cv::gpu::error(cudaGetErrorString(err), file, line, func);
+ }
+
+ inline void ___nppSafeCall(int err, const char *file, const int line, const char *func = "")
+ {
+ if (err < 0)
+ {
+ std::ostringstream msg;
+ msg << "NPP API Call Error: " << err;
+ cv::gpu::error(msg.str().c_str(), file, line, func);
+ }
+ }
+}
void cv::gpu::registerPageLocked(Mat& m)
{
// In this case you have to install Cuda Toolkit.
struct StreamAccessor
{
- CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
+ CV_EXPORTS static cudaStream_t getStream(const cv::gpu::Stream& stream);
};
}
}
set(lib_device_hdrs "")
if (HAVE_CUDA AND lib_device_srcs)
- ocv_include_directories(${CUDA_INCLUDE_DIRS})
+ ocv_include_directories(${CUDA_INCLUDE_DIRS} "${OpenCV_SOURCE_DIR}/modules/gpu/include")
file(GLOB_RECURSE lib_device_hdrs "src/cuda/*.hpp")
ocv_cuda_compile(device_objs ${lib_device_srcs})
if (NAVE_CUDA)
set(cuda_deps ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
endif()
-ocv_add_module(softcascade opencv_core opencv_imgproc opencv_ml OPTIONAL opencv_gpu ${cuda_deps})
+ocv_add_module(softcascade opencv_core opencv_imgproc opencv_ml OPTIONAL ${cuda_deps})
if(HAVE_CUDA)
ocv_module_include_directories(${CUDA_INCLUDE_DIRS})
namespace cv { namespace softcascade { namespace device {
+typedef unsigned char uchar;
+
template <int FACTOR>
__device__ __forceinline__ uchar shrink(const uchar* ptr, const int pitch, const int y, const int x)
{
flds.mask.create( rois.cols / shr, rois.rows / shr, rois.type());
- cv::gpu::resize(rois, flds.genRoiTmp, cv::Size(), 1.f / shr, 1.f / shr, CV_INTER_AREA, s);
- cv::gpu::transpose(flds.genRoiTmp, flds.mask, s);
+ //cv::gpu::resize(rois, flds.genRoiTmp, cv::Size(), 1.f / shr, 1.f / shr, CV_INTER_AREA, s);
+ //cv::gpu::transpose(flds.genRoiTmp, flds.mask, s);
if (type == CV_8UC3)
{
flds.createLevels(image.rows, image.cols);
flds.preprocessor->apply(image, flds.shrunk);
- cv::gpu::integralBuffered(flds.shrunk, flds.hogluv, flds.integralBuffer, s);
+ //cv::gpu::integralBuffered(flds.shrunk, flds.hogluv, flds.integralBuffer, s);
}
else
{
channels.create(frame.rows * (4 + bins), frame.cols, CV_8UC1);
setZero(channels, s);
- cv::gpu::cvtColor(frame, gray, CV_BGR2GRAY, s);
+ //cv::gpu::cvtColor(frame, gray, CV_BGR2GRAY, s);
createHogBins(s);
createLuvBins(frame, s);
- cv::gpu::resize(channels, shrunk, cv::Size(), 1.f / shrinkage, 1.f / shrinkage, CV_INTER_AREA, s);
+ //cv::gpu::resize(channels, shrunk, cv::Size(), 1.f / shrinkage, 1.f / shrinkage, CV_INTER_AREA, s);
}
private:
cv::gpu::GpuMat dfdx(fplane, cv::Rect(0, 0, fw, fh));
cv::gpu::GpuMat dfdy(fplane, cv::Rect(0, fh, fw, fh));
- cv::gpu::Sobel(gray, dfdx, CV_32F, 1, 0, sobelBuf, 3, 1, cv::BORDER_DEFAULT, -1, s);
- cv::gpu::Sobel(gray, dfdy, CV_32F, 0, 1, sobelBuf, 3, 1, cv::BORDER_DEFAULT, -1, s);
+ //cv::gpu::Sobel(gray, dfdx, CV_32F, 1, 0, sobelBuf, 3, 1, cv::BORDER_DEFAULT, -1, s);
+ //cv::gpu::Sobel(gray, dfdy, CV_32F, 0, 1, sobelBuf, 3, 1, cv::BORDER_DEFAULT, -1, s);
cv::gpu::GpuMat mag(fplane, cv::Rect(0, 2 * fh, fw, fh));
cv::gpu::GpuMat ang(fplane, cv::Rect(0, 3 * fh, fw, fh));
- cv::gpu::cartToPolar(dfdx, dfdy, mag, ang, true, s);
+ //cv::gpu::cartToPolar(dfdx, dfdy, mag, ang, true, s);
// normalize magnitude to uchar interval and angles to 6 bins
cv::gpu::GpuMat nmag(fplane, cv::Rect(0, 4 * fh, fw, fh));
cv::gpu::GpuMat nang(fplane, cv::Rect(0, 5 * fh, fw, fh));
- cv::gpu::multiply(mag, cv::Scalar::all(1.f / (8 *::log(2.0f))), nmag, 1, -1, s);
- cv::gpu::multiply(ang, cv::Scalar::all(1.f / 60.f), nang, 1, -1, s);
+ //cv::gpu::multiply(mag, cv::Scalar::all(1.f / (8 *::log(2.0f))), nmag, 1, -1, s);
+ //cv::gpu::multiply(ang, cv::Scalar::all(1.f / 60.f), nang, 1, -1, s);
//create uchar magnitude
cv::gpu::GpuMat cmag(channels, cv::Rect(0, fh * HOG_BINS, fw, fh));
static const int fw = colored.cols;
static const int fh = colored.rows;
- cv::gpu::cvtColor(colored, luv, CV_BGR2Luv, s);
+ //cv::gpu::cvtColor(colored, luv, CV_BGR2Luv, s);
std::vector<cv::gpu::GpuMat> splited;
for(int i = 0; i < LUV_BINS; ++i)
splited.push_back(cv::gpu::GpuMat(channels, cv::Rect(0, fh * (7 + i), fw, fh)));
}
- cv::gpu::split(luv, splited, s);
+ //cv::gpu::split(luv, splited, s);
}
enum {HOG_BINS = 6, LUV_BINS = 3};
virtual void apply(InputArray _frame, OutputArray _shrunk, cv::gpu::Stream& s = cv::gpu::Stream::Null())
{
const cv::gpu::GpuMat frame = _frame.getGpuMat();
- cv::gpu::GaussianBlur(frame, bgr, cv::Size(3, 3), -1.0);
+ //cv::gpu::GaussianBlur(frame, bgr, cv::Size(3, 3), -1.0);
_shrunk.create(frame.rows * (4 + bins) / shrinkage, frame.cols / shrinkage, CV_8UC1);
cv::gpu::GpuMat shrunk = _shrunk.getGpuMat();
channels.create(frame.rows * (4 + bins), frame.cols, CV_8UC1);
setZero(channels, s);
- cv::gpu::cvtColor(bgr, gray, CV_BGR2GRAY);
+ //cv::gpu::cvtColor(bgr, gray, CV_BGR2GRAY);
cv::softcascade::device::gray2hog(gray, channels(cv::Rect(0, 0, bgr.cols, bgr.rows * (bins + 1))), bins);
cv::gpu::GpuMat luv(channels, cv::Rect(0, bgr.rows * (bins + 1), bgr.cols, bgr.rows * 3));