From 6a769c92b3b6bcfb6c312fe4df9dd908fd354ae2 Mon Sep 17 00:00:00 2001 From: Ernest Galbrun Date: Wed, 23 Jul 2014 14:16:53 +0200 Subject: [PATCH] modified default stream initialization to allow concurrent calls modified cuda surf.cuda.cpp to allow concurrent call --- modules/core/src/cuda_buffer_pool.cpp | 31 ++++++++++++++++++++++++------- modules/core/src/cuda_stream.cpp | 16 ++++++++++++++-- modules/nonfree/src/surf.cuda.cpp | 8 +++++++- 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/modules/core/src/cuda_buffer_pool.cpp b/modules/core/src/cuda_buffer_pool.cpp index ea060a7..e5caf6e 100644 --- a/modules/core/src/cuda_buffer_pool.cpp +++ b/modules/core/src/cuda_buffer_pool.cpp @@ -207,7 +207,6 @@ namespace MemoryStack* MemoryPool::getFreeMemStack() { AutoLock lock(mtx_); - if (!initialized_) initilizeImpl(); @@ -256,22 +255,31 @@ namespace namespace { + Mutex mtx_; + bool memory_pool_manager_initialized; + class MemoryPoolManager { public: MemoryPoolManager(); ~MemoryPoolManager(); + void Init(); MemoryPool* getPool(int deviceId); private: std::vector<MemoryPool> pools_; - }; + } manager; + + //MemoryPoolManager ; MemoryPoolManager::MemoryPoolManager() { - int deviceCount = getCudaEnabledDeviceCount(); + } + void MemoryPoolManager::Init() + { + int deviceCount = getCudaEnabledDeviceCount(); if (deviceCount > 0) pools_.resize(deviceCount); } @@ -280,7 +288,7 @@ namespace { for (size_t i = 0; i < pools_.size(); ++i) { - cudaSetDevice(i); + cudaSetDevice(static_cast<int>(i)); pools_[i].release(); } } @@ -293,7 +301,14 @@ namespace MemoryPool* memPool(int deviceId) { - static MemoryPoolManager manager; + { + AutoLock lock(mtx_); + if (!memory_pool_manager_initialized) + { + memory_pool_manager_initialized = true; + manager.Init(); + } + } return manager.getPool(deviceId); } } @@ -311,8 +326,10 @@ cv::cuda::StackAllocator::StackAllocator(cudaStream_t stream) : stream_(stream), if (enableMemoryPool) { const int deviceId = getDevice(); - memStack_ = 
memPool(deviceId)->getFreeMemStack(); - + { + AutoLock lock(mtx_); + memStack_ = memPool(deviceId)->getFreeMemStack(); + } DeviceInfo devInfo(deviceId); alignment_ = devInfo.textureAlignment(); } diff --git a/modules/core/src/cuda_stream.cpp b/modules/core/src/cuda_stream.cpp index 9f190c3..1f73a8e 100644 --- a/modules/core/src/cuda_stream.cpp +++ b/modules/core/src/cuda_stream.cpp @@ -190,10 +190,22 @@ void cv::cuda::Stream::enqueueHostCallback(StreamCallback callback, void* userDa #endif } +namespace +{ + bool default_stream_is_initialized; + Mutex mtx; + Ptr<Stream> default_stream; +} + Stream& cv::cuda::Stream::Null() { - static Stream s(Ptr<Impl>(new Impl(0))); - return s; + AutoLock lock(mtx); + if (!default_stream_is_initialized) + { + default_stream = Ptr<Stream>(new Stream(Ptr<Impl>(new Impl(0)))); + default_stream_is_initialized = true; + } + return *default_stream; } cv::cuda::Stream::operator bool_type() const diff --git a/modules/nonfree/src/surf.cuda.cpp b/modules/nonfree/src/surf.cuda.cpp index 4089b50..461ba0f 100644 --- a/modules/nonfree/src/surf.cuda.cpp +++ b/modules/nonfree/src/surf.cuda.cpp @@ -93,6 +93,8 @@ using namespace ::cv::cuda::device::surf; namespace { + Mutex mtx; + int calcSize(int octave, int layer) { /* Wavelet size at first layer of first octave. 
*/ @@ -166,7 +168,6 @@ namespace { const int layer_rows = img_rows >> octave; const int layer_cols = img_cols >> octave; - loadOctaveConstants(octave, layer_rows, layer_cols); icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, img_rows, img_cols, octave, surf_.nOctaveLayers); @@ -354,6 +355,7 @@ void cv::cuda::SURF_CUDA::downloadDescriptors(const GpuMat& descriptorsGPU, std: void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints) { + AutoLock lock(mtx); if (!img.empty()) { SURF_CUDA_Invoker surf(*this, img, mask); @@ -365,6 +367,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuM void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors, bool useProvidedKeypoints) { + AutoLock lock(mtx); if (!img.empty()) { SURF_CUDA_Invoker surf(*this, img, mask); @@ -382,6 +385,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuM void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints) { + AutoLock lock(mtx); GpuMat keypointsGPU; (*this)(img, mask, keypointsGPU); @@ -392,6 +396,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std: void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors, bool useProvidedKeypoints) { + AutoLock lock(mtx); GpuMat keypointsGPU; if (useProvidedKeypoints) @@ -405,6 +410,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std: void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, std::vector<float>& descriptors, bool useProvidedKeypoints) { + AutoLock lock(mtx); GpuMat descriptorsGPU; (*this)(img, mask, keypoints, descriptorsGPU, useProvidedKeypoints); -- 2.7.4