From 5ea8085220ae32a6b368f8a00146b73e39af3fb2 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Date: Mon, 7 Oct 2013 18:25:55 +0400
Subject: [PATCH] added BufferPool class

---
 modules/core/include/opencv2/core/cuda.hpp         |  1 +
 modules/core/include/opencv2/core/private.cuda.hpp | 24 ++++++++++++-----
 modules/core/src/cuda_buffer_pool.cpp              | 31 ++++++++++++----------
 modules/core/src/cuda_stream.cpp                   | 10 +++++++
 modules/cuda/perf/perf_buffer_pool.cpp             | 10 +++----
 modules/cuda/test/test_buffer_pool.cpp             | 18 ++++++-------
 6 files changed, 60 insertions(+), 34 deletions(-)
diff --git a/modules/core/include/opencv2/core/cuda.hpp b/modules/core/include/opencv2/core/cuda.hpp
index 90c21cf..1911959 100644
--- a/modules/core/include/opencv2/core/cuda.hpp
+++ b/modules/core/include/opencv2/core/cuda.hpp
@@ -398,6 +398,7 @@ private:
     Stream(const Ptr<Impl>& impl);
 
     friend struct StreamAccessor;
+    friend class BufferPool;
 };
 
 class CV_EXPORTS Event
diff --git a/modules/core/include/opencv2/core/private.cuda.hpp b/modules/core/include/opencv2/core/private.cuda.hpp
index 3760c18..2088884 100644
--- a/modules/core/include/opencv2/core/private.cuda.hpp
+++ b/modules/core/include/opencv2/core/private.cuda.hpp
@@ -92,24 +92,36 @@ namespace cv { namespace cuda
 {
     class MemoryStack;
 
-    class CV_EXPORTS BufferAllocator : public GpuMat::Allocator
+    class CV_EXPORTS StackAllocator : public GpuMat::Allocator
     {
     public:
-        explicit BufferAllocator(Stream& stream);
-        ~BufferAllocator();
+        explicit StackAllocator(cudaStream_t stream);
+        ~StackAllocator();
 
         bool allocate(uchar** devPtr, size_t* step, int** refcount, int rows, int cols, size_t elemSize);
         void free(uchar* devPtr, int* refcount);
 
     private:
-        BufferAllocator(const BufferAllocator&);
-        BufferAllocator& operator =(const BufferAllocator&);
+        StackAllocator(const StackAllocator&);
+        StackAllocator& operator =(const StackAllocator&);
 
+        cudaStream_t stream_;
         MemoryStack* memStack_;
-        Stream stream_;
         size_t alignment_;
     };
 
+    class CV_EXPORTS BufferPool
+    {
+    public:
+        explicit BufferPool(Stream& stream);
+
+        GpuMat getBuffer(int rows, int cols, int type);
+        GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); }
+
+    private:
+        GpuMat::Allocator* allocator_;
+    };
+
     CV_EXPORTS void setBufferAllocatorUsage(bool on);
     CV_EXPORTS void allocateMemoryPool(int deviceId, size_t stackSize, int stackCount);
 
diff --git a/modules/core/src/cuda_buffer_pool.cpp b/modules/core/src/cuda_buffer_pool.cpp
index 162811e..dd24f4f 100644
--- a/modules/core/src/cuda_buffer_pool.cpp
+++ b/modules/core/src/cuda_buffer_pool.cpp
@@ -299,14 +299,14 @@ namespace
 }
 
 /////////////////////////////////////////////////////////////
-/// BufferAllocator
+/// StackAllocator
 
 namespace
 {
     bool enableMemoryPool = true;
 }
 
-cv::cuda::BufferAllocator::BufferAllocator(Stream& stream) : memStack_(0), stream_(stream)
+cv::cuda::StackAllocator::StackAllocator(cudaStream_t stream) : stream_(stream), memStack_(0)
 {
     if (enableMemoryPool)
     {
@@ -318,19 +318,12 @@ cv::cuda::BufferAllocator::BufferAllocator(Stream& stream) : memStack_(0), strea
     }
 }
 
-namespace
+cv::cuda::StackAllocator::~StackAllocator()
 {
-    void CUDART_CB returnMemStackCallback(cudaStream_t, cudaError_t, void* userData)
-    {
-        MemoryStack* memStack = static_cast<MemoryStack*>(userData);
-        memStack->pool->returnMemStack(memStack);
-    }
-}
+    cudaStreamSynchronize(stream_);
 
-cv::cuda::BufferAllocator::~BufferAllocator()
-{
     if (memStack_ != 0)
-        CV_CUDEV_SAFE_CALL( cudaStreamAddCallback(StreamAccessor::getStream(stream_), returnMemStackCallback, memStack_, 0) );
+        memStack_->pool->returnMemStack(memStack_);
 }
 
 namespace
@@ -344,7 +337,7 @@ namespace
     }
 }
 
-bool cv::cuda::BufferAllocator::allocate(uchar** devPtr, size_t* step, int** refcount, int rows, int cols, size_t elemSize)
+bool cv::cuda::StackAllocator::allocate(uchar** devPtr, size_t* step, int** refcount, int rows, int cols, size_t elemSize)
 {
     if (memStack_ == 0)
         return false;
@@ -376,7 +369,7 @@ bool cv::cuda::BufferAllocator::allocate(uchar** devPtr, size_t* step, int** ref
     return true;
 }
 
-void cv::cuda::BufferAllocator::free(uchar* devPtr, int* refcount)
+void cv::cuda::StackAllocator::free(uchar* devPtr, int* refcount)
 {
     if (memStack_ == 0)
         return;
@@ -413,4 +406,14 @@ void cv::cuda::allocateMemoryPool(int deviceId, size_t stackSize, int stackCount
     setDevice(currentDevice);
 }
 
+/////////////////////////////////////////////////////////////
+/// BufferPool
+
+GpuMat cv::cuda::BufferPool::getBuffer(int rows, int cols, int type)
+{
+    GpuMat buf(allocator_);
+    buf.create(rows, cols, type);
+    return buf;
+}
+
 #endif
diff --git a/modules/core/src/cuda_stream.cpp b/modules/core/src/cuda_stream.cpp
index be29891..9f190c3 100644
--- a/modules/core/src/cuda_stream.cpp
+++ b/modules/core/src/cuda_stream.cpp
@@ -66,6 +66,7 @@ class cv::cuda::Stream::Impl
 {
 public:
     cudaStream_t stream;
+    Ptr<StackAllocator> stackAllocator_;
 
     Impl();
     Impl(cudaStream_t stream);
@@ -73,17 +74,26 @@ public:
     ~Impl();
 };
 
+cv::cuda::BufferPool::BufferPool(Stream& stream) : allocator_(stream.impl_->stackAllocator_.get())
+{
+}
+
 cv::cuda::Stream::Impl::Impl() : stream(0)
 {
     cudaSafeCall( cudaStreamCreate(&stream) );
+
+    stackAllocator_ = makePtr<StackAllocator>(stream);
 }
 
 cv::cuda::Stream::Impl::Impl(cudaStream_t stream_) : stream(stream_)
 {
+    stackAllocator_ = makePtr<StackAllocator>(stream);
 }
 
 cv::cuda::Stream::Impl::~Impl()
 {
+    stackAllocator_.release();
+
     if (stream)
         cudaStreamDestroy(stream);
 }
diff --git a/modules/cuda/perf/perf_buffer_pool.cpp b/modules/cuda/perf/perf_buffer_pool.cpp
index 9bab24b..7a0dca3 100644
--- a/modules/cuda/perf/perf_buffer_pool.cpp
+++ b/modules/cuda/perf/perf_buffer_pool.cpp
@@ -56,9 +56,9 @@ namespace
 {
     void func1(const GpuMat& src, GpuMat& dst, Stream& stream)
     {
-        BufferAllocator bufAlloc(stream);
+        BufferPool pool(stream);
 
-        GpuMat buf(&bufAlloc);
+        GpuMat buf = pool.getBuffer(src.size(), CV_32FC(src.channels()));
 
         src.convertTo(buf, CV_32F, 1.0 / 255.0, stream);
 
@@ -67,13 +67,13 @@ namespace
 
     void func2(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
     {
-        BufferAllocator bufAlloc(stream);
+        BufferPool pool(stream);
 
-        GpuMat buf1(&bufAlloc);
+        GpuMat buf1 = pool.getBuffer(src1.size(), CV_32FC(src1.channels()));
 
         func1(src1, buf1, stream);
 
-        GpuMat buf2(&bufAlloc);
+        GpuMat buf2 = pool.getBuffer(src2.size(), CV_32FC(src2.channels()));
 
         func1(src2, buf2, stream);
 
diff --git a/modules/cuda/test/test_buffer_pool.cpp b/modules/cuda/test/test_buffer_pool.cpp
index ea3ca80..2526358 100644
--- a/modules/cuda/test/test_buffer_pool.cpp
+++ b/modules/cuda/test/test_buffer_pool.cpp
@@ -52,7 +52,7 @@ using namespace testing;
 using namespace cv;
 using namespace cv::cuda;
 
-struct BufferPool : TestWithParam<DeviceInfo>
+struct BufferPoolTest : TestWithParam<DeviceInfo>
 {
 };
 
@@ -60,9 +60,9 @@ namespace
 {
     void func1(const GpuMat& src, GpuMat& dst, Stream& stream)
     {
-        BufferAllocator bufAlloc(stream);
+        BufferPool pool(stream);
 
-        GpuMat buf(&bufAlloc);
+        GpuMat buf = pool.getBuffer(src.size(), CV_32FC(src.channels()));
 
         src.convertTo(buf, CV_32F, 1.0 / 255.0, stream);
 
@@ -71,17 +71,17 @@ namespace
 
     void func2(const GpuMat& src, GpuMat& dst, Stream& stream)
     {
-        BufferAllocator bufAlloc(stream);
+        BufferPool pool(stream);
 
-        GpuMat buf1(&bufAlloc);
+        GpuMat buf1 = pool.getBuffer(saturate_cast<int>(src.rows * 0.5), saturate_cast<int>(src.cols * 0.5), src.type());
 
         cuda::resize(src, buf1, Size(), 0.5, 0.5, cv::INTER_NEAREST, stream);
 
-        GpuMat buf2(&bufAlloc);
+        GpuMat buf2 = pool.getBuffer(buf1.size(), CV_32FC(buf1.channels()));
 
         func1(buf1, buf2, stream);
 
-        GpuMat buf3(&bufAlloc);
+        GpuMat buf3 = pool.getBuffer(src.size(), buf2.type());
 
         cuda::resize(buf2, buf3, src.size(), 0, 0, cv::INTER_NEAREST, stream);
 
@@ -89,7 +89,7 @@ namespace
     }
 }
 
-CUDA_TEST_P(BufferPool, Test)
+CUDA_TEST_P(BufferPoolTest, SimpleUsage)
 {
     DeviceInfo devInfo = GetParam();
     setDevice(devInfo.deviceID());
@@ -115,6 +115,6 @@ CUDA_TEST_P(BufferPool, Test)
     ASSERT_MAT_NEAR(dst_gold, dst, 0);
 }
 
-INSTANTIATE_TEST_CASE_P(CUDA_Stream, BufferPool, ALL_DEVICES);
+INSTANTIATE_TEST_CASE_P(CUDA_Stream, BufferPoolTest, ALL_DEVICES);
 
 #endif // HAVE_CUDA
-- 
2.7.4