core/ocl: OpenCLBufferPool
author Alexander Alekhin <alexander.alekhin@itseez.com>
Thu, 16 Jan 2014 14:30:39 +0000 (18:30 +0400)
committer Alexander Alekhin <alexander.alekhin@itseez.com>
Tue, 4 Feb 2014 12:48:35 +0000 (16:48 +0400)
modules/core/include/opencv2/core/bufferpool.hpp [new file with mode: 0644]
modules/core/include/opencv2/core/mat.hpp
modules/core/include/opencv2/core/ocl.hpp
modules/core/perf/opencl/perf_bufferpool.cpp [new file with mode: 0644]
modules/core/src/bufferpool.impl.hpp [new file with mode: 0644]
modules/core/src/matrix.cpp
modules/core/src/ocl.cpp
modules/core/src/precomp.hpp
modules/core/src/umatrix.cpp
modules/core/test/test_umat.cpp

diff --git a/modules/core/include/opencv2/core/bufferpool.hpp b/modules/core/include/opencv2/core/bufferpool.hpp
new file mode 100644 (file)
index 0000000..c2de95a
--- /dev/null
@@ -0,0 +1,26 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
+
+#ifndef __OPENCV_CORE_BUFFER_POOL_HPP__
+#define __OPENCV_CORE_BUFFER_POOL_HPP__
+
+namespace cv
+{
+
+// Abstract interface for querying and tuning a pool of reserved buffers.
+// A controller is handed out by MatAllocator::getBufferPoolController().
+class BufferPoolController
+{
+protected:
+    // Protected and non-virtual: a controller cannot be deleted through this interface.
+    ~BufferPoolController() { }
+public:
+    // Size currently held in reserve by the pool.
+    virtual size_t getReservedSize() const = 0;
+    // Current upper limit for the reserved size.
+    virtual size_t getMaxReservedSize() const = 0;
+    // Changes the upper limit for the reserved size.
+    virtual void setMaxReservedSize(size_t size) = 0;
+    // Releases every buffer the pool currently holds in reserve.
+    virtual void freeAllReservedBuffers() = 0;
+};
+
+}
+
+#endif // __OPENCV_CORE_BUFFER_POOL_HPP__
index d9f06cb..8099520 100644 (file)
@@ -51,6 +51,7 @@
 #include "opencv2/core/matx.hpp"
 #include "opencv2/core/types.hpp"
 
+#include "opencv2/core/bufferpool.hpp"
 
 namespace cv
 {
@@ -299,6 +300,9 @@ public:
     virtual void copy(UMatData* srcdata, UMatData* dstdata, int dims, const size_t sz[],
                       const size_t srcofs[], const size_t srcstep[],
                       const size_t dstofs[], const size_t dststep[], bool sync) const;
+
+    // default implementation returns DummyBufferPoolController
+    virtual BufferPoolController* getBufferPoolController() const;
 };
 
 
@@ -363,7 +367,7 @@ struct CV_EXPORTS UMatData
     int refcount;
     uchar* data;
     uchar* origdata;
-    size_t size;
+    size_t size, capacity;
 
     int flags;
     void* handle;
index 3a28a3f..0df3b81 100644 (file)
@@ -596,6 +596,9 @@ protected:
     Impl* p;
 };
 
+
+CV_EXPORTS MatAllocator* getOpenCLAllocator();
+
 }}
 
 #endif
diff --git a/modules/core/perf/opencl/perf_bufferpool.cpp b/modules/core/perf/opencl/perf_bufferpool.cpp
new file mode 100644 (file)
index 0000000..abb0757
--- /dev/null
@@ -0,0 +1,132 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
+
+#include "perf_precomp.hpp"
+#include "opencv2/ts/ocl_perf.hpp"
+
+#ifdef HAVE_OPENCL
+
+namespace cvtest {
+namespace ocl {
+
+// Scope guard for the perf tests in this file: remembers the pool's reserve limit
+// when constructed and writes it back when destroyed.
+struct BufferPoolState
+{
+    BufferPoolController* controller_;
+    size_t oldMaxReservedSize_;
+
+    // Throws PerfSkipTestException when OpenCL is not in use, or when the controller
+    // reports (size_t)-1 as its limit. Otherwise empties the pool (enable == true)
+    // or sets its limit to 0 (enable == false).
+    BufferPoolState(BufferPoolController* c, bool enable)
+        : controller_(c)
+    {
+        if (!cv::ocl::useOpenCL())
+            throw ::perf::TestBase::PerfSkipTestException();
+
+        oldMaxReservedSize_ = controller_->getMaxReservedSize();
+        if (oldMaxReservedSize_ == (size_t)-1)
+            throw ::perf::TestBase::PerfSkipTestException();
+
+        if (enable)
+            controller_->freeAllReservedBuffers();
+        else
+            controller_->setMaxReservedSize(0);
+    }
+
+    // Restores the limit that was in effect before this guard was created.
+    ~BufferPoolState()
+    {
+        controller_->setMaxReservedSize(oldMaxReservedSize_);
+    }
+};
+
+typedef TestBaseWithParam<bool> BufferPoolFixture;
+
+// Times 100 constructions (and destructions) of a 1920x1080 CV_8UC1 UMat per test cycle.
+// The bool test parameter is forwarded to BufferPoolState as its `enable` flag.
+OCL_PERF_TEST_P(BufferPoolFixture, BufferPool_UMatCreation100, Bool())
+{
+    BufferPoolState s(cv::ocl::getOpenCLAllocator()->getBufferPoolController(), GetParam());
+
+    Size sz(1920, 1080);
+
+    OCL_TEST_CYCLE()
+    {
+        for (int i = 0; i < 100; i++)
+        {
+            UMat u(sz, CV_8UC1);
+        }
+    }
+
+    SANITY_CHECK_NOTHING()
+}
+
+// Same as the creation test, but each of the 100 freshly created UMats is also passed
+// to countNonZero(). The bool test parameter is forwarded to BufferPoolState.
+OCL_PERF_TEST_P(BufferPoolFixture, BufferPool_UMatCountNonZero100, Bool())
+{
+    BufferPoolState s(cv::ocl::getOpenCLAllocator()->getBufferPoolController(), GetParam());
+
+    Size sz(1920, 1080);
+
+    OCL_TEST_CYCLE()
+    {
+        for (int i = 0; i < 100; i++)
+        {
+            UMat u(sz, CV_8UC1);
+            countNonZero(u);
+        }
+    }
+
+    SANITY_CHECK_NOTHING()
+}
+
+// Times 10 runs of Canny() per test cycle on a newly created 1920x1080 CV_8UC1 UMat,
+// with a fresh destination UMat each time. The bool test parameter is forwarded to
+// BufferPoolState.
+OCL_PERF_TEST_P(BufferPoolFixture, BufferPool_UMatCanny10, Bool())
+{
+    BufferPoolState s(cv::ocl::getOpenCLAllocator()->getBufferPoolController(), GetParam());
+
+    Size sz(1920, 1080);
+
+    int aperture = 3;
+    bool useL2 = false;
+    double thresh_low = 100;
+    double thresh_high = 120;
+
+    OCL_TEST_CYCLE()
+    {
+        for (int i = 0; i < 10; i++)
+        {
+            UMat src(sz, CV_8UC1);
+            UMat dst;
+            Canny(src, dst, thresh_low, thresh_high, aperture, useL2);
+            dst.getMat(ACCESS_READ); // complete async operations
+        }
+    }
+
+    SANITY_CHECK_NOTHING()
+}
+
+// Times 10 runs of integral() per test cycle on a newly created 1920x1080 CV_32FC1 UMat,
+// with a fresh destination UMat each time. The bool test parameter is forwarded to
+// BufferPoolState.
+OCL_PERF_TEST_P(BufferPoolFixture, BufferPool_UMatIntegral10, Bool())
+{
+    BufferPoolState s(cv::ocl::getOpenCLAllocator()->getBufferPoolController(), GetParam());
+
+    Size sz(1920, 1080);
+
+    OCL_TEST_CYCLE()
+    {
+        for (int i = 0; i < 10; i++)
+        {
+            UMat src(sz, CV_32FC1);
+            UMat dst;
+            integral(src, dst);
+            dst.getMat(ACCESS_READ); // complete async operations
+        }
+    }
+
+    SANITY_CHECK_NOTHING()
+}
+
+} } // namespace cvtest::ocl
+
+#endif // HAVE_OPENCL
diff --git a/modules/core/src/bufferpool.impl.hpp b/modules/core/src/bufferpool.impl.hpp
new file mode 100644 (file)
index 0000000..18a90e0
--- /dev/null
@@ -0,0 +1,28 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
+
+#ifndef __OPENCV_CORE_BUFFER_POOL_IMPL_HPP__
+#define __OPENCV_CORE_BUFFER_POOL_IMPL_HPP__
+
+#include "opencv2/core/bufferpool.hpp"
+
+namespace cv {
+
+// Stand-in controller for allocators without a buffer pool: both size queries
+// report (size_t)-1 and the two mutating calls have no effect.
+class DummyBufferPoolController : public BufferPoolController
+{
+public:
+    DummyBufferPoolController() { }
+    virtual ~DummyBufferPoolController() { }
+
+    virtual size_t getReservedSize() const
+    {
+        return (size_t)-1;
+    }
+    virtual size_t getMaxReservedSize() const
+    {
+        return (size_t)-1;
+    }
+    // The requested limit is ignored.
+    virtual void setMaxReservedSize(size_t /*size*/)
+    {
+    }
+    virtual void freeAllReservedBuffers()
+    {
+    }
+};
+
+} // namespace
+
+#endif // __OPENCV_CORE_BUFFER_POOL_IMPL_HPP__
index 87e4fd5..8a11d09 100644 (file)
@@ -43,6 +43,8 @@
 #include "precomp.hpp"
 #include "opencl_kernels.hpp"
 
+#include "bufferpool.impl.hpp"
+
 /****************************************************************************************\
 *                           [scaled] Identity matrix initialization                      *
 \****************************************************************************************/
@@ -157,6 +159,12 @@ void MatAllocator::copy(UMatData* usrc, UMatData* udst, int dims, const size_t s
         memcpy(ptrs[1], ptrs[0], planesz);
 }
 
+// Default implementation: returns a pointer to one function-local static
+// DummyBufferPoolController, so every call yields the same object.
+BufferPoolController* MatAllocator::getBufferPoolController() const
+{
+    static DummyBufferPoolController dummy;
+    return &dummy;
+}
+
 class StdMatAllocator : public MatAllocator
 {
 public:
index d8254cb..6f19b56 100644 (file)
 //M*/
 
 #include "precomp.hpp"
+#include <list>
 #include <map>
 #include <string>
 #include <sstream>
 #include <iostream> // std::cerr
 
+#include "opencv2/core/bufferpool.hpp"
+#ifndef LOG_BUFFER_POOL
+# if 0
+#   define LOG_BUFFER_POOL printf
+# else
+#   define LOG_BUFFER_POOL(...)
+# endif
+#endif
+
+// TODO Move to some common place
+// Reads a size setting from the environment variable `name`.
+//
+// Accepted form: a run of decimal digits, optionally followed by one of the suffixes
+// "MB"/"Mb"/"mb" (value * 1024 * 1024) or "KB"/"Kb"/"kb" (value * 1024).
+// Returns `defaultValue` when the variable is not set. A value with no leading digits
+// and an accepted (or empty) suffix yields 0.
+// Raises cv::Error::StsBadArg for an unknown suffix or when the resulting size does not
+// fit into size_t.
+static size_t getConfigurationParameterForSize(const char* name, size_t defaultValue)
+{
+    const char* envValue = getenv(name);
+    if (envValue == NULL)
+    {
+        return defaultValue;
+    }
+    cv::String value = envValue;
+    const size_t maxValue = (size_t)-1;
+    // Accumulate the number directly in size_t: going through atoi()/int would overflow
+    // for limits of 2048MB and above.
+    size_t v = 0;
+    bool overflow = false;
+    size_t pos = 0;
+    for (; pos < value.size(); pos++)
+    {
+        // isdigit() requires a value representable as unsigned char
+        const unsigned char ch = (unsigned char)value[pos];
+        if (!isdigit(ch))
+            break;
+        const size_t digit = (size_t)(ch - '0');
+        if (v > (maxValue - digit) / 10)
+            overflow = true;
+        else
+            v = v * 10 + digit;
+    }
+    cv::String suffixStr = value.substr(pos, value.length() - pos);
+    size_t scale = 0; // 0 means "suffix not recognized"
+    if (suffixStr.length() == 0)
+        scale = 1;
+    else if (suffixStr == "MB" || suffixStr == "Mb" || suffixStr == "mb")
+        scale = 1024 * 1024;
+    else if (suffixStr == "KB" || suffixStr == "Kb" || suffixStr == "kb")
+        scale = 1024;
+    if (scale != 0 && !overflow && v <= maxValue / scale)
+        return v * scale;
+    CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str()));
+}
+
 #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
 #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
 
@@ -3234,8 +3271,208 @@ ProgramSource2::hash_t ProgramSource2::hash() const
 
 //////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
 
+// Allocation-side interface of the OpenCL buffer pool.
+class OpenCLBufferPool
+{
+protected:
+    // Protected and non-virtual: a pool cannot be deleted through this interface.
+    ~OpenCLBufferPool() { }
+public:
+    // Returns a buffer for `size`; `capacity` is an output parameter that receives the
+    // capacity of the returned buffer.
+    virtual cl_mem allocate(size_t size, CV_OUT size_t& capacity) = 0;
+    // Gives a buffer back to the pool; `capacity` is the value reported by allocate().
+    virtual void release(cl_mem handle, size_t capacity) = 0;
+};
+
+// Pool of released OpenCL buffers kept for later reuse.
+//
+// release() parks a buffer in reservedEntries_ instead of destroying it (subject to
+// maxReservedSize), and allocate() hands a parked buffer back when one fits closely
+// enough. currentReservedSize is the summed capacity of the parked buffers.
+// Helpers commented as "synchronized" are called with mutex_ already held.
+class OpenCLBufferPoolImpl : public BufferPoolController, public OpenCLBufferPool
+{
+public:
+    // One buffer: the OpenCL handle and the size it was created with.
+    struct BufferEntry
+    {
+        cl_mem clBuffer_;
+        size_t capacity_;
+    };
+protected:
+    Mutex mutex_;
+
+    size_t currentReservedSize; // summed capacity_ of all entries in reservedEntries_
+    size_t maxReservedSize;     // limit for currentReservedSize; 0 turns pooling off
+
+    std::list<BufferEntry> reservedEntries_; // LRU order
+
+    // synchronized
+    // Looks for the reserved buffer that fits `size` best: capacity_ >= size with less
+    // than size / 8 of unused space, preferring the smallest surplus. On success the
+    // entry is removed from the list, copied to `entry`, and its capacity is subtracted
+    // from currentReservedSize. Returns false when nothing suitable is reserved.
+    bool _findAndRemoveEntryFromReservedList(CV_OUT BufferEntry& entry, const size_t size)
+    {
+        if (reservedEntries_.empty())
+            return false;
+        std::list<BufferEntry>::iterator i = reservedEntries_.begin();
+        std::list<BufferEntry>::iterator result_pos = reservedEntries_.end();
+        BufferEntry result = {NULL, 0};
+        size_t minDiff = (size_t)(-1);
+        for (; i != reservedEntries_.end(); ++i)
+        {
+            BufferEntry& e = *i;
+            if (e.capacity_ >= size)
+            {
+                size_t diff = e.capacity_ - size;
+                if (diff < size / 8 && (result_pos == reservedEntries_.end() || diff < minDiff))
+                {
+                    minDiff = diff;
+                    result_pos = i;
+                    result = e;
+                    if (diff == 0)
+                        break; // exact fit, no better candidate possible
+                }
+            }
+        }
+        if (result_pos != reservedEntries_.end())
+        {
+            //CV_DbgAssert(result == *result_pos);
+            reservedEntries_.erase(result_pos);
+            entry = result;
+            currentReservedSize -= entry.capacity_;
+            return true;
+        }
+        return false;
+    }
+
+    // synchronized
+    // Releases entries from the back of the list (the ones reserved longest ago) until
+    // currentReservedSize no longer exceeds maxReservedSize.
+    void _checkSizeOfReservedEntries()
+    {
+        while (currentReservedSize > maxReservedSize)
+        {
+            CV_DbgAssert(!reservedEntries_.empty());
+            const BufferEntry& entry = reservedEntries_.back();
+            CV_DbgAssert(currentReservedSize >= entry.capacity_);
+            currentReservedSize -= entry.capacity_;
+            _releaseBufferEntry(entry);
+            reservedEntries_.pop_back();
+        }
+    }
+
+    // Rounding step used for a request of `size`; larger requests get a coarser step.
+    inline size_t _allocationGranularity(size_t size)
+    {
+        // heuristic values
+        if (size < 1024)
+            return 16;
+        else if (size < 64*1024)
+            return 64;
+        else if (size < 1024*1024)
+            return 4096;
+        else if (size < 16*1024*1024)
+            return 64*1024;
+        else
+            return 1024*1024;
+    }
+
+    // Creates a new CL_MEM_READ_WRITE buffer in the default context. The capacity is
+    // `size` aligned via alignSize() to _allocationGranularity(size). Any failure
+    // reported by clCreateBuffer trips CV_Assert.
+    void _allocateBufferEntry(BufferEntry& entry, size_t size)
+    {
+        CV_DbgAssert(entry.clBuffer_ == NULL);
+        entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
+        Context2& ctx = Context2::getDefault();
+        cl_int retval = CL_SUCCESS;
+        entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE, entry.capacity_, 0, &retval);
+        CV_Assert(retval == CL_SUCCESS);
+        CV_Assert(entry.clBuffer_ != NULL);
+        LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
+                (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
+    }
+
+    // Hands the buffer to clReleaseMemObject. Does not touch the list or the size counter.
+    void _releaseBufferEntry(const BufferEntry& entry)
+    {
+        CV_Assert(entry.capacity_ != 0);
+        CV_Assert(entry.clBuffer_ != NULL);
+        LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
+                entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
+        clReleaseMemObject(entry.clBuffer_);
+    }
+public:
+    OpenCLBufferPoolImpl()
+        : currentReservedSize(0), maxReservedSize(0)
+    {
+        // Note: Buffer pool is disabled by default,
+        //       because we didn't receive significant performance improvement
+        maxReservedSize = getConfigurationParameterForSize("OPENCV_OPENCL_BUFFERPOOL_LIMIT", 0);
+    }
+    virtual ~OpenCLBufferPoolImpl()
+    {
+        freeAllReservedBuffers();
+        CV_Assert(reservedEntries_.empty());
+    }
+public:
+    // Returns a buffer able to hold `size`; `capacity` receives its actual capacity.
+    // A reserved buffer is reused when pooling is enabled and a close fit exists,
+    // otherwise a new buffer is created.
+    virtual cl_mem allocate(size_t size, CV_OUT size_t& capacity)
+    {
+        BufferEntry entry = {NULL, 0};
+        if (maxReservedSize > 0)
+        {
+            AutoLock locker(mutex_);
+            if (_findAndRemoveEntryFromReservedList(entry, size))
+            {
+                CV_DbgAssert(size <= entry.capacity_);
+                LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_);
+                capacity = entry.capacity_;
+                return entry.clBuffer_;
+            }
+        }
+        _allocateBufferEntry(entry, size);
+        capacity = entry.capacity_;
+        return entry.clBuffer_;
+    }
+    // Takes a buffer back. It is released right away when pooling is off or when the
+    // buffer is larger than maxReservedSize / 8. Otherwise it goes to the front of the
+    // reserved list, after which older entries are dropped if the limit is exceeded.
+    virtual void release(cl_mem handle, size_t capacity)
+    {
+        BufferEntry entry = {handle, capacity};
+        if (maxReservedSize == 0 || entry.capacity_ > maxReservedSize / 8)
+        {
+            _releaseBufferEntry(entry);
+        }
+        else
+        {
+            AutoLock locker(mutex_);
+            reservedEntries_.push_front(entry);
+            currentReservedSize += entry.capacity_;
+            _checkSizeOfReservedEntries();
+        }
+    }
+
+    virtual size_t getReservedSize() const { return currentReservedSize; }
+    virtual size_t getMaxReservedSize() const { return maxReservedSize; }
+    // Sets a new limit. When the limit shrinks, entries larger than the new
+    // maxReservedSize / 8 are released first, then the oldest entries are dropped
+    // until the total fits the limit.
+    virtual void setMaxReservedSize(size_t size)
+    {
+        AutoLock locker(mutex_);
+        size_t oldMaxReservedSize = maxReservedSize;
+        maxReservedSize = size;
+        if (maxReservedSize < oldMaxReservedSize)
+        {
+            std::list<BufferEntry>::iterator i = reservedEntries_.begin();
+            for (; i != reservedEntries_.end();)
+            {
+                const BufferEntry& entry = *i;
+                if (entry.capacity_ > maxReservedSize / 8)
+                {
+                    CV_DbgAssert(currentReservedSize >= entry.capacity_);
+                    currentReservedSize -= entry.capacity_;
+                    _releaseBufferEntry(entry);
+                    i = reservedEntries_.erase(i);
+                    continue;
+                }
+                ++i;
+            }
+            _checkSizeOfReservedEntries();
+        }
+    }
+    // Releases every reserved buffer and empties the pool.
+    virtual void freeAllReservedBuffers()
+    {
+        AutoLock locker(mutex_);
+        std::list<BufferEntry>::const_iterator i = reservedEntries_.begin();
+        for (; i != reservedEntries_.end(); ++i)
+        {
+            const BufferEntry& entry = *i;
+            _releaseBufferEntry(entry);
+        }
+        reservedEntries_.clear();
+        // Keep the counter in step with the (now empty) list. A stale value would make
+        // _checkSizeOfReservedEntries() pop entries from an empty list later on and
+        // would be reported by getReservedSize().
+        currentReservedSize = 0;
+    }
+};
+
+
 class OpenCLAllocator : public MatAllocator
 {
+    mutable OpenCLBufferPoolImpl bufferPool;
 public:
     OpenCLAllocator() { matStdAllocator = Mat::getStdAllocator(); }
 
@@ -3274,17 +3511,18 @@ public:
         int createFlags = 0, flags0 = 0;
         getBestFlags(ctx, flags, createFlags, flags0);
 
-        cl_int retval = 0;
-        void* handle = clCreateBuffer((cl_context)ctx.ptr(),
-                                      createFlags, total, 0, &retval);
-        if( !handle || retval != CL_SUCCESS )
+        CV_Assert(createFlags == CL_MEM_READ_WRITE);
+        size_t capacity = 0;
+        void* handle = bufferPool.allocate(total, capacity);
+        if (!handle)
             return defaultAllocate(dims, sizes, type, data, step, flags);
         UMatData* u = new UMatData(this);
         u->data = 0;
         u->size = total;
+        u->capacity = capacity;
         u->handle = handle;
         u->flags = flags0;
-
+        CV_DbgAssert(!u->tempUMat()); // for bufferPool.release() consistency
         return u;
     }
 
@@ -3405,8 +3643,9 @@ public:
                 fastFree(u->data);
                 u->data = 0;
             }
-            clReleaseMemObject((cl_mem)u->handle);
+            bufferPool.release((cl_mem)u->handle, u->capacity);
             u->handle = 0;
+            u->capacity = 0;
             delete u;
         }
     }
@@ -3713,6 +3952,8 @@ public:
         }
     }
 
+    BufferPoolController* getBufferPoolController() const { return &bufferPool; }
+
     MatAllocator* matStdAllocator;
 };
 
index 3727b2f..ff5943b 100644 (file)
@@ -260,11 +260,6 @@ extern TLSData<CoreTLSData> coreTlsData;
 #define CL_RUNTIME_EXPORT
 #endif
 
-namespace ocl
-{
-    MatAllocator* getOpenCLAllocator();
-}
-
 extern bool __termination; // skip some cleanups, because process is terminating
                            // (for example, if ExitProcess() was already called)
 
index 0baf013..3e4cfa2 100644 (file)
@@ -56,7 +56,7 @@ UMatData::UMatData(const MatAllocator* allocator)
     prevAllocator = currAllocator = allocator;
     urefcount = refcount = 0;
     data = origdata = 0;
-    size = 0;
+    size = 0; capacity = 0;
     flags = 0;
     handle = 0;
     userdata = 0;
@@ -67,7 +67,7 @@ UMatData::~UMatData()
     prevAllocator = currAllocator = 0;
     urefcount = refcount = 0;
     data = origdata = 0;
-    size = 0;
+    size = 0; capacity = 0;
     flags = 0;
     handle = 0;
     userdata = 0;
index d30b928..765a631 100644 (file)
@@ -291,3 +291,31 @@ TEST(UMat, setOpenCL)
     // reset state to the previous one
     ocl::setUseOpenCL(useOCL);
 }
+
+// Exercises the buffer pool with steadily growing UMat sizes while the reserve limit is
+// set to 10x the base frame area. There are no explicit checks: the test passes when
+// the loop finishes without an error. The previous limit is put back at the end.
+TEST(UMat, BufferPoolGrowing)
+{
+#ifdef _DEBUG
+    const int ITERATIONS = 100;
+#else
+    const int ITERATIONS = 200;
+#endif
+    const Size sz(1920, 1080);
+    BufferPoolController* c = ocl::getOpenCLAllocator()->getBufferPoolController();
+    if (!c)
+    {
+        std::cout << "Skipped, no OpenCL" << std::endl;
+        return;
+    }
+
+    const size_t savedLimit = c->getMaxReservedSize();
+    c->freeAllReservedBuffers();
+    c->setMaxReservedSize(sz.area() * 10);
+    for (int step = 0; step < ITERATIONS; ++step)
+    {
+        UMat um(Size(sz.width + step, sz.height + step), CV_8UC1);
+        UMat um2(Size(sz.width + 2 * step, sz.height + 2 * step), CV_8UC1);
+    }
+    c->setMaxReservedSize(savedLimit);
+    c->freeAllReservedBuffers();
+}