core(OpenCL): thread-local OpenCL execution context
authorAlexander Alekhin <alexander.a.alekhin@gmail.com>
Tue, 11 Aug 2020 18:13:52 +0000 (18:13 +0000)
committerAlexander Alekhin <alexander.a.alekhin@gmail.com>
Wed, 2 Sep 2020 05:04:20 +0000 (05:04 +0000)
12 files changed:
modules/core/include/opencv2/core/mat.hpp
modules/core/include/opencv2/core/ocl.hpp
modules/core/src/directx.cpp
modules/core/src/ocl.cpp
modules/core/src/ocl_disabled.impl.hpp
modules/core/src/opengl.cpp
modules/core/src/precomp.hpp
modules/core/src/va_intel.cpp
modules/core/test/test_opencl.cpp [new file with mode: 0644]
samples/CMakeLists.txt
samples/sycl/CMakeLists.txt [new file with mode: 0644]
samples/sycl/sycl-opencv-interop.cpp [new file with mode: 0644]

index 73bb1d6..bc676c1 100644 (file)
@@ -565,6 +565,7 @@ struct CV_EXPORTS UMatData
     int allocatorFlags_;
     int mapcount;
     UMatData* originalUMatData;
+    std::shared_ptr<void> allocatorContext;
 };
 CV_ENUM_FLAGS(UMatData::MemoryFlag)
 
index 115f5d1..f5d5519 100644 (file)
@@ -229,8 +229,15 @@ public:
 
     CV_WRAP static const Device& getDefault();
 
-protected:
+    /**
+     * @param d OpenCL handle (cl_device_id). clRetainDevice() is called on success.
+     */
+    static Device fromHandle(void* d);
+
     struct Impl;
+    inline Impl* getImpl() const { return (Impl*)p; }
+    inline bool empty() const { return !p; }
+protected:
     Impl* p;
 };
 
@@ -239,33 +246,55 @@ class CV_EXPORTS Context
 {
 public:
     Context();
-    explicit Context(int dtype);
+    explicit Context(int dtype);  //!< @deprecated
     ~Context();
     Context(const Context& c);
-    Context& operator = (const Context& c);
+    Context& operator= (const Context& c);
 
+    /** @deprecated */
     bool create();
+    /** @deprecated */
     bool create(int dtype);
+
     size_t ndevices() const;
-    const Device& device(size_t idx) const;
+    Device& device(size_t idx) const;
     Program getProg(const ProgramSource& prog,
                     const String& buildopt, String& errmsg);
     void unloadProg(Program& prog);
 
+
+    /** Get thread-local OpenCL context (initialize if necessary) */
+#if 0  // OpenCV 5.0
+    static Context& getDefault();
+#else
     static Context& getDefault(bool initialize = true);
+#endif
+
+    /** @returns cl_context value */
     void* ptr() const;
 
-    friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
 
     bool useSVM() const;
     void setUseSVM(bool enabled);
 
+    /**
+     * @param context OpenCL handle (cl_context). clRetainContext() is called on success
+     */
+    static Context fromHandle(void* context);
+    static Context fromDevice(const ocl::Device& device);
+    static Context create(const std::string& configuration);
+
+    void release();
+
     struct Impl;
     inline Impl* getImpl() const { return (Impl*)p; }
+    inline bool empty() const { return !p; }
+// TODO OpenCV 5.0
 //protected:
     Impl* p;
 };
 
+/** @deprecated */
 class CV_EXPORTS Platform
 {
 public:
@@ -275,11 +304,14 @@ public:
     Platform& operator = (const Platform& p);
 
     void* ptr() const;
+
+    /** @deprecated */
     static Platform& getDefault();
 
-    friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
-protected:
     struct Impl;
+    inline Impl* getImpl() const { return (Impl*)p; }
+    inline bool empty() const { return !p; }
+protected:
     Impl* p;
 };
 
@@ -319,6 +351,7 @@ CV_EXPORTS void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, in
 CV_EXPORTS void convertFromImage(void* cl_mem_image, UMat& dst);
 
 // TODO Move to internal header
+/// @deprecated
 void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
 
 class CV_EXPORTS Queue
@@ -340,6 +373,7 @@ public:
 
     struct Impl; friend struct Impl;
     inline Impl* getImpl() const { return p; }
+    inline bool empty() const { return !p; }
 protected:
     Impl* p;
 };
@@ -490,6 +524,7 @@ public:
 
     struct Impl; friend struct Impl;
     inline Impl* getImpl() const { return (Impl*)p; }
+    inline bool empty() const { return !p; }
 protected:
     Impl* p;
 public:
@@ -571,6 +606,7 @@ public:
 
     struct Impl; friend struct Impl;
     inline Impl* getImpl() const { return (Impl*)p; }
+    inline bool empty() const { return !p; }
 protected:
     Impl* p;
 };
@@ -579,6 +615,9 @@ class CV_EXPORTS PlatformInfo
 {
 public:
     PlatformInfo();
+    /**
+     * @param id pointer cl_platform_id (cl_platform_id*)
+     */
     explicit PlatformInfo(void* id);
     ~PlatformInfo();
 
@@ -591,8 +630,9 @@ public:
     int deviceNumber() const;
     void getDevice(Device& device, int d) const;
 
-protected:
     struct Impl;
+    bool empty() const { return !p; }
+protected:
     Impl* p;
 };
 
@@ -689,6 +729,106 @@ private:
 CV_EXPORTS MatAllocator* getOpenCLAllocator();
 
 
+class CV_EXPORTS_W OpenCLExecutionContext
+{
+public:
+    OpenCLExecutionContext() = default;
+    ~OpenCLExecutionContext() = default;
+
+    OpenCLExecutionContext(const OpenCLExecutionContext& other) = default;
+    OpenCLExecutionContext(OpenCLExecutionContext&& other) = default;
+
+    OpenCLExecutionContext& operator=(const OpenCLExecutionContext& other) = default;
+    OpenCLExecutionContext& operator=(OpenCLExecutionContext&& other) = default;
+
+    /** Get associated ocl::Context */
+    Context& getContext() const;
+    /** Get associated ocl::Device */
+    Device& getDevice() const;
+    /** Get associated ocl::Queue */
+    Queue& getQueue() const;
+
+    bool useOpenCL() const;
+    void setUseOpenCL(bool flag);
+
+    /** Get OpenCL execution context of current thread.
+     *
+     * Initialize OpenCL execution context if it is empty
+     * - create new
+     * - reuse context of the main thread (threadID = 0)
+     */
+    static OpenCLExecutionContext& getCurrent();
+
+    /** Get OpenCL execution context of current thread (can be empty) */
+    static OpenCLExecutionContext& getCurrentRef();
+
+    /** Bind this OpenCL execution context to current thread.
+     *
+     * Context can't be empty.
+     *
+     * @note clFinish is not called for queue of previous execution context
+     */
+    void bind() const;
+
+    /** Creates new execution context with same OpenCV context and device
+     *
+     * @param q OpenCL queue
+     */
+    OpenCLExecutionContext cloneWithNewQueue(const ocl::Queue& q) const;
+    /** @overload */
+    OpenCLExecutionContext cloneWithNewQueue() const;
+
+    /** @brief Creates OpenCL execution context
+     * OpenCV will check if available OpenCL platform has platformName name, then assign context to
+     * OpenCV and call `clRetainContext` function. The deviceID device will be used as target device and
+     * new command queue will be created.
+     *
+     * @note Lifetime of passed handles is transferred to OpenCV wrappers on success
+     *
+     * @param platformName name of OpenCL platform to attach, this string is used to check if platform is available to OpenCV at runtime
+     * @param platformID ID of platform attached context was created for (cl_platform_id)
+     * @param context OpenCL context to be attached to OpenCV (cl_context)
+     * @param deviceID OpenCL device (cl_device_id)
+     */
+    static OpenCLExecutionContext create(const std::string& platformName, void* platformID, void* context, void* deviceID);
+
+    /** @brief Creates OpenCL execution context
+     *
+     * @param context non-empty OpenCL context
+     * @param device non-empty OpenCL device (must be a part of context)
+     * @param queue non-empty OpenCL queue for provided context and device
+     */
+    static OpenCLExecutionContext create(const Context& context, const Device& device, const ocl::Queue& queue);
+    /** @overload */
+    static OpenCLExecutionContext create(const Context& context, const Device& device);
+
+    struct Impl;
+    inline bool empty() const { return !p; }
+    void release();
+protected:
+    std::shared_ptr<Impl> p;
+};
+
+class OpenCLExecutionContextScope
+{
+    OpenCLExecutionContext ctx_;
+public:
+    inline OpenCLExecutionContextScope(const OpenCLExecutionContext& ctx)
+    {
+        CV_Assert(!ctx.empty());
+        ctx_ = OpenCLExecutionContext::getCurrentRef();
+        ctx.bind();
+    }
+
+    inline ~OpenCLExecutionContextScope()
+    {
+        if (!ctx_.empty())
+        {
+            ctx_.bind();
+        }
+    }
+};
+
 #ifdef __OPENCV_BUILD
 namespace internal {
 
index 56ed26f..c651449 100644 (file)
@@ -458,9 +458,22 @@ Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device)
         CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't create context for DirectX interop");
     }
 
-    Context& ctx = Context::getDefault(false);
-    initializeContextFromHandle(ctx, platforms[found], context, device);
-    return ctx;
+    cl_platform_id platform = platforms[found];
+    std::string platformName = PlatformInfo(platform).name();
+
+    OpenCLExecutionContext clExecCtx;
+    try
+    {
+        clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, device);
+    }
+    catch (...)
+    {
+        clReleaseDevice(device);
+        clReleaseContext(context);
+        throw;
+    }
+    clExecCtx.bind();
+    return const_cast<Context&>(clExecCtx.getContext());
 #endif
 }
 
@@ -565,10 +578,22 @@ Context& initializeContextFromD3D10Device(ID3D10Device* pD3D10Device)
             CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't create context for DirectX interop");
     }
 
+    cl_platform_id platform = platforms[found];
+    std::string platformName = PlatformInfo(platform).name();
 
-    Context& ctx = Context::getDefault(false);
-    initializeContextFromHandle(ctx, platforms[found], context, device);
-    return ctx;
+    OpenCLExecutionContext clExecCtx;
+    try
+    {
+        clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, device);
+    }
+    catch (...)
+    {
+        clReleaseDevice(device);
+        clReleaseContext(context);
+        throw;
+    }
+    clExecCtx.bind();
+    return const_cast<Context&>(clExecCtx.getContext());
 #endif
 }
 
@@ -675,10 +700,23 @@ Context& initializeContextFromDirect3DDevice9Ex(IDirect3DDevice9Ex* pDirect3DDev
             CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't create context for DirectX interop");
     }
 
-    Context& ctx = Context::getDefault(false);
-    initializeContextFromHandle(ctx, platforms[found], context, device);
+    cl_platform_id platform = platforms[found];
+    std::string platformName = PlatformInfo(platform).name();
+
+    OpenCLExecutionContext clExecCtx;
+    try
+    {
+        clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, device);
+    }
+    catch (...)
+    {
+        clReleaseDevice(device);
+        clReleaseContext(context);
+        throw;
+    }
+    clExecCtx.bind();
     g_isDirect3DDevice9Ex = true;
-    return ctx;
+    return const_cast<Context&>(clExecCtx.getContext());
 #endif
 }
 
@@ -785,10 +823,23 @@ Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9
             CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't create context for DirectX interop");
     }
 
-    Context& ctx = Context::getDefault(false);
-    initializeContextFromHandle(ctx, platforms[found], context, device);
+    cl_platform_id platform = platforms[found];
+    std::string platformName = PlatformInfo(platform).name();
+
+    OpenCLExecutionContext clExecCtx;
+    try
+    {
+        clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, device);
+    }
+    catch (...)
+    {
+        clReleaseDevice(device);
+        clReleaseContext(context);
+        throw;
+    }
+    clExecCtx.bind();
     g_isDirect3DDevice9Ex = false;
-    return ctx;
+    return const_cast<Context&>(clExecCtx.getContext());
 #endif
 }
 
index c1bbd33..c5c5b12 100644 (file)
@@ -829,6 +829,322 @@ public:
 #endif // OPENCV_HAVE_FILESYSTEM_SUPPORT
 
 
+
+struct OpenCLExecutionContext::Impl
+{
+    ocl::Context context_;
+    int device_;  // device index in context
+    ocl::Queue queue_;
+    int useOpenCL_;
+
+protected:
+    Impl() = delete;
+
+    void _init_device(cl_device_id deviceID)
+    {
+        CV_Assert(deviceID);
+        int ndevices = (int)context_.ndevices();
+        CV_Assert(ndevices > 0);
+        bool found = false;
+        for (int i = 0; i < ndevices; i++)
+        {
+            ocl::Device d = context_.device(i);
+            cl_device_id dhandle = (cl_device_id)d.ptr();
+            if (dhandle == deviceID)
+            {
+                device_ = i;
+                found = true;
+                break;
+            }
+        }
+        CV_Assert(found && "OpenCL device can't work with passed OpenCL context");
+    }
+
+    void _init_device(const ocl::Device& device)
+    {
+        CV_Assert(device.ptr());
+        int ndevices = (int)context_.ndevices();
+        CV_Assert(ndevices > 0);
+        bool found = false;
+        for (int i = 0; i < ndevices; i++)
+        {
+            ocl::Device d = context_.device(i);
+            if (d.getImpl() == device.getImpl())
+            {
+                device_ = i;
+                found = true;
+                break;
+            }
+        }
+        CV_Assert(found && "OpenCL device can't work with passed OpenCL context");
+    }
+
+public:
+    Impl(cl_platform_id platformID, cl_context context, cl_device_id deviceID)
+        : device_(0), useOpenCL_(-1)
+    {
+        CV_UNUSED(platformID);
+        CV_Assert(context);
+        CV_Assert(deviceID);
+
+        context_ = Context::fromHandle(context);
+        _init_device(deviceID);
+        queue_ = Queue(context_, context_.device(device_));
+    }
+
+    Impl(const ocl::Context& context, const ocl::Device& device, const ocl::Queue& queue)
+        : device_(0), useOpenCL_(-1)
+    {
+        CV_Assert(context.ptr());
+        CV_Assert(device.ptr());
+
+        context_ = context;
+        _init_device(device);
+        queue_ = queue;
+    }
+
+    Impl(const ocl::Context& context, const ocl::Device& device)
+        : device_(0), useOpenCL_(-1)
+    {
+        CV_Assert(context.ptr());
+        CV_Assert(device.ptr());
+
+        context_ = context;
+        _init_device(device);
+        queue_ = Queue(context_, context_.device(device_));
+    }
+
+    Impl(const ocl::Context& context, const int device, const ocl::Queue& queue)
+        : context_(context)
+        , device_(device)
+        , queue_(queue)
+        , useOpenCL_(-1)
+    {
+        // nothing
+    }
+    Impl(const Impl& other)
+        : context_(other.context_)
+        , device_(other.device_)
+        , queue_(other.queue_)
+        , useOpenCL_(-1)
+    {
+        // nothing
+    }
+
+    inline bool useOpenCL() const { return const_cast<Impl*>(this)->useOpenCL(); }
+    bool useOpenCL()
+    {
+        if (useOpenCL_ < 0)
+        {
+            try
+            {
+                useOpenCL_ = 0;
+                if (!context_.empty() && context_.ndevices() > 0)
+                {
+                    const Device& d = context_.device(device_);
+                    useOpenCL_ = d.available();
+                }
+            }
+            catch (const cv::Exception&)
+            {
+                // nothing
+            }
+            if (!useOpenCL_)
+                CV_LOG_INFO(NULL, "OpenCL: can't use OpenCL execution context");
+        }
+        return useOpenCL_ > 0;
+    }
+
+    void setUseOpenCL(bool flag)
+    {
+        if (!flag)
+            useOpenCL_ = 0;
+        else
+            useOpenCL_ = -1;
+    }
+
+    static const std::shared_ptr<Impl>& getInitializedExecutionContext()
+    {
+        CV_TRACE_FUNCTION();
+
+        CV_LOG_INFO(NULL, "OpenCL: initializing thread execution context");
+
+        static bool initialized = false;
+        static std::shared_ptr<Impl> g_primaryExecutionContext;
+
+        if (!initialized)
+        {
+            cv::AutoLock lock(getInitializationMutex());
+            if (!initialized)
+            {
+                CV_LOG_INFO(NULL, "OpenCL: creating new execution context...");
+                try
+                {
+                    Context c = ocl::Context::create(std::string());
+                    if (c.ndevices())
+                    {
+                        int deviceId = 0;
+                        auto& d = c.device(deviceId);
+                        if (d.available())
+                        {
+                            auto q = ocl::Queue(c, d);
+                            if (!q.ptr())
+                            {
+                                CV_LOG_ERROR(NULL, "OpenCL: Can't create default OpenCL queue");
+                            }
+                            else
+                            {
+                                g_primaryExecutionContext = std::make_shared<Impl>(c, deviceId, q);
+                                CV_LOG_INFO(NULL, "OpenCL: device=" << d.name());
+                            }
+                        }
+                        else
+                        {
+                            CV_LOG_ERROR(NULL, "OpenCL: OpenCL device is not available (CL_DEVICE_AVAILABLE returns false)");
+                        }
+                    }
+                    else
+                    {
+                        CV_LOG_INFO(NULL, "OpenCL: context is not available/disabled");
+                    }
+                }
+                catch (const std::exception& e)
+                {
+                    CV_LOG_INFO(NULL, "OpenCL: Can't initialize OpenCL context/device/queue: " << e.what());
+                }
+                catch (...)
+                {
+                    CV_LOG_WARNING(NULL, "OpenCL: Can't initialize OpenCL context/device/queue: unknown C++ exception");
+                }
+                initialized = true;
+            }
+        }
+        return g_primaryExecutionContext;
+    }
+};
+
+Context& OpenCLExecutionContext::getContext() const
+{
+    CV_Assert(p);
+    return p->context_;
+}
+Device& OpenCLExecutionContext::getDevice() const
+{
+    CV_Assert(p);
+    return p->context_.device(p->device_);
+}
+Queue& OpenCLExecutionContext::getQueue() const
+{
+    CV_Assert(p);
+    return p->queue_;
+}
+
+bool OpenCLExecutionContext::useOpenCL() const
+{
+    if (p)
+        return p->useOpenCL();
+    return false;
+}
+void OpenCLExecutionContext::setUseOpenCL(bool flag)
+{
+    CV_Assert(p);
+    p->setUseOpenCL(flag);
+}
+
+/* static */
+OpenCLExecutionContext& OpenCLExecutionContext::getCurrent()
+{
+    CV_TRACE_FUNCTION();
+    CoreTLSData& data = getCoreTlsData();
+    OpenCLExecutionContext& c = data.oclExecutionContext;
+    if (!data.oclExecutionContextInitialized)
+    {
+        data.oclExecutionContextInitialized = true;
+        if (c.empty() && haveOpenCL())
+            c.p = Impl::getInitializedExecutionContext();
+    }
+    return c;
+}
+
+/* static */
+OpenCLExecutionContext& OpenCLExecutionContext::getCurrentRef()
+{
+    CV_TRACE_FUNCTION();
+    CoreTLSData& data = getCoreTlsData();
+    OpenCLExecutionContext& c = data.oclExecutionContext;
+    return c;
+}
+
+void OpenCLExecutionContext::bind() const
+{
+    CV_TRACE_FUNCTION();
+    CV_Assert(p);
+    CoreTLSData& data = getCoreTlsData();
+    data.oclExecutionContext = *this;
+    data.oclExecutionContextInitialized = true;
+    data.useOpenCL = p->useOpenCL_;  // propagate "-1", avoid call useOpenCL()
+}
+
+
+OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue() const
+{
+    CV_TRACE_FUNCTION();
+    CV_Assert(p);
+    const Queue q(getContext(), getDevice());
+    return cloneWithNewQueue(q);
+}
+
+OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue(const ocl::Queue& q) const
+{
+    CV_TRACE_FUNCTION();
+    CV_Assert(p);
+    CV_Assert(q.ptr() != NULL);
+    OpenCLExecutionContext c;
+    c.p = std::make_shared<Impl>(p->context_, p->device_, q);
+    return c;
+}
+
+/* static */
+OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device, const ocl::Queue& queue)
+{
+    CV_TRACE_FUNCTION();
+    if (!haveOpenCL())
+        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
+
+    CV_Assert(!context.empty());
+    CV_Assert(context.ptr());
+    CV_Assert(!device.empty());
+    CV_Assert(device.ptr());
+    OpenCLExecutionContext ctx;
+    ctx.p = std::make_shared<OpenCLExecutionContext::Impl>(context, device, queue);
+    return ctx;
+
+}
+
+/* static */
+OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device)
+{
+    CV_TRACE_FUNCTION();
+    if (!haveOpenCL())
+        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
+
+    CV_Assert(!context.empty());
+    CV_Assert(context.ptr());
+    CV_Assert(!device.empty());
+    CV_Assert(device.ptr());
+    OpenCLExecutionContext ctx;
+    ctx.p = std::make_shared<OpenCLExecutionContext::Impl>(context, device);
+    return ctx;
+
+}
+
+void OpenCLExecutionContext::release()
+{
+    CV_TRACE_FUNCTION();
+    p.reset();
+}
+
+
 // true if we have initialized OpenCL subsystem with available platforms
 static bool g_isOpenCVActivated = false;
 
@@ -848,14 +1164,18 @@ bool haveOpenCL()
             {
                 g_isOpenCLAvailable = false;
                 g_isOpenCLInitialized = true;
+                return false;
             }
         }
+
+        cv::AutoLock lock(getInitializationMutex());
         CV_LOG_INFO(NULL, "Initialize OpenCL runtime...");
         try
         {
             cl_uint n = 0;
             g_isOpenCLAvailable = ::clGetPlatformIDs(0, NULL, &n) == CL_SUCCESS;
             g_isOpenCVActivated = n > 0;
+            CV_LOG_INFO(NULL, "OpenCL: found " << n << " platforms");
         }
         catch (...)
         {
@@ -873,11 +1193,16 @@ bool useOpenCL()
     {
         try
         {
-            data.useOpenCL = (int)(haveOpenCL() && Device::getDefault().ptr() && Device::getDefault().available()) ? 1 : 0;
+            data.useOpenCL = 0;
+            if (haveOpenCL())
+            {
+                auto c = OpenCLExecutionContext::getCurrent();
+                data.useOpenCL = c.useOpenCL();
+            }
         }
         catch (...)
         {
-            data.useOpenCL = 0;
+            CV_LOG_INFO(NULL, "OpenCL: can't initialize thread OpenCL execution context");
         }
     }
     return data.useOpenCL > 0;
@@ -895,16 +1220,23 @@ void setUseOpenCL(bool flag)
     CV_TRACE_FUNCTION();
 
     CoreTLSData& data = getCoreTlsData();
-    if (!flag)
+    auto& c = OpenCLExecutionContext::getCurrentRef();
+    if (!c.empty())
     {
-        data.useOpenCL = 0;
+        c.setUseOpenCL(flag);
+        data.useOpenCL = c.useOpenCL();
     }
-    else if( haveOpenCL() )
+    else
     {
-        data.useOpenCL = (Device::getDefault().ptr() != NULL) ? 1 : 0;
+        if (!flag)
+            data.useOpenCL = 0;
+        else
+            data.useOpenCL = -1; // enabled by default (if context is not initialized)
     }
 }
 
+
+
 #ifdef HAVE_CLAMDBLAS
 
 class AmdBlasHelper
@@ -1151,6 +1483,7 @@ void* Platform::ptr() const
 
 Platform& Platform::getDefault()
 {
+    CV_LOG_ONCE_WARNING(NULL, "OpenCL: Platform::getDefault() is deprecated and will be removed. Use cv::ocl::getPlatfomsInfo() for enumeration of available platforms");
     static Platform p;
     if( !p.p )
     {
@@ -1187,9 +1520,24 @@ static void parseDeviceVersion(const String &deviceVersion, int &major, int &min
 struct Device::Impl
 {
     Impl(void* d)
+        : refcount(1)
+        , handle(0)
+    {
+        try
+        {
+            cl_device_id device = (cl_device_id)d;
+            _init(device);
+            CV_OCL_CHECK(clRetainDevice(device));  // increment reference counter on success only
+        }
+        catch (...)
+        {
+            throw;
+        }
+    }
+
+    void _init(cl_device_id d)
     {
         handle = (cl_device_id)d;
-        refcount = 1;
 
         name_ = getStrProp(CL_DEVICE_NAME);
         version_ = getStrProp(CL_DEVICE_VERSION);
@@ -1252,6 +1600,20 @@ struct Device::Impl
 #endif
     }
 
+    ~Impl()
+    {
+#ifdef _WIN32
+        if (!cv::__termination)
+#endif
+        {
+            if (handle)
+            {
+                CV_OCL_CHECK(clReleaseDevice(handle));
+                handle = 0;
+            }
+        }
+    }
+
     template<typename _TpCL, typename _TpOut>
     _TpOut getProp(cl_device_info prop) const
     {
@@ -1349,6 +1711,16 @@ void Device::set(void* d)
     if(p)
         p->release();
     p = new Impl(d);
+    if (p->handle)
+    {
+        CV_OCL_CHECK(clReleaseDevice((cl_device_id)d));
+    }
+}
+
+Device Device::fromHandle(void* d)
+{
+    Device device(d);
+    return device;
 }
 
 void* Device::ptr() const
@@ -1611,10 +1983,14 @@ size_t Device::profilingTimerResolution() const
 
 const Device& Device::getDefault()
 {
-    const Context& ctx = Context::getDefault();
-    int idx = getCoreTlsData().device;
-    const Device& device = ctx.device(idx);
-    return device;
+    auto& c = OpenCLExecutionContext::getCurrent();
+    if (!c.empty())
+    {
+        return c.getDevice();
+    }
+
+    static Device dummy;
+    return dummy;
 }
 
 ////////////////////////////////////// Context ///////////////////////////////////////////////////
@@ -1666,7 +2042,7 @@ static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
     split(configurationStr, ':', parts);
     if (parts.size() > 3)
     {
-        std::cerr << "ERROR: Invalid configuration string for OpenCL device" << std::endl;
+        CV_LOG_ERROR(NULL, "OpenCL: Invalid configuration string for OpenCL device: " << configurationStr);
         return false;
     }
     if (parts.size() > 2)
@@ -1683,17 +2059,20 @@ static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
 }
 
 #if defined WINRT || defined _WIN32_WCE
-static cl_device_id selectOpenCLDevice()
+static cl_device_id selectOpenCLDevice(const char* configuration = NULL)
 {
+    CV_UNUSED(configuration)
     return NULL;
 }
 #else
-static cl_device_id selectOpenCLDevice()
+static cl_device_id selectOpenCLDevice(const char* configuration = NULL)
 {
     std::string platform, deviceName;
     std::vector<std::string> deviceTypes;
 
-    const char* configuration = getenv("OPENCV_OPENCL_DEVICE");
+    if (!configuration)
+        configuration = getenv("OPENCV_OPENCL_DEVICE");
+
     if (configuration &&
             (strcmp(configuration, "disabled") == 0 ||
              !parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName)
@@ -1753,7 +2132,7 @@ static cl_device_id selectOpenCLDevice()
         }
         if (selectedPlatform == -1)
         {
-            std::cerr << "ERROR: Can't find OpenCL platform by name: " << platform << std::endl;
+            CV_LOG_ERROR(NULL, "OpenCL: Can't find OpenCL platform by name: " << platform);
             goto not_found;
         }
     }
@@ -1784,7 +2163,7 @@ static cl_device_id selectOpenCLDevice()
             deviceType = Device::TYPE_ALL;
         else
         {
-            std::cerr << "ERROR: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t] << std::endl;
+            CV_LOG_ERROR(NULL, "OpenCL: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t]);
             goto not_found;
         }
 
@@ -1835,13 +2214,16 @@ not_found:
     if (!configuration)
         return NULL; // suppress messages on stderr
 
-    std::cerr << "ERROR: Requested OpenCL device not found, check configuration: " << configuration << std::endl
-            << "    Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
-            << "    Device types: ";
+    std::ostringstream msg;
+    msg << "ERROR: Requested OpenCL device not found, check configuration: '" << configuration << "'" << std::endl
+        << "    Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
+        << "    Device types:";
     for (size_t t = 0; t < deviceTypes.size(); t++)
-        std::cerr << deviceTypes[t] << " ";
+        msg << ' ' << deviceTypes[t];
+
+    msg << std::endl << "    Device name: " << (deviceName.length() == 0 ? "any" : deviceName);
 
-    std::cerr << std::endl << "    Device name: " << (deviceName.length() == 0 ? "any" : deviceName) << std::endl;
+    CV_LOG_ERROR(NULL, msg.str());
     return NULL;
 }
 #endif
@@ -1922,131 +2304,252 @@ static size_t getProgramCountLimit()
     return count;
 }
 
+static int g_contextId = 0;
+
+class OpenCLBufferPoolImpl;
+class OpenCLSVMBufferPoolImpl;
+
 struct Context::Impl
 {
     static Context::Impl* get(Context& context) { return context.p; }
 
-    void __init()
+    typedef std::deque<Context::Impl*> container_t;
+    static container_t& getGlobalContainer()
     {
-        refcount = 1;
-        handle = 0;
+        static container_t g_contexts;
+        return g_contexts;
+    }
+
+protected:
+    Impl(const std::string& configuration_)
+        : refcount(1)
+        , contextId(CV_XADD(&g_contextId, 1))
+        , configuration(configuration_)
+        , handle(0)
 #ifdef HAVE_OPENCL_SVM
-        svmInitialized = false;
+        , svmInitialized(false)
 #endif
+    {
+        if (!haveOpenCL())
+            CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
+
+        cv::AutoLock lock(cv::getInitializationMutex());
+        auto& container = getGlobalContainer();
+        container.resize(std::max(container.size(), (size_t)contextId + 1));
+        container[contextId] = this;
     }
 
-    Impl()
+    ~Impl()
     {
-        __init();
+#ifdef _WIN32
+        if (!cv::__termination)
+#endif
+        {
+            if (handle)
+            {
+                CV_OCL_DBG_CHECK(clReleaseContext(handle));
+                handle = NULL;
+            }
+            devices.clear();
+        }
+
+        {
+            cv::AutoLock lock(cv::getInitializationMutex());
+            auto& container = getGlobalContainer();
+            CV_Assert((size_t)contextId < container.size());
+            container[contextId] = NULL;
+        }
     }
 
-    void setDefault()
+    void init_device_list()
     {
-        CV_Assert(handle == NULL);
+        CV_Assert(handle);
 
-        cl_device_id d = selectOpenCLDevice();
+        cl_uint ndevices = 0;
+        CV_OCL_CHECK(clGetContextInfo(handle, CL_CONTEXT_NUM_DEVICES, sizeof(ndevices), &ndevices, NULL));
+        CV_Assert(ndevices > 0);
 
-        if (d == NULL)
-            return;
+        cv::AutoBuffer<cl_device_id> cl_devices(ndevices);
+        size_t devices_ret_size = 0;
+        CV_OCL_CHECK(clGetContextInfo(handle, CL_CONTEXT_DEVICES, cl_devices.size() * sizeof(cl_device_id), &cl_devices[0], &devices_ret_size));
+        CV_CheckEQ(devices_ret_size, cl_devices.size() * sizeof(cl_device_id), "");
 
-        cl_platform_id pl = NULL;
-        CV_OCL_DBG_CHECK(clGetDeviceInfo(d, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &pl, NULL));
+        devices.clear();
+        for (unsigned i = 0; i < ndevices; i++)
+        {
+            devices.emplace_back(Device::fromHandle(cl_devices[i]));
+        }
+    }
 
-        cl_context_properties prop[] =
+    void __init_buffer_pools();  // w/o synchronization
+    void _init_buffer_pools() const
+    {
+        if (!bufferPool_)
         {
-            CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
-            0
-        };
+            cv::AutoLock lock(cv::getInitializationMutex());
+            if (!bufferPool_)
+            {
+                const_cast<Impl*>(this)->__init_buffer_pools();
+            }
+        }
+    }
+public:
+    static Impl* findContext(const std::string& configuration)
+    {
+        CV_TRACE_FUNCTION();
+        cv::AutoLock lock(cv::getInitializationMutex());
+        auto& container = getGlobalContainer();
+        if (configuration.empty() && !container.empty())
+            return container[0];
+        for (auto it = container.begin(); it != container.end(); ++it)
+        {
+            Impl* i = *it;
+            if (i && i->configuration == configuration)
+            {
+                return i;
+            }
+        }
+        return NULL;
+    }
 
-        // !!! in the current implementation force the number of devices to 1 !!!
-        cl_uint nd = 1;
-        cl_int status;
+    static Impl* findOrCreateContext(const std::string& configuration_)
+    {
+        CV_TRACE_FUNCTION();
+        std::string configuration = configuration_;
+        if (configuration_.empty())
+        {
+            const char* c = getenv("OPENCV_OPENCL_DEVICE");
+            if (c)
+                configuration = c;
+        }
+        Impl* impl = findContext(configuration);
+        if (impl)
+        {
+            CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration)
+            return impl;
+        }
 
-        handle = clCreateContext(prop, nd, &d, 0, 0, &status);
-        CV_OCL_DBG_CHECK_RESULT(status, "clCreateContext");
+        cl_device_id d = selectOpenCLDevice(configuration.empty() ? NULL : configuration.c_str());
+        if (d == NULL)
+            return NULL;
 
-        bool ok = handle != 0 && status == CL_SUCCESS;
-        if( ok )
+        impl = new Impl(configuration);
+        try
         {
-            devices.resize(nd);
-            devices[0].set(d);
+            impl->createFromDevice(d);
+            if (impl->handle)
+                return impl;
+            delete impl;
+            return NULL;
+        }
+        catch (...)
+        {
+            delete impl;
+            throw;
         }
-        else
-            handle = NULL;
     }
 
-    Impl(int dtype0)
+    static Impl* findOrCreateContext(cl_context h)
     {
-        __init();
+        CV_TRACE_FUNCTION();
 
-        cl_int retval = 0;
-        cl_platform_id pl = (cl_platform_id)Platform::getDefault().ptr();
-        cl_context_properties prop[] =
+        CV_Assert(h);
+
+        std::string configuration = cv::format("@ctx-%p", (void*)h);
+        Impl* impl = findContext(configuration);
+        if (impl)
         {
-            CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
-            0
-        };
+            CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration)
+            impl->addref();
+            return impl;
+        }
 
-        cl_uint nd0 = 0;
-        int dtype = dtype0 & 15;
-        cl_int status = clGetDeviceIDs(pl, dtype, 0, NULL, &nd0);
-        if (status != CL_DEVICE_NOT_FOUND) // Not an error if platform has no devices
+        impl = new Impl(configuration);
+        try
+        {
+            CV_OCL_CHECK(clRetainContext(h));
+            impl->handle = h;
+            impl->init_device_list();
+            return impl;
+        }
+        catch (...)
         {
-            CV_OCL_DBG_CHECK_RESULT(status,
-                cv::format("clGetDeviceIDs(platform=%p, device_type=%d, num_entries=0, devices=NULL, numDevices=%p)", pl, dtype, &nd0).c_str());
+            delete impl;
+            throw;
         }
+    }
 
-        if (nd0 == 0)
-            return;
+    static Impl* findOrCreateContext(const ocl::Device& device)
+    {
+        CV_TRACE_FUNCTION();
 
-        AutoBuffer<void*> dlistbuf(nd0*2+1);
-        cl_device_id* dlist = (cl_device_id*)dlistbuf.data();
-        cl_device_id* dlist_new = dlist + nd0;
-        CV_OCL_DBG_CHECK(clGetDeviceIDs(pl, dtype, nd0, dlist, &nd0));
+        CV_Assert(!device.empty());
+        cl_device_id d = (cl_device_id)device.ptr();
+        CV_Assert(d);
 
-        cl_uint i, nd = 0;
-        String name0;
-        for(i = 0; i < nd0; i++)
+        std::string configuration = cv::format("@dev-%p", (void*)d);
+        Impl* impl = findContext(configuration);
+        if (impl)
         {
-            Device d(dlist[i]);
-            if( !d.available() || !d.compilerAvailable() )
-                continue;
-            if( dtype0 == Device::TYPE_DGPU && d.hostUnifiedMemory() )
-                continue;
-            if( dtype0 == Device::TYPE_IGPU && !d.hostUnifiedMemory() )
-                continue;
-            String name = d.name();
-            if( nd != 0 && name != name0 )
-                continue;
-            name0 = name;
-            dlist_new[nd++] = dlist[i];
+            CV_LOG_INFO(NULL, "OpenCL: reuse context@" << impl->contextId << " for configuration: " << configuration)
+            impl->addref();
+            return impl;
+        }
+
+        impl = new Impl(configuration);
+        try
+        {
+            impl->createFromDevice(d);
+            CV_Assert(impl->handle);
+            return impl;
+        }
+        catch (...)
+        {
+            delete impl;
+            throw;
         }
+    }
 
-        if(nd == 0)
+    void setDefault()
+    {
+        CV_TRACE_FUNCTION();
+        cl_device_id d = selectOpenCLDevice();
+
+        if (d == NULL)
             return;
 
+        createFromDevice(d);
+    }
+
+    void createFromDevice(cl_device_id d)
+    {
+        CV_TRACE_FUNCTION();
+        CV_Assert(handle == NULL);
+
+        cl_platform_id pl = NULL;
+        CV_OCL_DBG_CHECK(clGetDeviceInfo(d, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &pl, NULL));
+
+        cl_context_properties prop[] =
+        {
+            CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
+            0
+        };
+
         // !!! in the current implementation force the number of devices to 1 !!!
-        nd = 1;
+        cl_uint nd = 1;
+        cl_int status;
+
+        handle = clCreateContext(prop, nd, &d, 0, 0, &status);
+        CV_OCL_DBG_CHECK_RESULT(status, "clCreateContext");
 
-        handle = clCreateContext(prop, nd, dlist_new, 0, 0, &retval);
-        CV_OCL_DBG_CHECK_RESULT(retval, "clCreateContext");
-        bool ok = handle != 0 && retval == CL_SUCCESS;
+        bool ok = handle != 0 && status == CL_SUCCESS;
         if( ok )
         {
             devices.resize(nd);
-            for( i = 0; i < nd; i++ )
-                devices[i].set(dlist_new[i]);
+            devices[0].set(d);
         }
-    }
-
-    ~Impl()
-    {
-        if(handle)
-        {
-            CV_OCL_DBG_CHECK(clReleaseContext(handle));
+        else
             handle = NULL;
-        }
-        devices.clear();
     }
 
     Program getProg(const ProgramSource& src, const String& buildflags, String& errmsg);
@@ -2124,6 +2627,9 @@ struct Context::Impl
 
     IMPLEMENT_REFCOUNTABLE();
 
+    const int contextId;  // global unique ID
+    const std::string configuration;
+
     cl_context handle;
     std::vector<Device> devices;
 
@@ -2136,6 +2642,21 @@ struct Context::Impl
     typedef std::list<cv::String> CacheList;
     CacheList cacheList;
 
+    std::shared_ptr<OpenCLBufferPoolImpl> bufferPool_;
+    std::shared_ptr<OpenCLBufferPoolImpl> bufferPoolHostPtr_;
+    OpenCLBufferPoolImpl& getBufferPool() const
+    {
+        _init_buffer_pools();
+        CV_DbgAssert(bufferPool_);
+        return *bufferPool_.get();
+    }
+    OpenCLBufferPoolImpl& getBufferPoolHostPtr() const
+    {
+        _init_buffer_pools();
+        CV_DbgAssert(bufferPoolHostPtr_);
+        return *bufferPoolHostPtr_.get();
+    }
+
 #ifdef HAVE_OPENCL_SVM
     bool svmInitialized;
     bool svmAvailable;
@@ -2271,6 +2792,15 @@ struct Context::Impl
         svmFunctions.fn_clSVMAlloc = NULL;
         return;
     }
+
+    std::shared_ptr<OpenCLSVMBufferPoolImpl> bufferPoolSVM_;
+
+    OpenCLSVMBufferPoolImpl& getBufferPoolSVM() const
+    {
+        _init_buffer_pools();
+        CV_DbgAssert(bufferPoolSVM_);
+        return *bufferPoolSVM_.get();
+    }
 #endif
 
     friend class Program;
@@ -2282,49 +2812,66 @@ Context::Context()
     p = 0;
 }
 
+Context::~Context()
+{
+    release();
+}
+
+// deprecated
 Context::Context(int dtype)
 {
     p = 0;
     create(dtype);
 }
 
-bool Context::create()
+void Context::release()
 {
-    if( !haveOpenCL() )
-        return false;
-    if(p)
-        p->release();
-    p = new Impl();
-    if(!p->handle)
+    if (p)
     {
-        delete p;
-        p = 0;
+        p->release();
+        p = NULL;
     }
-    return p != 0;
 }
 
-bool Context::create(int dtype0)
+bool Context::create()
 {
-    if( !haveOpenCL() )
+    release();
+    if (!haveOpenCL())
         return false;
-    if(p)
-        p->release();
-    p = new Impl(dtype0);
-    if(!p->handle)
-    {
-        delete p;
-        p = 0;
-    }
-    return p != 0;
+    p = Impl::findOrCreateContext(std::string());
+    if (p->handle)
+        return true;
+    release();
+    return false;
 }
 
-Context::~Context()
+// deprecated
+bool Context::create(int dtype)
 {
-    if (p)
+    if( !haveOpenCL() )
+        return false;
+    release();
+    if (dtype == CL_DEVICE_TYPE_DEFAULT || (unsigned)dtype == (unsigned)CL_DEVICE_TYPE_ALL)
     {
-        p->release();
-        p = NULL;
+        p = Impl::findOrCreateContext("");
+    }
+    else if (dtype == CL_DEVICE_TYPE_GPU)
+    {
+        p = Impl::findOrCreateContext(":GPU:");
     }
+    else if (dtype == CL_DEVICE_TYPE_CPU)
+    {
+        p = Impl::findOrCreateContext(":CPU:");
+    }
+    else
+    {
+        CV_LOG_ERROR(NULL, "OpenCL: Can't recognize OpenCV device type=" << dtype);
+    }
+    if (p && !p->handle)
+    {
+        release();
+    }
+    return p != 0;
 }
 
 Context::Context(const Context& c)
@@ -2355,7 +2902,7 @@ size_t Context::ndevices() const
     return p ? p->devices.size() : 0;
 }
 
-const Device& Context::device(size_t idx) const
+Device& Context::device(size_t idx) const
 {
     static Device dummy;
     return !p || idx >= p->devices.size() ? dummy : p->devices[idx];
@@ -2363,23 +2910,16 @@ const Device& Context::device(size_t idx) const
 
 Context& Context::getDefault(bool initialize)
 {
-    static Context* ctx = new Context();
-    if(!ctx->p && haveOpenCL())
+    auto& c = OpenCLExecutionContext::getCurrent();
+    if (!c.empty())
     {
-        if (!ctx->p)
-            ctx->p = new Impl();
-        if (initialize)
-        {
-            // do not create new Context right away.
-            // First, try to retrieve existing context of the same type.
-            // In its turn, Platform::getContext() may call Context::create()
-            // if there is no such context.
-            if (ctx->p->handle == NULL)
-                ctx->p->setDefault();
-        }
+        auto& ctx = c.getContext();
+        return ctx;
     }
 
-    return *ctx;
+    CV_UNUSED(initialize);
+    static Context dummy;
+    return dummy;
 }
 
 Program Context::getProg(const ProgramSource& prog,
@@ -2394,6 +2934,30 @@ void Context::unloadProg(Program& prog)
         p->unloadProg(prog);
 }
 
+/* static */
+Context Context::fromHandle(void* context)
+{
+    Context ctx;
+    ctx.p = Impl::findOrCreateContext((cl_context)context);
+    return ctx;
+}
+
+/* static */
+Context Context::fromDevice(const ocl::Device& device)
+{
+    Context ctx;
+    ctx.p = Impl::findOrCreateContext(device);
+    return ctx;
+}
+
+/* static */
+Context Context::create(const std::string& configuration)
+{
+    Context ctx;
+    ctx.p = Impl::findOrCreateContext(configuration);
+    return ctx;
+}
+
 #ifdef HAVE_OPENCL_SVM
 bool Context::useSVM() const
 {
@@ -2477,12 +3041,23 @@ static void get_platform_name(cl_platform_id id, String& name)
 */
 void attachContext(const String& platformName, void* platformID, void* context, void* deviceID)
 {
-    cl_uint cnt = 0;
+    auto ctx = OpenCLExecutionContext::create(platformName, platformID, context, deviceID);
+    ctx.bind();
+}
 
+/* static */
+OpenCLExecutionContext OpenCLExecutionContext::create(
+        const std::string& platformName, void* platformID, void* context, void* deviceID
+)
+{
+    if (!haveOpenCL())
+        CV_Error(cv::Error::OpenCLApiCallError, "OpenCL runtime is not available!");
+
+    cl_uint cnt = 0;
     CV_OCL_CHECK(clGetPlatformIDs(0, 0, &cnt));
 
     if (cnt == 0)
-        CV_Error(cv::Error::OpenCLApiCallError, "no OpenCL platform available!");
+        CV_Error(cv::Error::OpenCLApiCallError, "No OpenCL platform available!");
 
     std::vector<cl_platform_id> platforms(cnt);
 
@@ -2512,44 +3087,25 @@ void attachContext(const String& platformName, void* platformID, void* context,
     if (platformName != actualPlatformName)
         CV_Error(cv::Error::OpenCLApiCallError, "No matched platforms available!");
 
-    // do not initialize OpenCL context
-    Context ctx = Context::getDefault(false);
-
-    // attach supplied context to OpenCV
-    initializeContextFromHandle(ctx, platformID, context, deviceID);
-
-    CV_OCL_CHECK(clRetainContext((cl_context)context));
-
-    // clear command queue, if any
-    CoreTLSData& data = getCoreTlsData();
-    data.oclQueue.finish();
-    Queue q;
-    data.oclQueue = q;
-
-    return;
-} // attachContext()
-
+    OpenCLExecutionContext ctx;
+    ctx.p = std::make_shared<OpenCLExecutionContext::Impl>((cl_platform_id)platformID, (cl_context)context, (cl_device_id)deviceID);
+    CV_OCL_CHECK(clReleaseContext((cl_context)context));
+    CV_OCL_CHECK(clReleaseDevice((cl_device_id)deviceID));
+    return ctx;
+}
 
-void initializeContextFromHandle(Context& ctx, void* platform, void* _context, void* _device)
+void initializeContextFromHandle(Context& ctx, void* _platform, void* _context, void* _device)
 {
+    // internal call, less checks
+    cl_platform_id platformID = (cl_platform_id)_platform;
     cl_context context = (cl_context)_context;
-    cl_device_id device = (cl_device_id)_device;
-
-    // cleanup old context
-    Context::Impl * impl = ctx.p;
-    if (impl->handle)
-    {
-        CV_OCL_DBG_CHECK(clReleaseContext(impl->handle));
-    }
-    impl->devices.clear();
+    cl_device_id deviceID = (cl_device_id)_device;
 
-    impl->handle = context;
-    impl->devices.resize(1);
-    impl->devices[0].set(device);
+    std::string platformName = PlatformInfo(platformID).name();
 
-    Platform& p = Platform::getDefault();
-    Platform::Impl* pImpl = p.p;
-    pImpl->handle = (cl_platform_id)platform;
+    auto clExecCtx = OpenCLExecutionContext::create(platformName, platformID, context, deviceID);
+    CV_Assert(!clExecCtx.empty());
+    ctx = clExecCtx.getContext();
 }
 
 /////////////////////////////////////////// Queue /////////////////////////////////////////////
@@ -2712,10 +3268,14 @@ void* Queue::ptr() const
 
 Queue& Queue::getDefault()
 {
-    Queue& q = getCoreTlsData().oclQueue;
-    if( !q.p && haveOpenCL() )
-        q.create(Context::getDefault());
-    return q;
+    auto& c = OpenCLExecutionContext::getCurrent();
+    if (!c.empty())
+    {
+        auto& q = c.getQueue();
+        return q;
+    }
+    static Queue dummy;
+    return dummy;
 }
 
 static cl_command_queue getQueue(const Queue& q)
@@ -4478,14 +5038,32 @@ private:
 #define CV_OPENCL_DATA_PTR_ALIGNMENT 16
 #endif
 
-class OpenCLAllocator CV_FINAL : public MatAllocator
+
+void Context::Impl::__init_buffer_pools()
 {
-    mutable OpenCLBufferPoolImpl bufferPool;
-    mutable OpenCLBufferPoolImpl bufferPoolHostPtr;
-#ifdef  HAVE_OPENCL_SVM
-    mutable OpenCLSVMBufferPoolImpl bufferPoolSVM;
+    bufferPool_ = std::make_shared<OpenCLBufferPoolImpl>(0);
+    OpenCLBufferPoolImpl& bufferPool = *bufferPool_.get();
+    bufferPoolHostPtr_ = std::make_shared<OpenCLBufferPoolImpl>(CL_MEM_ALLOC_HOST_PTR);
+    OpenCLBufferPoolImpl& bufferPoolHostPtr = *bufferPoolHostPtr_.get();
+
+    size_t defaultPoolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0;
+    size_t poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultPoolSize);
+    bufferPool.setMaxReservedSize(poolSize);
+    size_t poolSizeHostPtr = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultPoolSize);
+    bufferPoolHostPtr.setMaxReservedSize(poolSizeHostPtr);
+
+#ifdef HAVE_OPENCL_SVM
+    bufferPoolSVM_ = std::make_shared<OpenCLSVMBufferPoolImpl>();
+    OpenCLSVMBufferPoolImpl& bufferPoolSVM = *bufferPoolSVM_.get();
+    size_t poolSizeSVM = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize);
+    bufferPoolSVM.setMaxReservedSize(poolSizeSVM);
 #endif
 
+    CV_LOG_INFO(NULL, "OpenCL: Initializing buffer pool for context@" << contextId << " with max capacity: poolSize=" << poolSize << " poolSizeHostPtr=" << poolSizeHostPtr);
+}
+
+class OpenCLAllocator CV_FINAL : public MatAllocator
+{
 public:
     enum AllocatorFlags
     {
@@ -4498,20 +5076,7 @@ public:
     };
 
     OpenCLAllocator()
-        : bufferPool(0),
-          bufferPoolHostPtr(CL_MEM_ALLOC_HOST_PTR)
-    {
-        size_t defaultPoolSize, poolSize;
-        defaultPoolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0;
-        poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultPoolSize);
-        bufferPool.setMaxReservedSize(poolSize);
-        poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultPoolSize);
-        bufferPoolHostPtr.setMaxReservedSize(poolSize);
-#ifdef HAVE_OPENCL_SVM
-        poolSize = utils::getConfigurationParameterSizeT("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize);
-        bufferPoolSVM.setMaxReservedSize(poolSize);
-#endif
-
+    {
         matStdAllocator = Mat::getDefaultAllocator();
     }
     ~OpenCLAllocator()
@@ -4563,6 +5128,9 @@ public:
     {
         if(!useOpenCL())
             return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
+
+        flushCleanupQueue();
+
         CV_Assert(data == 0);
         size_t total = CV_ELEM_SIZE(type);
         for( int i = dims-1; i >= 0; i-- )
@@ -4573,7 +5141,9 @@ public:
         }
 
         Context& ctx = Context::getDefault();
-        flushCleanupQueue();
+        if (!ctx.getImpl())
+            return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
+        Context::Impl& ctxImpl = *ctx.getImpl();
 
         int createFlags = 0;
         UMatData::MemoryFlag flags0 = static_cast<UMatData::MemoryFlag>(0);
@@ -4587,7 +5157,7 @@ public:
         if (ctx.useSVM() && svm::useSVM(usageFlags) && !svmCaps.isNoSVMSupport())
         {
             allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED;
-            handle = bufferPoolSVM.allocate(total);
+            handle = ctxImpl.getBufferPoolSVM().allocate(total);
 
             // this property is constant, so single buffer pool can be used here
             bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
@@ -4598,12 +5168,12 @@ public:
         if (createFlags == 0)
         {
             allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_USED;
-            handle = bufferPool.allocate(total);
+            handle = ctxImpl.getBufferPool().allocate(total);
         }
         else if (createFlags == CL_MEM_ALLOC_HOST_PTR)
         {
             allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED;
-            handle = bufferPoolHostPtr.allocate(total);
+            handle = ctxImpl.getBufferPoolHostPtr().allocate(total);
         }
         else
         {
@@ -4619,6 +5189,7 @@ public:
         u->handle = handle;
         u->flags = flags0;
         u->allocatorFlags_ = allocatorFlags;
+        u->allocatorContext = std::static_pointer_cast<void>(std::make_shared<ocl::Context>(ctx));
         CV_DbgAssert(!u->tempUMat()); // for bufferPool.release() consistency in deallocate()
         u->markHostCopyObsolete(true);
         opencl_allocator_stats.onAllocate(u->size);
@@ -4931,15 +5502,26 @@ public:
             }
             if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_USED)
             {
-                bufferPool.release((cl_mem)u->handle);
+                std::shared_ptr<ocl::Context> pCtx = std::static_pointer_cast<ocl::Context>(u->allocatorContext);
+                CV_Assert(pCtx);
+                ocl::Context& ctx = *pCtx.get();
+                CV_Assert(ctx.getImpl());
+                ctx.getImpl()->getBufferPool().release((cl_mem)u->handle);
             }
             else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED)
             {
-                bufferPoolHostPtr.release((cl_mem)u->handle);
+                std::shared_ptr<ocl::Context> pCtx = std::static_pointer_cast<ocl::Context>(u->allocatorContext);
+                CV_Assert(pCtx);
+                ocl::Context& ctx = *pCtx.get();
+                CV_Assert(ctx.getImpl());
+                ctx.getImpl()->getBufferPoolHostPtr().release((cl_mem)u->handle);
             }
 #ifdef HAVE_OPENCL_SVM
             else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED)
             {
+                std::shared_ptr<ocl::Context> pCtx = std::static_pointer_cast<ocl::Context>(u->allocatorContext);
+                CV_Assert(pCtx);
+                ocl::Context& ctx = *pCtx.get();
                 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
                 {
                     //nothing
@@ -4947,7 +5529,6 @@ public:
                 else if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
                         (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
                 {
-                    Context& ctx = Context::getDefault();
                     const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
                     CV_DbgAssert(svmFns->isValid());
                     cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
@@ -4959,7 +5540,8 @@ public:
                         CV_OCL_CHECK_RESULT(status, "clEnqueueSVMUnmap()");
                     }
                 }
-                bufferPoolSVM.release((void*)u->handle);
+                CV_Assert(ctx.getImpl());
+                ctx.getImpl()->getBufferPoolSVM().release((void*)u->handle);
             }
 #endif
             else
@@ -5675,22 +6257,26 @@ public:
         }
     }
 
-    BufferPoolController* getBufferPoolController(const char* id) const CV_OVERRIDE {
+    BufferPoolController* getBufferPoolController(const char* id) const CV_OVERRIDE
+    {
+        ocl::Context ctx = Context::getDefault();
+        if (ctx.empty())
+            return NULL;
 #ifdef HAVE_OPENCL_SVM
         if ((svm::checkForceSVMUmatUsage() && (id == NULL || strcmp(id, "OCL") == 0)) || (id != NULL && strcmp(id, "SVM") == 0))
         {
-            return &bufferPoolSVM;
+            return &ctx.getImpl()->getBufferPoolSVM();
         }
 #endif
         if (id != NULL && strcmp(id, "HOST_ALLOC") == 0)
         {
-            return &bufferPoolHostPtr;
+            return &ctx.getImpl()->getBufferPoolHostPtr();
         }
         if (id != NULL && strcmp(id, "OCL") != 0)
         {
             CV_Error(cv::Error::StsBadArg, "getBufferPoolController(): unknown BufferPool ID\n");
         }
-        return &bufferPool;
+        return &ctx.getImpl()->getBufferPool();
     }
 
     MatAllocator* matStdAllocator;
index a051647..97c3856 100644 (file)
@@ -144,6 +144,8 @@ const Device& Device::getDefault()
     return dummy;
 }
 
+/* static */ Device Device::fromHandle(void* d) { OCL_NOT_AVAILABLE(); }
+
 
 Context::Context() : p(NULL) { }
 Context::Context(int dtype) : p(NULL) { }
@@ -154,7 +156,7 @@ Context& Context::operator=(const Context& c) { return *this; }
 bool Context::create() { return false; }
 bool Context::create(int dtype) { return false; }
 size_t Context::ndevices() const { return 0; }
-const Device& Context::device(size_t idx) const { OCL_NOT_AVAILABLE(); }
+Device& Context::device(size_t idx) const { OCL_NOT_AVAILABLE(); }
 Program Context::getProg(const ProgramSource& prog, const String& buildopt, String& errmsg) { OCL_NOT_AVAILABLE(); }
 void Context::unloadProg(Program& prog) { }
 
@@ -169,6 +171,13 @@ void* Context::ptr() const { return NULL; }
 bool Context::useSVM() const { return false; }
 void Context::setUseSVM(bool enabled) { }
 
+/* static */ Context Context::fromHandle(void* context) { OCL_NOT_AVAILABLE(); }
+/* static */ Context Context::fromDevice(const ocl::Device& device) { OCL_NOT_AVAILABLE(); }
+/* static */ Context Context::create(const std::string& configuration) { OCL_NOT_AVAILABLE(); }
+
+void Context::release() { }
+
+
 Platform::Platform() : p(NULL) { }
 Platform::~Platform() { }
 Platform::Platform(const Platform&) : p(NULL) { }
@@ -355,6 +364,43 @@ MatAllocator* getOpenCLAllocator() { return NULL; }
 
 internal::ProgramEntry::operator ProgramSource&() const { OCL_NOT_AVAILABLE(); }
 
+
+struct OpenCLExecutionContext::Impl
+{
+    Impl() = default;
+};
+
+Context& OpenCLExecutionContext::getContext() const { OCL_NOT_AVAILABLE(); }
+Device& OpenCLExecutionContext::getDevice() const { OCL_NOT_AVAILABLE(); }
+Queue& OpenCLExecutionContext::getQueue() const { OCL_NOT_AVAILABLE(); }
+
+bool OpenCLExecutionContext::useOpenCL() const { return false; }
+void OpenCLExecutionContext::setUseOpenCL(bool flag) { }
+
+static
+OpenCLExecutionContext& getDummyOpenCLExecutionContext()
+{
+    static OpenCLExecutionContext dummy;
+    return dummy;
+}
+
+/* static */
+OpenCLExecutionContext& OpenCLExecutionContext::getCurrent() { return getDummyOpenCLExecutionContext(); }
+
+/* static */
+OpenCLExecutionContext& OpenCLExecutionContext::getCurrentRef() { return getDummyOpenCLExecutionContext(); }
+
+void OpenCLExecutionContext::bind() const { OCL_NOT_AVAILABLE(); }
+
+OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue(const ocl::Queue& q) const { OCL_NOT_AVAILABLE(); }
+OpenCLExecutionContext OpenCLExecutionContext::cloneWithNewQueue() const { OCL_NOT_AVAILABLE(); }
+
+/* static */ OpenCLExecutionContext OpenCLExecutionContext::create(const std::string& platformName, void* platformID, void* context, void* deviceID) { OCL_NOT_AVAILABLE(); }
+/* static */ OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device, const ocl::Queue& queue) { OCL_NOT_AVAILABLE(); }
+/* static */ OpenCLExecutionContext OpenCLExecutionContext::create(const Context& context, const Device& device) { OCL_NOT_AVAILABLE(); }
+
+void OpenCLExecutionContext::release() { }
+
 }}
 
 #if defined(_MSC_VER)
index fc042b9..37ab862 100644 (file)
@@ -1689,9 +1689,14 @@ Context& initializeContextFromGL()
     if (found < 0)
         CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't create context for OpenGL interop");
 
-    Context& ctx = Context::getDefault(false);
-    initializeContextFromHandle(ctx, platforms[found], context, device);
-    return ctx;
+    cl_platform_id platform = platforms[found];
+    std::string platformName = PlatformInfo(platform).name();
+
+    OpenCLExecutionContext clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, deviceID);
+    clReleaseDevice(device);
+    clReleaseContext(context);
+    clExecCtx.bind();
+    return const_cast<Context&>(clExecCtx.getContext());
 #endif
 }
 
index 836488f..21e281c 100644 (file)
@@ -322,7 +322,7 @@ struct CoreTLSData
 {
     CoreTLSData() :
 //#ifdef HAVE_OPENCL
-        device(0), useOpenCL(-1),
+        oclExecutionContextInitialized(false), useOpenCL(-1),
 //#endif
         useIPP(-1),
         useIPP_NE(-1)
@@ -333,8 +333,8 @@ struct CoreTLSData
 
     RNG rng;
 //#ifdef HAVE_OPENCL
-    int device; // device index of an array of devices in a context, see also Device::getDefault
-    ocl::Queue oclQueue; // the queue used for running a kernel, see also getQueue, Kernel::run
+    ocl::OpenCLExecutionContext oclExecutionContext;
+    bool oclExecutionContextInitialized;
     int useOpenCL; // 1 - use, 0 - do not use, -1 - auto/not initialized
 //#endif
     int useIPP;    // 1 - use, 0 - do not use, -1 - auto/not initialized
index c571b90..42948dc 100644 (file)
@@ -106,7 +106,7 @@ Context& initializeContextFromVA(VADisplay display, bool tryInterop)
                                                                CL_PREFERRED_DEVICES_FOR_VA_API_INTEL, 0, NULL, &numDevices);
             if ((status != CL_SUCCESS) || !(numDevices > 0))
                 continue;
-            numDevices = 1; // initializeContextFromHandle() expects only 1 device
+            numDevices = 1; // OpenCV expects only 1 device
             status = clGetDeviceIDsFromVA_APIMediaAdapterINTEL(platforms[i], CL_VA_API_DISPLAY_INTEL, display,
                                                                CL_PREFERRED_DEVICES_FOR_VA_API_INTEL, numDevices, &device, NULL);
             if (status != CL_SUCCESS)
@@ -135,9 +135,23 @@ Context& initializeContextFromVA(VADisplay display, bool tryInterop)
         if (found >= 0)
         {
             contextInitialized = true;
-            Context& ctx = Context::getDefault(false);
-            initializeContextFromHandle(ctx, platforms[found], context, device);
-            return ctx;
+
+            cl_platform_id platform = platforms[found];
+            std::string platformName = PlatformInfo(platform).name();
+
+            OpenCLExecutionContext clExecCtx;
+            try
+            {
+                clExecCtx = OpenCLExecutionContext::create(platformName, platform, context, device);
+            }
+            catch (...)
+            {
+                clReleaseDevice(device);
+                clReleaseContext(context);
+                throw;
+            }
+            clExecCtx.bind();
+            return const_cast<Context&>(clExecCtx.getContext());
         }
     }
 # endif // HAVE_VA_INTEL && HAVE_OPENCL
diff --git a/modules/core/test/test_opencl.cpp b/modules/core/test/test_opencl.cpp
new file mode 100644 (file)
index 0000000..f4f195e
--- /dev/null
@@ -0,0 +1,191 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+
+#include "test_precomp.hpp"
+#include "opencv2/ts/ocl_test.hpp"
+
+namespace opencv_test {
+namespace ocl {
+
+static void executeUMatCall(bool requireOpenCL = true)
+{
+    UMat a(100, 100, CV_8UC1, Scalar::all(0));
+    UMat b;
+    cv::add(a, Scalar::all(1), b);
+    Mat b_cpu = b.getMat(ACCESS_READ);
+    EXPECT_EQ(0, cv::norm(b_cpu - 1, NORM_INF));
+
+    if (requireOpenCL)
+    {
+        EXPECT_TRUE(cv::ocl::useOpenCL());
+    }
+}
+
+TEST(OCL_Context, createFromDevice)
+{
+    bool useOCL = cv::ocl::useOpenCL();
+
+    OpenCLExecutionContext ctx = OpenCLExecutionContext::getCurrent();
+
+    if (!useOCL)
+    {
+        ASSERT_TRUE(ctx.empty());  // Other tests should not broke global state
+        throw SkipTestException("OpenCL is not available / disabled");
+    }
+
+    ASSERT_FALSE(ctx.empty());
+
+    ocl::Device device = ctx.getDevice();
+    ASSERT_FALSE(device.empty());
+
+    ocl::Context context = ocl::Context::fromDevice(device);
+    ocl::Context context2 = ocl::Context::fromDevice(device);
+
+    EXPECT_TRUE(context.getImpl() == context2.getImpl()) << "Broken cache for OpenCL context (device)";
+}
+
+TEST(OCL_OpenCLExecutionContext, basic)
+{
+    bool useOCL = cv::ocl::useOpenCL();
+
+    OpenCLExecutionContext ctx = OpenCLExecutionContext::getCurrent();
+
+    if (!useOCL)
+    {
+        ASSERT_TRUE(ctx.empty());  // Other tests should not broke global state
+        throw SkipTestException("OpenCL is not available / disabled");
+    }
+
+    ASSERT_FALSE(ctx.empty());
+
+    ocl::Context context = ctx.getContext();
+    ocl::Context context2 = ocl::Context::getDefault();
+    EXPECT_TRUE(context.getImpl() == context2.getImpl());
+
+    ocl::Device device = ctx.getDevice();
+    ocl::Device device2 = ocl::Device::getDefault();
+    EXPECT_TRUE(device.getImpl() == device2.getImpl());
+
+    ocl::Queue queue = ctx.getQueue();
+    ocl::Queue queue2 = ocl::Queue::getDefault();
+    EXPECT_TRUE(queue.getImpl() == queue2.getImpl());
+}
+
+TEST(OCL_OpenCLExecutionContext, createAndBind)
+{
+    bool useOCL = cv::ocl::useOpenCL();
+
+    OpenCLExecutionContext ctx = OpenCLExecutionContext::getCurrent();
+
+    if (!useOCL)
+    {
+        ASSERT_TRUE(ctx.empty());  // Other tests should not broke global state
+        throw SkipTestException("OpenCL is not available / disabled");
+    }
+
+    ASSERT_FALSE(ctx.empty());
+
+    ocl::Context context = ctx.getContext();
+    ocl::Device device = ctx.getDevice();
+
+    OpenCLExecutionContext ctx2 = OpenCLExecutionContext::create(context, device);
+    ASSERT_FALSE(ctx2.empty());
+
+    try
+    {
+        ctx2.bind();
+        executeUMatCall();
+        ctx.bind();
+        executeUMatCall();
+    }
+    catch (...)
+    {
+        ctx.bind();  // restore
+        throw;
+    }
+}
+
+TEST(OCL_OpenCLExecutionContext, createGPU)
+{
+    bool useOCL = cv::ocl::useOpenCL();
+
+    OpenCLExecutionContext ctx = OpenCLExecutionContext::getCurrent();
+
+    if (!useOCL)
+    {
+        ASSERT_TRUE(ctx.empty());  // Other tests should not broke global state
+        throw SkipTestException("OpenCL is not available / disabled");
+    }
+
+    ASSERT_FALSE(ctx.empty());
+
+    ocl::Context context = ocl::Context::create(":GPU:1");
+    if (context.empty())
+    {
+        context = ocl::Context::create(":CPU:");
+        if (context.empty())
+            throw SkipTestException("OpenCL GPU1/CPU devices are not available");
+    }
+
+    ocl::Device device = context.device(0);
+
+    OpenCLExecutionContext ctx2 = OpenCLExecutionContext::create(context, device);
+    ASSERT_FALSE(ctx2.empty());
+
+    try
+    {
+        ctx2.bind();
+        executeUMatCall();
+        ctx.bind();
+        executeUMatCall();
+    }
+    catch (...)
+    {
+        ctx.bind();  // restore
+        throw;
+    }
+}
+
+TEST(OCL_OpenCLExecutionContext, ScopeTest)
+{
+    bool useOCL = cv::ocl::useOpenCL();
+
+    OpenCLExecutionContext ctx = OpenCLExecutionContext::getCurrent();
+
+    if (!useOCL)
+    {
+        ASSERT_TRUE(ctx.empty());  // Other tests should not broke global state
+        throw SkipTestException("OpenCL is not available / disabled");
+    }
+
+    ASSERT_FALSE(ctx.empty());
+
+    ocl::Context context = ocl::Context::create(":GPU:1");
+    if (context.empty())
+    {
+        context = ocl::Context::create(":CPU:");
+        if (context.empty())
+            context = ctx.getContext();
+    }
+
+    ocl::Device device = context.device(0);
+
+    OpenCLExecutionContext ctx2 = OpenCLExecutionContext::create(context, device);
+    ASSERT_FALSE(ctx2.empty());
+
+    try
+    {
+        OpenCLExecutionContextScope ctx_scope(ctx2);
+        executeUMatCall();
+    }
+    catch (...)
+    {
+        ctx.bind();  // restore
+        throw;
+    }
+
+    executeUMatCall();
+}
+
+} } // namespace opencv_test::ocl
index aa751cf..a542b2f 100644 (file)
@@ -26,6 +26,7 @@ add_subdirectory(dnn)
 add_subdirectory(gpu)
 add_subdirectory(tapi)
 add_subdirectory(opencl)
+add_subdirectory(sycl)
 if(WIN32 AND HAVE_DIRECTX)
   add_subdirectory(directx)
 endif()
@@ -122,6 +123,7 @@ endif()
 add_subdirectory(dnn)
 # add_subdirectory(gpu)
 add_subdirectory(opencl)
+add_subdirectory(sycl)
 # add_subdirectory(opengl)
 # add_subdirectory(openvx)
 add_subdirectory(tapi)
diff --git a/samples/sycl/CMakeLists.txt b/samples/sycl/CMakeLists.txt
new file mode 100644 (file)
index 0000000..093ed9f
--- /dev/null
@@ -0,0 +1,80 @@
+if(OPENCV_SKIP_SAMPLES_SYCL)
+  return()
+endif()
+
+ocv_install_example_src(opencl *.cpp *.hpp CMakeLists.txt)
+
+set(OPENCV_SYCL_SAMPLES_REQUIRED_DEPS
+  opencv_core
+  opencv_imgproc
+  opencv_imgcodecs
+  opencv_videoio
+  opencv_highgui)
+ocv_check_dependencies(${OPENCV_SYCL_SAMPLES_REQUIRED_DEPS})
+
+if(NOT BUILD_EXAMPLES OR NOT OCV_DEPENDENCIES_FOUND OR OPENCV_SKIP_SAMPLES_BUILD_SYCL)
+  return()
+endif()
+
+if(CMAKE_VERSION VERSION_LESS "3.5")
+  message(STATUS "SYCL samples require CMake 3.5+")
+  return()
+endif()
+
+cmake_policy(VERSION 3.5)
+
+find_package(SYCL QUIET)  # will oneAPI support this straightforward way?
+
+if(NOT SYCL_FOUND AND NOT OPENCV_SKIP_SAMPLES_SYCL_ONEDNN)
+  # lets try scripts from oneAPI:oneDNN component
+  if(NOT DEFINED DNNLROOT AND DEFINED ENV{DNNLROOT})
+    set(DNNLROOT "$ENV{DNNLROOT}")
+  endif()
+  # Some verions of called script violate CMake policy and may emit unrecoverable CMake errors
+  # Use OPENCV_SKIP_SAMPLES_SYCL=1 / OPENCV_SKIP_SAMPLES_SYCL_ONEDNN to bypass this
+  find_package(dnnl CONFIG QUIET HINTS "${DNNLROOT}")
+endif()
+
+if(NOT SYCL_FOUND AND NOT OPENCV_SKIP_SAMPLES_SYCL_COMPUTECPP)
+  # lets try this SYCL SDK too: https://github.com/codeplaysoftware/computecpp-sdk
+  find_package(ComputeCpp QUIET)
+  if(ComputeCpp_FOUND)
+    set(SYCL_TARGET ComputeCpp::ComputeCpp)
+    set(SYCL_FLAGS ${ComputeCpp_FLAGS})
+    set(SYCL_INCLUDE_DIRS ${ComputeCpp_INCLUDE_DIRS})
+    set(SYCL_LIBRARIES ${ComputeCpp_LIBRARIES})
+  endif()
+endif()
+
+if(OPENCV_CMAKE_DEBUG_SYCL)
+  ocv_cmake_dump_vars("SYCL")  # OpenCV source tree is required
+endif()
+
+if(NOT SYCL_TARGET)
+  message(STATUS "SYCL/OpenCL samples are skipped: SYCL SDK is required")
+  message(STATUS "   - check configuration of SYCL_DIR/SYCL_ROOT/CMAKE_MODULE_PATH")
+  message(STATUS "   - ensure that right compiler is selected from SYCL SDK (e.g, clang++): CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}")
+  return()
+endif()
+
+project(sycl_samples)
+
+if(SYCL_FLAGS)  # "target_link_libraries(... ${SYCL_TARGET})" is not enough. Hacking...
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SYCL_FLAGS}")
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${SYCL_FLAGS}")
+endif()
+
+ocv_include_modules_recurse(${OPENCV_SYCL_SAMPLES_REQUIRED_DEPS})
+ocv_include_directories(${OpenCL_INCLUDE_DIR})
+file(GLOB all_samples RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp)
+foreach(sample_filename ${all_samples})
+  ocv_define_sample(tgt ${sample_filename} sycl)
+  ocv_target_link_libraries(${tgt} PRIVATE
+    ${OPENCV_LINKER_LIBS}
+    ${OPENCV_SYCL_SAMPLES_REQUIRED_DEPS}
+    ${SYCL_TARGET})
+
+  if(COMMAND add_sycl_to_target)  # ComputeCpp
+    add_sycl_to_target(TARGET ${tgt} SOURCES ${sample_filename})
+  endif()
+endforeach()
diff --git a/samples/sycl/sycl-opencv-interop.cpp b/samples/sycl/sycl-opencv-interop.cpp
new file mode 100644 (file)
index 0000000..ccb8eaf
--- /dev/null
@@ -0,0 +1,351 @@
+/*
+ * The example of interoperability between SYCL/OpenCL and OpenCV.
+ * - SYCL: https://www.khronos.org/sycl/
+ * - SYCL runtime parameters: https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md
+ */
+#include <CL/sycl.hpp>
+
+#include <opencv2/core.hpp>
+#include <opencv2/highgui.hpp>
+#include <opencv2/videoio.hpp>
+#include <opencv2/imgproc.hpp>
+
+#include <opencv2/core/ocl.hpp>
+
+
+class sycl_inverse_kernel;  // can be omitted - modern SYCL versions doesn't require this
+
+using namespace cv;
+
+
+class App
+{
+public:
+    App(const CommandLineParser& cmd);
+    ~App();
+
+    void initVideoSource();
+
+    void initSYCL();
+
+    void process_frame(cv::Mat& frame);
+
+    /// to check result with CPU-only reference code
+    Mat process_frame_reference(const cv::Mat& frame);
+
+    int run();
+
+    bool isRunning() { return m_running; }
+    bool doProcess() { return m_process; }
+
+    void setRunning(bool running)      { m_running = running; }
+    void setDoProcess(bool process)    { m_process = process; }
+
+protected:
+    void handleKey(char key);
+
+private:
+    bool                        m_running;
+    bool                        m_process;
+    bool                        m_show_ui;
+
+    int64                       m_t0;
+    int64                       m_t1;
+    float                       m_time;
+    float                       m_frequency;
+
+    std::string                 m_file_name;
+    int                         m_camera_id;
+    cv::VideoCapture            m_cap;
+    cv::Mat                     m_frame;
+
+    cl::sycl::queue sycl_queue;
+};
+
+
+App::App(const CommandLineParser& cmd)
+{
+    m_camera_id  = cmd.get<int>("camera");
+    m_file_name  = cmd.get<std::string>("video");
+
+    m_running    = false;
+    m_process    = false;
+} // ctor
+
+
+App::~App()
+{
+    // nothing
+}
+
+
+void App::initSYCL()
+{
+    using namespace cl::sycl;
+
+    // Configuration details: https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md
+    cl::sycl::default_selector selector;
+
+    sycl_queue = cl::sycl::queue(selector, [](cl::sycl::exception_list l)
+    {
+        // exception_handler
+        for (auto ep : l)
+        {
+            try
+            {
+                std::rethrow_exception(ep);
+            }
+            catch (const cl::sycl::exception& e)
+            {
+                std::cerr << "SYCL exception: " << e.what() << std::endl;
+            }
+        }
+    });
+
+    auto device = sycl_queue.get_device();
+    auto platform = device.get_platform();
+    std::cout << "SYCL device: " << device.get_info<info::device::name>()
+        << " @ " << device.get_info<info::device::driver_version>()
+        << " (platform: " << platform.get_info<info::platform::name>() << ")" << std::endl;
+
+    if (device.is_host())
+    {
+        std::cerr << "SYCL can't select OpenCL device. Host is used for computations, interoperability is not available" << std::endl;
+    }
+    else
+    {
+        // bind OpenCL context/device/queue from SYCL to OpenCV
+        try
+        {
+            auto ctx = cv::ocl::OpenCLExecutionContext::create(
+                    platform.get_info<info::platform::name>(),
+                    platform.get(),
+                    sycl_queue.get_context().get(),
+                    device.get()
+                );
+            ctx.bind();
+        }
+        catch (const cv::Exception& e)
+        {
+            std::cerr << "OpenCV: Can't bind SYCL OpenCL context/device/queue: " << e.what() << std::endl;
+        }
+        std::cout << "OpenCV uses OpenCL: " << (cv::ocl::useOpenCL() ? "True" : "False") << std::endl;
+    }
+} // initSYCL()
+
+
+void App::initVideoSource()
+{
+    if (!m_file_name.empty() && m_camera_id == -1)
+    {
+        m_cap.open(samples::findFileOrKeep(m_file_name));
+        if (!m_cap.isOpened())
+            throw std::runtime_error(std::string("can't open video stream: ") + m_file_name);
+    }
+    else if (m_camera_id != -1)
+    {
+        m_cap.open(m_camera_id);
+        if (!m_cap.isOpened())
+            throw std::runtime_error(std::string("can't open camera: ") + std::to_string(m_camera_id));
+    }
+    else
+        throw std::runtime_error(std::string("specify video source"));
+} // initVideoSource()
+
+
+void App::process_frame(cv::Mat& frame)
+{
+    using namespace cl::sycl;
+
+    // cv::Mat => cl::sycl::buffer
+    {
+        CV_Assert(frame.isContinuous());
+        CV_CheckTypeEQ(frame.type(), CV_8UC1, "");
+
+        buffer<uint8_t, 2> frame_buffer(frame.data, range<2>(frame.rows, frame.cols));
+
+        // done automatically: frame_buffer.set_write_back(true);
+
+        sycl_queue.submit([&](handler& cgh) {
+          auto pixels = frame_buffer.get_access<access::mode::read_write>(cgh);
+
+          cgh.parallel_for<class sycl_inverse_kernel>(range<2>(frame.rows, frame.cols), [=](item<2> item) {
+              uint8_t v = pixels[item];
+              pixels[item] = ~v;
+          });
+        });
+
+        sycl_queue.wait_and_throw();
+    }
+
+    // No way to extract cl_mem from cl::sycl::buffer (ref: 3.6.11 "Interfacing with OpenCL" of SYCL 1.2.1)
+    // We just reusing OpenCL context/device/queue from SYCL here (see initSYCL() bind part) and call UMat processing
+    {
+        UMat blurResult;
+        {
+            UMat umat_buffer = frame.getUMat(ACCESS_RW);
+            cv::blur(umat_buffer, blurResult, Size(3, 3));  // UMat doesn't support inplace
+        }
+        Mat result;
+        blurResult.copyTo(result);
+        swap(result, frame);
+    }
+}
+
+Mat App::process_frame_reference(const cv::Mat& frame)
+{
+    Mat result;
+    cv::bitwise_not(frame, result);
+    Mat blurResult;
+    cv::blur(result, blurResult, Size(3, 3));  // avoid inplace
+    blurResult.copyTo(result);
+    return result;
+}
+
+int App::run()
+{
+    std::cout << "Initializing..." << std::endl;
+
+    initSYCL();
+    initVideoSource();
+
+    std::cout << "Press ESC to exit" << std::endl;
+    std::cout << "      'p' to toggle ON/OFF processing" << std::endl;
+
+    m_running = true;
+    m_process = true;
+    m_show_ui = true;
+
+    int processedFrames = 0;
+
+    cv::TickMeter timer;
+
+    // Iterate over all frames
+    while (isRunning() && m_cap.read(m_frame))
+    {
+        Mat m_frameGray;
+        cvtColor(m_frame, m_frameGray, COLOR_BGR2GRAY);
+
+        bool checkWithReference = (processedFrames == 0);
+        Mat reference_result;
+        if (checkWithReference)
+        {
+            reference_result = process_frame_reference(m_frameGray);
+        }
+
+        timer.reset();
+        timer.start();
+
+        if (m_process)
+        {
+            process_frame(m_frameGray);
+        }
+
+        timer.stop();
+
+        if (checkWithReference)
+        {
+            double diffInf = cv::norm(reference_result, m_frameGray, NORM_INF);
+            if (diffInf > 0)
+            {
+                std::cerr << "Result is not accurate. diffInf=" << diffInf << std::endl;
+                imwrite("reference.png", reference_result);
+                imwrite("actual.png", m_frameGray);
+            }
+        }
+
+        Mat img_to_show = m_frameGray;
+
+        std::ostringstream msg;
+        msg << "Frame " << processedFrames << " (" << m_frame.size
+            << ")   Time: " << cv::format("%.2f", timer.getTimeMilli()) << " msec"
+            << " (process: " << (m_process ? "True" : "False") << ")";
+        std::cout << msg.str() << std::endl;
+        putText(img_to_show, msg.str(), Point(5, 150), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
+
+        if (m_show_ui)
+        {
+            try
+            {
+                imshow("sycl_interop", img_to_show);
+                int key = waitKey(1);
+                switch (key)
+                {
+                case 27:  // ESC
+                    m_running = false;
+                    break;
+
+                case 'p':  // fallthru
+                case 'P':
+                    m_process = !m_process;
+                    break;
+
+                default:
+                    break;
+                }
+            }
+            catch (const std::exception& e)
+            {
+                std::cerr << "ERROR(OpenCV UI): " << e.what() << std::endl;
+                if (processedFrames > 0)
+                    throw;
+                m_show_ui = false;  // UI is not available
+            }
+        }
+
+        processedFrames++;
+
+        if (!m_show_ui)
+        {
+            if (processedFrames > 100)
+                m_running = false;
+        }
+    }
+
+    return 0;
+}
+
+
+int main(int argc, char** argv)
+{
+    const char* keys =
+        "{ help h ?    |          | print help message }"
+        "{ camera c    | -1       | use camera as input }"
+        "{ video  v    |          | use video as input }";
+
+    CommandLineParser cmd(argc, argv, keys);
+    if (cmd.has("help"))
+    {
+        cmd.printMessage();
+        return EXIT_SUCCESS;
+    }
+
+    try
+    {
+        App app(cmd);
+        if (!cmd.check())
+        {
+            cmd.printErrors();
+            return 1;
+        }
+        app.run();
+    }
+    catch (const cv::Exception& e)
+    {
+        std::cout << "FATAL: OpenCV error: " << e.what() << std::endl;
+        return 1;
+    }
+    catch (const std::exception& e)
+    {
+        std::cout << "FATAL: C++ error: " << e.what() << std::endl;
+        return 1;
+    }
+
+    catch (...)
+    {
+        std::cout << "FATAL: unknown C++ exception" << std::endl;
+        return 1;
+    }
+
+    return EXIT_SUCCESS;
+} // main()