Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / thirdparty / clDNN / src / gpu / ocl_toolkit.h
index 50c7460..1a69bd4 100644 (file)
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 
-// we want exceptions
-#define CL_HPP_ENABLE_EXCEPTIONS
-#define CL_HPP_MINIMUM_OPENCL_VERSION 120
-#define CL_HPP_TARGET_OPENCL_VERSION 120
-#include <cl2_wrapper.h>
+#include "ocl_builder.h"
 
-#include "api/CPP/profiling.hpp"
 #include "kernels_cache.h"
 #include "engine_info.h"
 #include "event_impl.h"
+#include "confiugration.h"
 
 #include <memory>
 #include <chrono>
 
-namespace cldnn { 
+namespace cldnn {
     typedef cl::vector<cl::vector<unsigned char>> kernels_binaries_vector;
-    typedef cl::vector<kernels_binaries_vector> kernels_binaries_container;    
+    typedef cl::vector<kernels_binaries_vector> kernels_binaries_container;
 namespace gpu {
-
 typedef  CL_API_ENTRY cl_command_queue(CL_API_CALL *pfn_clCreateCommandQueueWithPropertiesINTEL)(
     cl_context context,
     cl_device_id device,
@@ -48,26 +43,7 @@ public:
     ocl_error(cl::Error const& err);
 };
 
-struct configuration
-{
-    enum device_types { default_device = 0, cpu, gpu, accelerator };
-
-    configuration();
-
-    bool enable_profiling;
-    bool meaningful_kernels_names;
-    bool dump_custom_program;
-    device_types device_type;
-    uint32_t device_vendor;
-    std::string compiler_options;
-    std::string single_kernel_name;
-    bool host_out_of_order;
-    std::string log;
-    std::string ocl_sources_dumps_dir;
-    cldnn_priority_mode_type priority_mode;
-    cldnn_throttle_mode_type throttle_mode;
-};
-
+class events_pool;
 class gpu_toolkit;
 
 class context_holder
@@ -82,42 +58,18 @@ protected:
 
 };
 
-struct profiling_period_event : instrumentation::profiling_period
-{
-    profiling_period_event(const cl::Event& event, cl_profiling_info start, cl_profiling_info end)
-        : _event(event)
-        , _start(start)
-        , _end(end)
-    {}
-
-    std::chrono::nanoseconds value() const override
-    {
-        cl_ulong start_nanoseconds;
-        _event.getProfilingInfo(_start, &start_nanoseconds);
-        cl_ulong end_nanoseconds;
-        _event.getProfilingInfo(_end, &end_nanoseconds);
-        return std::chrono::nanoseconds(static_cast<long long>(end_nanoseconds - start_nanoseconds));
-    }
-
-private:
-    cl::Event _event;
-    cl_profiling_info _start;
-    cl_profiling_info _end;
-};
-
 class gpu_toolkit : public std::enable_shared_from_this<gpu_toolkit>
 {
     friend class context_holder;
 
 protected:
     gpu_toolkit(const configuration& aconfiguration = configuration());
-
 public:
     static std::shared_ptr<gpu_toolkit> create(const configuration& cfg = configuration());
     const cl::Context& context() const { return _context; }
-    const cl::Device& device() const { return _device; }
+    const cl::Device& device() const { return _ocl_builder.get_device(); }
     const cl::CommandQueue& queue() const { return _command_queue; }
-    
+
     const configuration& get_configuration() const { return _configuration; }
     engine_info_internal get_engine_info() const { return _engine_info; }
     kernels_cache& get_kernels_cache() { return _kernels_cache; }
@@ -125,7 +77,7 @@ public:
     void store_binaries(kernels_binaries_vector binaries) { _binaries.push_back(binaries); }
     bool get_serialization_flag() { return _serialize; }
     void set_serialization_flag(bool serialization_flag) { _serialize = serialization_flag; }
-    
+
     inline bool extension_supported(const std::string ext) { return _extensions.find(ext) != std::string::npos; }
 
     gpu_toolkit(const gpu_toolkit& other) = delete;
@@ -139,6 +91,9 @@ public:
     event_impl::ptr enqueue_kernel(cl::Kernel const& kern, cl::NDRange const& global, cl::NDRange const& local, std::vector<event_impl::ptr> const& deps);
     event_impl::ptr enqueue_marker(std::vector<event_impl::ptr> const& deps);
     event_impl::ptr group_events(std::vector<event_impl::ptr> const& deps);
+    void reset_events();
+    event_impl::ptr create_user_event(bool set);
+    void release_events_pool();
 
     void flush();
     void release_pending_memory();
@@ -147,10 +102,10 @@ public:
     void log(uint64_t id, std::string const& msg);
     bool logging_enabled() const { return !_configuration.log.empty(); }
     bool is_neo_driver() { return _neo_driver; }
-
 private:
     configuration _configuration;
-    cl::Device _device;
+    ocl_builder _ocl_builder;
+    bool _user_context = false;
     bool _neo_driver = false;
     cl::Context _context;
     cl::CommandQueue _command_queue;
@@ -162,6 +117,7 @@ private:
 
     std::atomic<uint64_t> _queue_counter{ 0 };
     std::atomic<uint64_t> _last_barrier{ 0 };
+    std::unique_ptr<events_pool> _events_pool;
     cl::Event _last_barrier_ev;
 
     std::string _extensions;
@@ -174,7 +130,9 @@ private:
     bool _output_event = false;
     std::ofstream& open_log();
 
-    std::string get_device_version() { return _device.getInfo<CL_DEVICE_VERSION>(); }
+    std::string get_device_version() { return _ocl_builder.get_device().getInfo<CL_DEVICE_VERSION>(); }
+
+    void build_command_queues(const configuration& config);
 };
 
 }}