Publishing 2019 R1 content
[platform/upstream/dldt.git] inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp
index d74a036..0044ec3 100644
@@ -18,6 +18,8 @@
 #include "ocl_toolkit.h"
 #include "ocl_base_event.h"
 #include "ocl_user_event.h"
+#include "command_queues_builder.h"
+#include "events_pool.h"
 
 #include <cassert>
 #include <iomanip>
@@ -70,96 +72,6 @@ ocl_error::ocl_error(cl::Error const & err) : error(err.what() + std::string(",
 {
 }
 
-namespace {
-
-    cl_device_type convert_configuration_device_type(configuration::device_types device_type)
-    {
-        cl_device_type device_types[] = {
-                CL_DEVICE_TYPE_DEFAULT,
-                CL_DEVICE_TYPE_CPU,
-                CL_DEVICE_TYPE_GPU,
-                CL_DEVICE_TYPE_ACCELERATOR };
-        return device_types[device_type];
-    }
-    bool does_device_match_config(cl::Device const& dev, configuration const& config, std::list<std::string>& reasons)
-    {
-        auto dev_name = dev.getInfo<CL_DEVICE_NAME>();
-        bool ok = true;
-
-        auto dev_type = dev.getInfo<CL_DEVICE_TYPE>();
-
-        if (dev_type != convert_configuration_device_type(config.device_type))
-        {
-            reasons.push_back(dev_name + ": invalid device type");
-            ok = false;
-        }
-
-        auto vendor_id = dev.getInfo<CL_DEVICE_VENDOR_ID>();
-        if (vendor_id != config.device_vendor)
-        {
-            reasons.push_back(dev_name + ": invalid vendor type");
-            ok = false;
-        }
-
-        if (config.host_out_of_order)
-        {
-            auto queue_properties = dev.getInfo<CL_DEVICE_QUEUE_PROPERTIES>();
-            using cmp_t = std::common_type<decltype(queue_properties), typename std::underlying_type<cl::QueueProperties>::type>::type;
-            if (!(static_cast<cmp_t>(queue_properties) & static_cast<cmp_t>(cl::QueueProperties::OutOfOrder)))
-            {
-                reasons.push_back(dev_name + ": missing out of order support");
-                ok = false;
-            }
-        }
-
-        return ok;
-    }
-}
-
-cl::Device get_gpu_device(const configuration& config, cl_platform_id& platform_id)
-{
-    std::list<std::string> reasons;
-    cl_uint n = 0;
-
-    // Get number of platforms availible
-    cl_int err = clGetPlatformIDs(0, NULL, &n);
-    if (err != CL_SUCCESS) {
-        throw std::runtime_error("clGetPlatformIDs error " + std::to_string(err));
-    }
-
-    // Get platform list
-    std::vector<cl_platform_id> platform_ids(n);
-    err = clGetPlatformIDs(n, platform_ids.data(), NULL);
-    if (err != CL_SUCCESS) {
-        throw std::runtime_error("clGetPlatformIDs error " + std::to_string(err));
-    }
-
-    for (auto& id : platform_ids)
-    {
-        cl::Platform platform = cl::Platform(id);
-        std::vector<cl::Device> devices;
-        platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
-        for (auto& d : devices)
-        {
-            if (does_device_match_config(d, config, reasons))
-            {
-                platform_id = id;
-                return d;
-            }
-        }
-    }
-
-    if (reasons.empty())
-        throw std::runtime_error("Could not find any OpenCL device");
-
-    std::string error_msg = "No OpenCL device found which would match provided configuration:";
-    for (const auto& reason : reasons)
-        error_msg += "\n    " + reason;
-
-    throw std::invalid_argument(std::move(error_msg));
-}
-
 std::shared_ptr<gpu_toolkit> gpu_toolkit::create(const configuration & cfg)
 {
     struct make_shared_wa : public gpu_toolkit { make_shared_wa(const configuration& cfg) : gpu_toolkit(cfg) {} };
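
The platform/device discovery that used to live in this file (get_gpu_device and does_device_match_config, removed above) is now the responsibility of the new _ocl_builder member used in the constructor below. For reference, those helpers enumerated platforms, then filtered devices by type, vendor id and (when host_out_of_order was requested) out-of-order queue support. The sketch below only illustrates the core of that pattern; pick_gpu_device and requested_vendor_id are illustrative names, not the builder's real API, it omits the out-of-order capability check, and it assumes the OpenCL C++ wrapper plus <vector> and <stdexcept> are included.

// Illustrative sketch only: pick the first GPU from a given vendor, roughly
// what the removed get_gpu_device() did before the logic moved to the builder.
static cl::Device pick_gpu_device(cl_uint requested_vendor_id)   // e.g. 0x8086 for Intel
{
    std::vector<cl::Platform> platforms;
    cl::Platform::get(&platforms);                               // all available platforms

    for (auto& platform : platforms)
    {
        std::vector<cl::Device> devices;
        platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
        for (auto& device : devices)
        {
            if (device.getInfo<CL_DEVICE_TYPE>() != CL_DEVICE_TYPE_GPU)
                continue;                                        // wrong device type
            if (device.getInfo<CL_DEVICE_VENDOR_ID>() != requested_vendor_id)
                continue;                                        // wrong vendor
            return device;                                       // first match wins
        }
    }
    throw std::runtime_error("No matching OpenCL GPU device found");
}
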
@@ -176,116 +88,21 @@ struct gpu_toolkit::ocl_logger
     std::ofstream _log_file;
 };
 
-gpu_toolkit::gpu_toolkit(const configuration& config) 
+gpu_toolkit::gpu_toolkit(const configuration& config)
     : _configuration(config)
-    , _device(get_gpu_device(config, _platform_id))
+    , _ocl_builder(config)
+    , _user_context(_ocl_builder.is_user_context())
     , _neo_driver(strstr(get_device_version().c_str(), "NEO") ? true : false)
-    , _context(_device)
-    , _command_queue(_context,
-                     _device,
-                     (config.enable_profiling
-                        ? cl::QueueProperties::Profiling
-                        : cl::QueueProperties::None) | 
-                     (config.host_out_of_order && _neo_driver
-                        ? cl::QueueProperties::OutOfOrder
-                        : cl::QueueProperties::None))
+    , _context(_ocl_builder.get_context())
+    , _platform_id(_ocl_builder.get_platform_id())
     , _engine_info(*this)
     , _kernels_cache(*this)
+    , _events_pool(new events_pool())
 {
-    _device.getInfo(CL_DEVICE_EXTENSIONS, &_extensions);
-
-    cl_command_queue_properties queue_properties =
-        ((config.enable_profiling) ?
-            CL_QUEUE_PROFILING_ENABLE :
-            0) |
-            ((config.host_out_of_order &&
-                _neo_driver) ?
-                CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE :
-                0);
-
-    if (_configuration.priority_mode != cldnn_priority_disabled)
-    {
-        if (extension_supported("cl_khr_priority_hints") &&
-            extension_supported("cl_intelx_create_command_queue"))
-            // TODO add check when caps will be availible (instead of cl_intelx_create_command_queue)
-            //&& extension_supported("cl_khr_create_command_queue"))
-        {
-            // TODO: When cl_khr_create_command_queue will be availible the
-            // function name will change to clCreateCommandQueueWithPropertiesKHR
-            // in place of clCreateCommandQueueWithPropertiesINTEL.
-#ifndef WIN32
-    #pragma GCC diagnostic push
-    #pragma GCC diagnostic ignored "-Wpedantic"
-#endif
-            pfn_clCreateCommandQueueWithPropertiesINTEL clCreateCommandQueueWithPropertiesINTEL =
-                (pfn_clCreateCommandQueueWithPropertiesINTEL)clGetExtensionFunctionAddressForPlatform(
-                    _platform_id,
-                    "clCreateCommandQueueWithPropertiesINTEL");
-#ifndef WIN32
-    #pragma GCC diagnostic pop
-#endif
-            unsigned cl_queue_priority_value = CL_QUEUE_PRIORITY_MED_KHR;
-
-            switch (_configuration.priority_mode)
-            {
-            case cldnn_priority_high:
-                cl_queue_priority_value = CL_QUEUE_PRIORITY_HIGH_KHR;
-                break;
-            case cldnn_priority_low:
-                cl_queue_priority_value = CL_QUEUE_PRIORITY_LOW_KHR;
-                break;
-            default:
-                break;
-            }
-
-            cl_int error_code = CL_SUCCESS;
-            cl_queue_properties properties_low[] = {
-                CL_QUEUE_PRIORITY_KHR, cl_queue_priority_value,
-                CL_QUEUE_PROPERTIES, queue_properties,
-                0 };
-
-            _command_queue = clCreateCommandQueueWithPropertiesINTEL(
-                _context.get(),
-                _device.get(),
-                properties_low,
-                &error_code);
-
-            if (error_code != CL_SUCCESS) {
-                throw std::runtime_error("clCreateCommandQueueWithPropertiesINTEL error " + std::to_string(error_code));
-            }
-        }
-        else
-        {
-            throw std::invalid_argument(
-                "The param priority_mode is set in engine_configuration,\
-                 but cl_khr_priority_hints or cl_khr_create_command_queue\
-                 is not supported by current OpenCL implementation.");
-        }
-    }
-    else
-    {
-        _command_queue = cl::CommandQueue(_context, _device, queue_properties);
-    }
-
-    if (_configuration.throttle_mode != cldnn_throttle_disabled)
-    {
-        if (extension_supported("cl_khr_throttle_hints"))
-        {
-            throw std::invalid_argument(
-                "The param throttle_mode is set in engine_configuration,\
-                 but it is placeholder for future use. It has no effect for now\
-                 and should be set to cldnn_throttle_disabled");
-        }
-        else
-        {
-            throw std::invalid_argument(
-                "The param throttle_mode is set in engine_configuration,\
-                 but cl_khr_throttle_hints is not supported by current OpenCL implementation.");
-        }
-    }
+    _ocl_builder.get_device().getInfo(CL_DEVICE_EXTENSIONS, &_extensions);
+    build_command_queues(config);
 
     _logger = std::unique_ptr<ocl_logger>(new ocl_logger());
-
     if (logging_enabled())
     {
         open_log()
@@ -303,9 +120,7 @@ gpu_toolkit::gpu_toolkit(const configuration& config)
             << "    engine log: "          << _configuration.log << "\n"
             << "    sources dumps: "       << _configuration.ocl_sources_dumps_dir << "\n"
             << "\nEngine info:\n"
-            << "    configuration: "       << std::to_string(_engine_info.configuration) << "\n"
-            << "    model: "               << std::to_string(_engine_info.model) << "\n"
-            << "    architecture: "        << std::to_string(_engine_info.architecture) << "\n"
+            << "    device id: "           << _engine_info.dev_id << "\n"
             << "    cores count: "         << _engine_info.cores_count << "\n"
             << "    core frequencey: "     << _engine_info.core_frequency << "\n"
             << "    max work group size: " << _engine_info.max_work_group_size << "\n"
@@ -313,10 +128,28 @@ gpu_toolkit::gpu_toolkit(const configuration& config)
             << "    fp16: "                << std::boolalpha << (_engine_info.supports_fp16 != 0) << "\n"
             << "    fp16 denorms: "        << std::boolalpha << (_engine_info.supports_fp16_denorms != 0) << "\n"
             << "    subgroups short: "     << std::boolalpha << (_engine_info.supports_subgroups_short != 0) << "\n"
+            << "    used defined context: "<< std::boolalpha << _user_context << "\n"
             << std::endl;
     }
 }
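
All of the hand-written queue setup removed above (the raw cl_command_queue_properties bitfield plus the clCreateCommandQueueWithPropertiesINTEL extension call used for priority hints) now sits behind command_queues_builder; see build_command_queues() below. For reference, the underlying extension pattern looks roughly like the following sketch. It uses the standard cl_khr_create_command_queue spelling that the old TODO pointed at; create_priority_queue, the function-pointer typedef and the error handling are illustrative, not the builder's actual code.

// Hedged sketch: create a command queue with a priority hint through the
// cl_khr_priority_hints / cl_khr_create_command_queue extensions.
typedef cl_command_queue (*pfn_clCreateCommandQueueWithPropertiesKHR)(
    cl_context, cl_device_id, const cl_queue_properties*, cl_int*);

cl_command_queue create_priority_queue(cl_platform_id platform,
                                       cl_context context,
                                       cl_device_id device,
                                       cl_command_queue_properties base_props)
{
    auto create_queue = (pfn_clCreateCommandQueueWithPropertiesKHR)
        clGetExtensionFunctionAddressForPlatform(platform, "clCreateCommandQueueWithPropertiesKHR");
    if (!create_queue)
        throw std::runtime_error("cl_khr_create_command_queue is not available");

    cl_queue_properties props[] = {
        CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_HIGH_KHR,    // priority hint
        CL_QUEUE_PROPERTIES,   base_props,                    // profiling / out-of-order bits
        0
    };

    cl_int err = CL_SUCCESS;
    cl_command_queue queue = create_queue(context, device, props, &err);
    if (err != CL_SUCCESS)
        throw std::runtime_error("clCreateCommandQueueWithPropertiesKHR error " + std::to_string(err));
    return queue;
}
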
 
+void gpu_toolkit::build_command_queues(const configuration& config)
+{
+    command_queues_builder queue_builder(_context, _ocl_builder.get_device(), _platform_id);
+    queue_builder.set_profiling(config.enable_profiling);
+    queue_builder.set_out_of_order((config.host_out_of_order && _neo_driver));
+
+    bool priority_extensions = extension_supported("cl_khr_priority_hints") && extension_supported("cl_khr_create_command_queue");
+    queue_builder.set_priority_mode(config.priority_mode, priority_extensions);
+
+    bool throttle_extensions = extension_supported("cl_khr_throttle_hints") && extension_supported("cl_khr_create_command_queue");
+    queue_builder.set_throttle_mode(config.throttle_mode, throttle_extensions);
+
+    queue_builder.build();
+
+    _command_queue = queue_builder.queue();
+}
+
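
The call sequence above (setters, then build(), then queue()) shows the shape of the new builder. Its default path, when no priority or throttle hint is requested, presumably reduces to the plain cl::CommandQueue constructor the removed inline code used. The class below is a purely illustrative sketch of that shape; queue_builder_sketch and its members are made up, and the real command_queues_builder in command_queues_builder.h/.cpp may differ.

// Purely illustrative builder shape; only the default (no priority, no
// throttle) path is shown.
class queue_builder_sketch
{
public:
    queue_builder_sketch(const cl::Context& ctx, const cl::Device& dev)
        : _context(ctx), _device(dev) {}

    void set_profiling(bool enable)    { _profiling = enable; }
    void set_out_of_order(bool enable) { _out_of_order = enable; }

    void build()
    {
        cl::QueueProperties props =
            (_profiling    ? cl::QueueProperties::Profiling  : cl::QueueProperties::None) |
            (_out_of_order ? cl::QueueProperties::OutOfOrder : cl::QueueProperties::None);
        _queue = cl::CommandQueue(_context, _device, props);
    }

    const cl::CommandQueue& queue() const { return _queue; }

private:
    cl::Context _context;
    cl::Device _device;
    bool _profiling    = false;
    bool _out_of_order = false;
    cl::CommandQueue _queue;
};
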
 event_impl::ptr gpu_toolkit::enqueue_kernel(cl::Kernel const& kern, cl::NDRange const& global, cl::NDRange const& local, std::vector<event_impl::ptr> const & deps)
 {
     std::vector<cl::Event> dep_events;
@@ -358,14 +191,13 @@ event_impl::ptr gpu_toolkit::enqueue_kernel(cl::Kernel const& kern, cl::NDRange
 
         log(_queue_counter + 1, msg);
     }
-
-    return{ new base_event(shared_from_this(), ret_ev, ++_queue_counter), false };
+    return _events_pool->get_from_base_pool(shared_from_this(), ret_ev, ++_queue_counter);
 }
 
 event_impl::ptr gpu_toolkit::enqueue_marker(std::vector<event_impl::ptr> const& deps)
 {
     if (deps.empty())
-        return{ new user_event(shared_from_this(), true), false };
+        return _events_pool->get_from_user_pool(shared_from_this(), true);
 
     if (!_configuration.host_out_of_order)
     {
@@ -379,7 +211,7 @@ event_impl::ptr gpu_toolkit::enqueue_marker(std::vector<event_impl::ptr> const&
 
             try {
                 _command_queue.enqueueMarkerWithWaitList(&dep_events, &ret_ev);
-            } 
+            }
             catch (cl::Error const& err) {
                 throw ocl_error(err);
             }
@@ -396,19 +228,33 @@ event_impl::ptr gpu_toolkit::enqueue_marker(std::vector<event_impl::ptr> const&
 
         if (logging_enabled())
             log(_queue_counter + 1, "Marker with dependencies: " + events_list_to_string(deps));
-
-        return{ new base_event(shared_from_this(), ret_ev, ++_queue_counter), false };
+        return _events_pool->get_from_base_pool(shared_from_this(), ret_ev, ++_queue_counter);
     }
     else
     {
         sync_events(deps);
-        return{ new base_event(shared_from_this(), _last_barrier_ev, _last_barrier), false };
+        return _events_pool->get_from_base_pool(shared_from_this(), _last_barrier_ev, _last_barrier);
     }
 }
 
 event_impl::ptr gpu_toolkit::group_events(std::vector<event_impl::ptr> const& deps)
 {
-    return{ new base_events(shared_from_this(), deps), false };
+    return _events_pool->get_from_group_pool(shared_from_this(), deps);
+}
+
+event_impl::ptr gpu_toolkit::create_user_event(bool set)
+{
+    return _events_pool->get_from_user_pool(shared_from_this(), set);
+}
+
+void gpu_toolkit::reset_events()
+{
+    _events_pool->reset_events();
+}
+
+void gpu_toolkit::release_events_pool()
+{
+    _events_pool.reset();
 }
 
 void gpu_toolkit::flush()
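
Event creation above no longer does a bare new base_event(...)/new user_event(...) per call; everything is routed through the new _events_pool (events_pool.h), which also gains explicit create_user_event(), reset_events() and release_events_pool() hooks so events can be recycled between executions. This patch does not show the pool's internals; the template below only illustrates the general recycle-completed-objects idea. simple_event_pool is a made-up name, and is_set()/reset() are assumed hooks on the pooled event type.

// Minimal object-pool sketch: reuse completed events instead of allocating a
// new one for every enqueue. Illustrative only, not the real events_pool.
#include <memory>
#include <utility>
#include <vector>

template <typename Event>
class simple_event_pool
{
public:
    template <typename... Args>
    std::shared_ptr<Event> get(Args&&... args)
    {
        for (auto& ev : _pool)
        {
            if (ev.use_count() == 1 && ev->is_set())     // only the pool holds it and it has completed
            {
                ev->reset(std::forward<Args>(args)...);  // recycle in place
                return ev;
            }
        }
        _pool.push_back(std::make_shared<Event>(std::forward<Args>(args)...));
        return _pool.back();
    }

    void reset_events() { _pool.clear(); }               // drop everything between executions

private:
    std::vector<std::shared_ptr<Event>> _pool;
};
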
@@ -419,7 +265,7 @@ void gpu_toolkit::flush()
 }
 void gpu_toolkit::release_pending_memory()
 {
-    /* 
+    /*
     TODO: Temporary solution until proper API calls from OpenCL are released.
     */
     void* ptr = nullptr;
@@ -483,14 +329,14 @@ void gpu_toolkit::sync_events(std::vector<event_impl::ptr> const & deps)
     {
         try {
             if (_output_event)
-            { 
+            {
                 _command_queue.enqueueBarrierWithWaitList(nullptr, &_last_barrier_ev);
             }
             else
             {
                 _command_queue.enqueueBarrierWithWaitList(nullptr, nullptr);
             }
-            
+
         }
         catch (cl::Error const& err) {
             throw ocl_error(err);
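
In the out-of-order path, sync_events() above serializes the queue with enqueueBarrierWithWaitList, capturing a barrier event only when an output event is requested. The snippet below is a standalone illustration of that pattern with the OpenCL C++ wrapper; barrier_sync_example is an illustrative name, not clDNN code.

// Sketch: ordering work on an out-of-order queue with a barrier instead of
// per-kernel dependency lists.
void barrier_sync_example(cl::CommandQueue& queue, bool need_completion_event)
{
    cl::Event barrier_ev;
    if (need_completion_event)
    {
        // Barrier that also returns an event the host can wait on.
        queue.enqueueBarrierWithWaitList(nullptr, &barrier_ev);
        barrier_ev.wait();
    }
    else
    {
        // Fire-and-forget barrier: later commands wait for all earlier ones.
        queue.enqueueBarrierWithWaitList(nullptr, nullptr);
    }
    queue.flush();
}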