#include "ocl_toolkit.h"
#include "ocl_base_event.h"
#include "ocl_user_event.h"
+#include "command_queues_builder.h"
+#include "events_pool.h"
#include <cassert>
#include <iomanip>
{
}
-namespace {
-
- cl_device_type convert_configuration_device_type(configuration::device_types device_type)
- {
- cl_device_type device_types[] = {
- CL_DEVICE_TYPE_DEFAULT,
- CL_DEVICE_TYPE_CPU,
- CL_DEVICE_TYPE_GPU,
- CL_DEVICE_TYPE_ACCELERATOR };
- return device_types[device_type];
- }
-
- bool does_device_match_config(cl::Device const& dev, configuration const& config, std::list<std::string>& reasons)
- {
- auto dev_name = dev.getInfo<CL_DEVICE_NAME>();
- bool ok = true;
-
- auto dev_type = dev.getInfo<CL_DEVICE_TYPE>();
-
- if (dev_type != convert_configuration_device_type(config.device_type))
- {
- reasons.push_back(dev_name + ": invalid device type");
- ok = false;
- }
-
- auto vendor_id = dev.getInfo<CL_DEVICE_VENDOR_ID>();
- if (vendor_id != config.device_vendor)
- {
- reasons.push_back(dev_name + ": invalid vendor type");
- ok = false;
- }
-
- if (config.host_out_of_order)
- {
- auto queue_properties = dev.getInfo<CL_DEVICE_QUEUE_PROPERTIES>();
- using cmp_t = std::common_type<decltype(queue_properties), typename std::underlying_type<cl::QueueProperties>::type>::type;
- if (!(static_cast<cmp_t>(queue_properties) & static_cast<cmp_t>(cl::QueueProperties::OutOfOrder)))
- {
- reasons.push_back(dev_name + ": missing out of order support");
- ok = false;
- }
- }
-
- return ok;
- }
-}
-
-cl::Device get_gpu_device(const configuration& config, cl_platform_id& platform_id)
-{
- std::list<std::string> reasons;
- cl_uint n = 0;
-
- // Get number of platforms availible
- cl_int err = clGetPlatformIDs(0, NULL, &n);
- if (err != CL_SUCCESS) {
- throw std::runtime_error("clGetPlatformIDs error " + std::to_string(err));
- }
-
- // Get platform list
- std::vector<cl_platform_id> platform_ids(n);
- err = clGetPlatformIDs(n, platform_ids.data(), NULL);
- if (err != CL_SUCCESS) {
- throw std::runtime_error("clGetPlatformIDs error " + std::to_string(err));
- }
-
- for (auto& id : platform_ids)
- {
- cl::Platform platform = cl::Platform(id);
- std::vector<cl::Device> devices;
- platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
- for (auto& d : devices)
- {
- if (does_device_match_config(d, config, reasons))
- {
- platform_id = id;
- return d;
- }
- }
- }
-
- if (reasons.empty())
- throw std::runtime_error("Could not find any OpenCL device");
-
- std::string error_msg = "No OpenCL device found which would match provided configuration:";
- for (const auto& reason : reasons)
- error_msg += "\n " + reason;
-
- throw std::invalid_argument(std::move(error_msg));
-}
-
std::shared_ptr<gpu_toolkit> gpu_toolkit::create(const configuration & cfg)
{
struct make_shared_wa : public gpu_toolkit { make_shared_wa(const configuration& cfg) : gpu_toolkit(cfg) {} };
std::ofstream _log_file;
};
-gpu_toolkit::gpu_toolkit(const configuration& config)
+gpu_toolkit::gpu_toolkit(const configuration& config)
: _configuration(config)
- , _device(get_gpu_device(config, _platform_id))
+ , _ocl_builder(config)
+ , _user_context(_ocl_builder.is_user_context())
, _neo_driver(strstr(get_device_version().c_str(), "NEO") ? true : false)
- , _context(_device)
- , _command_queue(_context,
- _device,
- (config.enable_profiling
- ? cl::QueueProperties::Profiling
- : cl::QueueProperties::None) |
- (config.host_out_of_order && _neo_driver
- ? cl::QueueProperties::OutOfOrder
- : cl::QueueProperties::None))
+ , _context(_ocl_builder.get_context())
+ , _platform_id(_ocl_builder.get_platform_id())
, _engine_info(*this)
, _kernels_cache(*this)
+ , _events_pool(new events_pool())
{
- _device.getInfo(CL_DEVICE_EXTENSIONS, &_extensions);
-
- cl_command_queue_properties queue_properties =
- ((config.enable_profiling) ?
- CL_QUEUE_PROFILING_ENABLE :
- 0) |
- ((config.host_out_of_order &&
- _neo_driver) ?
- CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE :
- 0);
-
- if (_configuration.priority_mode != cldnn_priority_disabled)
- {
- if (extension_supported("cl_khr_priority_hints") &&
- extension_supported("cl_intelx_create_command_queue"))
- // TODO add check when caps will be availible (instead of cl_intelx_create_command_queue)
- //&& extension_supported("cl_khr_create_command_queue"))
- {
- // TODO: When cl_khr_create_command_queue will be availible the
- // function name will change to clCreateCommandQueueWithPropertiesKHR
- // in place of clCreateCommandQueueWithPropertiesINTEL.
-#ifndef WIN32
- #pragma GCC diagnostic push
- #pragma GCC diagnostic ignored "-Wpedantic"
-#endif
- pfn_clCreateCommandQueueWithPropertiesINTEL clCreateCommandQueueWithPropertiesINTEL =
- (pfn_clCreateCommandQueueWithPropertiesINTEL)clGetExtensionFunctionAddressForPlatform(
- _platform_id,
- "clCreateCommandQueueWithPropertiesINTEL");
-#ifndef WIN32
- #pragma GCC diagnostic pop
-#endif
- unsigned cl_queue_priority_value = CL_QUEUE_PRIORITY_MED_KHR;
-
- switch (_configuration.priority_mode)
- {
- case cldnn_priority_high:
- cl_queue_priority_value = CL_QUEUE_PRIORITY_HIGH_KHR;
- break;
- case cldnn_priority_low:
- cl_queue_priority_value = CL_QUEUE_PRIORITY_LOW_KHR;
- break;
- default:
- break;
- }
-
- cl_int error_code = CL_SUCCESS;
- cl_queue_properties properties_low[] = {
- CL_QUEUE_PRIORITY_KHR, cl_queue_priority_value,
- CL_QUEUE_PROPERTIES, queue_properties,
- 0 };
-
- _command_queue = clCreateCommandQueueWithPropertiesINTEL(
- _context.get(),
- _device.get(),
- properties_low,
- &error_code);
-
- if (error_code != CL_SUCCESS) {
- throw std::runtime_error("clCreateCommandQueueWithPropertiesINTEL error " + std::to_string(error_code));
- }
- }
- else
- {
- throw std::invalid_argument(
- "The param priority_mode is set in engine_configuration,\
- but cl_khr_priority_hints or cl_khr_create_command_queue\
- is not supported by current OpenCL implementation.");
- }
- }
- else
- {
- _command_queue = cl::CommandQueue(_context, _device, queue_properties);
- }
-
- if (_configuration.throttle_mode != cldnn_throttle_disabled)
- {
- if (extension_supported("cl_khr_throttle_hints"))
- {
- throw std::invalid_argument(
- "The param throttle_mode is set in engine_configuration,\
- but it is placeholder for future use. It has no effect for now\
- and should be set to cldnn_throttle_disabled");
- }
- else
- {
- throw std::invalid_argument(
- "The param throttle_mode is set in engine_configuration,\
- but cl_khr_throttle_hints is not supported by current OpenCL implementation.");
- }
- }
+ _ocl_builder.get_device().getInfo(CL_DEVICE_EXTENSIONS, &_extensions);
+ build_command_queues(config);
_logger = std::unique_ptr<ocl_logger>(new ocl_logger());
-
if (logging_enabled())
{
open_log()
<< " engine log: " << _configuration.log << "\n"
<< " sources dumps: " << _configuration.ocl_sources_dumps_dir << "\n"
<< "\nEngine info:\n"
- << " configuration: " << std::to_string(_engine_info.configuration) << "\n"
- << " model: " << std::to_string(_engine_info.model) << "\n"
- << " architecture: " << std::to_string(_engine_info.architecture) << "\n"
+ << " device id: " << _engine_info.dev_id << "\n"
<< " cores count: " << _engine_info.cores_count << "\n"
<< " core frequencey: " << _engine_info.core_frequency << "\n"
<< " max work group size: " << _engine_info.max_work_group_size << "\n"
<< " fp16: " << std::boolalpha << (_engine_info.supports_fp16 != 0) << "\n"
<< " fp16 denorms: " << std::boolalpha << (_engine_info.supports_fp16_denorms != 0) << "\n"
<< " subgroups short: " << std::boolalpha << (_engine_info.supports_subgroups_short != 0) << "\n"
+ << " used defined context: "<< std::boolalpha << _user_context << "\n"
<< std::endl;
}
}
+void gpu_toolkit::build_command_queues(const configuration& config)
+{
+ command_queues_builder queue_builder(_context, _ocl_builder.get_device(), _platform_id);
+ queue_builder.set_profiling(config.enable_profiling);
+ queue_builder.set_out_of_order((config.host_out_of_order && _neo_driver));
+
+ bool priorty_extensions = extension_supported("cl_khr_priority_hints") && extension_supported("cl_khr_create_command_queue");
+ queue_builder.set_priority_mode(config.priority_mode, priorty_extensions);
+
+ bool throttle_extensions = extension_supported("cl_khr_throttle_hints") && extension_supported("cl_khr_create_command_queue");
+ queue_builder.set_throttle_mode(config.throttle_mode, throttle_extensions);
+
+ queue_builder.build();
+
+ _command_queue = queue_builder.queue();
+}
+
event_impl::ptr gpu_toolkit::enqueue_kernel(cl::Kernel const& kern, cl::NDRange const& global, cl::NDRange const& local, std::vector<event_impl::ptr> const & deps)
{
std::vector<cl::Event> dep_events;
log(_queue_counter + 1, msg);
}
-
- return{ new base_event(shared_from_this(), ret_ev, ++_queue_counter), false };
+ return _events_pool->get_from_base_pool(shared_from_this(), ret_ev, ++_queue_counter);
}
event_impl::ptr gpu_toolkit::enqueue_marker(std::vector<event_impl::ptr> const& deps)
{
if (deps.empty())
- return{ new user_event(shared_from_this(), true), false };
+ return _events_pool->get_from_user_pool(shared_from_this(), true);
if (!_configuration.host_out_of_order)
{
try {
_command_queue.enqueueMarkerWithWaitList(&dep_events, &ret_ev);
- }
+ }
catch (cl::Error const& err) {
throw ocl_error(err);
}
if (logging_enabled())
log(_queue_counter + 1, "Marker with dependencies: " + events_list_to_string(deps));
-
- return{ new base_event(shared_from_this(), ret_ev, ++_queue_counter), false };
+ return _events_pool->get_from_base_pool(shared_from_this(), ret_ev, ++_queue_counter);
}
else
{
sync_events(deps);
- return{ new base_event(shared_from_this(), _last_barrier_ev, _last_barrier), false };
+ return _events_pool->get_from_base_pool(shared_from_this(), _last_barrier_ev, _last_barrier);
}
}
event_impl::ptr gpu_toolkit::group_events(std::vector<event_impl::ptr> const& deps)
{
- return{ new base_events(shared_from_this(), deps), false };
+ return _events_pool->get_from_group_pool(shared_from_this(), deps);
+}
+
+event_impl::ptr gpu_toolkit::create_user_event(bool set)
+{
+ return _events_pool->get_from_user_pool(shared_from_this(), set);
+}
+
+void gpu_toolkit::reset_events()
+{
+ _events_pool->reset_events();
+}
+
+void gpu_toolkit::release_events_pool()
+{
+ _events_pool.reset();
}
void gpu_toolkit::flush()
}
void gpu_toolkit::release_pending_memory()
{
- /*
+ /*
TODO: Temp. solution, untill proper API calls from OpenCL are released.
*/
void* ptr = nullptr;
{
try {
if (_output_event)
- {
+ {
_command_queue.enqueueBarrierWithWaitList(nullptr, &_last_barrier_ev);
}
else
{
_command_queue.enqueueBarrierWithWaitList(nullptr, nullptr);
}
-
+
}
catch (cl::Error const& err) {
throw ocl_error(err);