///////////////////////////////////////////////////////////////////////////////////////////////////
#pragma once
-// we want exceptions
-#define CL_HPP_ENABLE_EXCEPTIONS
-#define CL_HPP_MINIMUM_OPENCL_VERSION 120
-#define CL_HPP_TARGET_OPENCL_VERSION 120
-#include <cl2_wrapper.h>
+#include "ocl_builder.h"
-#include "api/CPP/profiling.hpp"
#include "kernels_cache.h"
#include "engine_info.h"
#include "event_impl.h"
+#include "confiugration.h"
#include <memory>
#include <chrono>
-namespace cldnn {
+namespace cldnn {
typedef cl::vector<cl::vector<unsigned char>> kernels_binaries_vector;
- typedef cl::vector<kernels_binaries_vector> kernels_binaries_container;
+ typedef cl::vector<kernels_binaries_vector> kernels_binaries_container;
namespace gpu {
-
typedef CL_API_ENTRY cl_command_queue(CL_API_CALL *pfn_clCreateCommandQueueWithPropertiesINTEL)(
cl_context context,
cl_device_id device,
ocl_error(cl::Error const& err);
};
-struct configuration
-{
- enum device_types { default_device = 0, cpu, gpu, accelerator };
-
- configuration();
-
- bool enable_profiling;
- bool meaningful_kernels_names;
- bool dump_custom_program;
- device_types device_type;
- uint32_t device_vendor;
- std::string compiler_options;
- std::string single_kernel_name;
- bool host_out_of_order;
- std::string log;
- std::string ocl_sources_dumps_dir;
- cldnn_priority_mode_type priority_mode;
- cldnn_throttle_mode_type throttle_mode;
-};
-
+class events_pool;
class gpu_toolkit;
class context_holder
};
-struct profiling_period_event : instrumentation::profiling_period
-{
- profiling_period_event(const cl::Event& event, cl_profiling_info start, cl_profiling_info end)
- : _event(event)
- , _start(start)
- , _end(end)
- {}
-
- std::chrono::nanoseconds value() const override
- {
- cl_ulong start_nanoseconds;
- _event.getProfilingInfo(_start, &start_nanoseconds);
- cl_ulong end_nanoseconds;
- _event.getProfilingInfo(_end, &end_nanoseconds);
- return std::chrono::nanoseconds(static_cast<long long>(end_nanoseconds - start_nanoseconds));
- }
-
-private:
- cl::Event _event;
- cl_profiling_info _start;
- cl_profiling_info _end;
-};
-
class gpu_toolkit : public std::enable_shared_from_this<gpu_toolkit>
{
friend class context_holder;
protected:
gpu_toolkit(const configuration& aconfiguration = configuration());
-
public:
static std::shared_ptr<gpu_toolkit> create(const configuration& cfg = configuration());
const cl::Context& context() const { return _context; }
- const cl::Device& device() const { return _device; }
+ const cl::Device& device() const { return _ocl_builder.get_device(); }
const cl::CommandQueue& queue() const { return _command_queue; }
-
+
const configuration& get_configuration() const { return _configuration; }
engine_info_internal get_engine_info() const { return _engine_info; }
kernels_cache& get_kernels_cache() { return _kernels_cache; }
void store_binaries(kernels_binaries_vector binaries) { _binaries.push_back(binaries); }
bool get_serialization_flag() { return _serialize; }
void set_serialization_flag(bool serialization_flag) { _serialize = serialization_flag; }
-
+
inline bool extension_supported(const std::string ext) { return _extensions.find(ext) != std::string::npos; }
gpu_toolkit(const gpu_toolkit& other) = delete;
event_impl::ptr enqueue_kernel(cl::Kernel const& kern, cl::NDRange const& global, cl::NDRange const& local, std::vector<event_impl::ptr> const& deps);
event_impl::ptr enqueue_marker(std::vector<event_impl::ptr> const& deps);
event_impl::ptr group_events(std::vector<event_impl::ptr> const& deps);
+ void reset_events();
+ event_impl::ptr create_user_event(bool set);
+ void release_events_pool();
void flush();
void release_pending_memory();
void log(uint64_t id, std::string const& msg);
bool logging_enabled() const { return !_configuration.log.empty(); }
bool is_neo_driver() { return _neo_driver; }
-
private:
configuration _configuration;
- cl::Device _device;
+ ocl_builder _ocl_builder;
+ bool _user_context = false;
bool _neo_driver = false;
cl::Context _context;
cl::CommandQueue _command_queue;
std::atomic<uint64_t> _queue_counter{ 0 };
std::atomic<uint64_t> _last_barrier{ 0 };
+ std::unique_ptr<events_pool> _events_pool;
cl::Event _last_barrier_ev;
std::string _extensions;
bool _output_event = false;
std::ofstream& open_log();
- std::string get_device_version() { return _device.getInfo<CL_DEVICE_VERSION>(); }
+ std::string get_device_version() { return _ocl_builder.get_device().getInfo<CL_DEVICE_VERSION>(); }
+
+ void build_command_queues(const configuration& config);
};
}}