2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
20 #include "ocl_builder.h"
22 #include "kernels_cache.h"
23 #include "engine_info.h"
24 #include "event_impl.h"
25 #include "confiugration.h"
#include <utility>
// A list of byte buffers (presumably one binary blob per built kernel program - confirm).
31 using kernels_binaries_vector = cl::vector<cl::vector<unsigned char>>;
// A list of kernels_binaries_vector entries; gpu_toolkit::store_binaries appends one per call.
32 using kernels_binaries_container = cl::vector<kernels_binaries_vector>;
// Function-pointer type named after clCreateCommandQueueWithPropertiesINTEL; the pointed-to
// function returns a cl_command_queue. NOTE(review): only part of the parameter list is
// visible in this listing - check the full signature against the complete header.
34 typedef CL_API_ENTRY cl_command_queue(CL_API_CALL *pfn_clCreateCommandQueueWithPropertiesINTEL)(
37 const cl_queue_properties *properties,
// Error type derived from `error` that can be constructed from a cl::Error.
40 class ocl_error : public error
// Builds the error from `err`; the definition is not part of this header view.
43 ocl_error(cl::Error const& err);
// Binds this holder to `context`. The shared_ptr is taken by value and moved into the
// member, so the argument is not copied (and its reference count not bumped) a second time.
52 context_holder(std::shared_ptr<gpu_toolkit> context) : _context(std::move(context)) {}
// Virtual, defaulted destructor so derived holders can be destroyed through this type.
53 virtual ~context_holder() = default;
// Read-only access to the shared toolkit held by this object.
55 const std::shared_ptr<gpu_toolkit>& context() const { return _context; }
// The toolkit handed to the constructor.
57 std::shared_ptr<gpu_toolkit> _context;
// Bundles the OpenCL context, command queue, kernels cache and related state declared in
// the data members further down. Derives from enable_shared_from_this<gpu_toolkit>.
61 class gpu_toolkit : public std::enable_shared_from_this<gpu_toolkit>
// context_holder is granted access to this class's non-public members.
63 friend class context_holder;
// Constructs a toolkit from `aconfiguration`; a default-constructed configuration is used
// when the argument is omitted.
66 gpu_toolkit(const configuration& aconfiguration = configuration());
// Factory returning a shared_ptr to a toolkit built from `cfg` (default configuration when
// omitted). Definition is not visible here.
68 static std::shared_ptr<gpu_toolkit> create(const configuration& cfg = configuration());
// Read-only view of the OpenCL context member.
69 const cl::Context& context() const { return this->_context; }
// Device as reported by the ocl_builder member.
70 const cl::Device& device() const { return this->_ocl_builder.get_device(); }
// Read-only view of the command queue member.
71 const cl::CommandQueue& queue() const { return this->_command_queue; }
// Configuration stored in this toolkit.
73 const configuration& get_configuration() const { return this->_configuration; }
// Copy of the stored engine information.
74 engine_info_internal get_engine_info() const { return this->_engine_info; }
// Mutable access to the kernels cache member.
75 kernels_cache& get_kernels_cache() { return this->_kernels_cache; }
// Returns a copy of every binaries set stored so far. Marked const because it only reads
// _binaries, which lets it be called through a const gpu_toolkit as well.
76 kernels_binaries_container get_binaries() const { return _binaries; }
// Appends one binaries set to _binaries. `binaries` is already a by-value copy owned by
// this call, so it is moved into the container instead of being copied a second time.
77 void store_binaries(kernels_binaries_vector binaries) { _binaries.push_back(std::move(binaries)); }
// Returns the current serialization flag. Marked const: it only reads _serialize.
78 bool get_serialization_flag() const { return _serialize; }
// Overwrites the serialization flag with `serialization_flag`.
79 void set_serialization_flag(bool serialization_flag) { _serialize = serialization_flag; }
// Returns true when `ext` occurs anywhere inside the stored _extensions string. This is a
// plain substring search, so a name that is a prefix of a longer entry also matches.
// The argument is taken by const reference (no copy per call) and the method is const
// because it only reads _extensions.
81 inline bool extension_supported(const std::string& ext) const { return _extensions.find(ext) != std::string::npos; }
// gpu_toolkit is neither copyable nor movable: all four operations are deleted.
83 gpu_toolkit(const gpu_toolkit&) = delete;
84 gpu_toolkit(gpu_toolkit&&) = delete;
85 gpu_toolkit& operator=(const gpu_toolkit&) = delete;
86 gpu_toolkit& operator=(gpu_toolkit&&) = delete;
// Copy of the single_kernel_name field of the stored configuration.
87 std::string single_kernel_name() const { return _configuration.single_kernel_name; }
// True when a single-kernel name has been configured, i.e. the name is not empty.
// Uses empty() directly instead of comparing with "" and mapping the result through ?:.
88 bool enabled_single_kernel() const { return !single_kernel_name().empty(); }
// Stores `out_event` in the _output_event flag.
89 void set_output_event(bool out_event) { this->_output_event = out_event; }
// Command submission and event helpers. Only declarations are visible in this header view;
// the notes below are read off the names and signatures and should be confirmed against
// the definitions.
// Takes a kernel, global and local NDRanges and a list of dependency events; returns an event.
91 event_impl::ptr enqueue_kernel(cl::Kernel const& kern, cl::NDRange const& global, cl::NDRange const& local, std::vector<event_impl::ptr> const& deps);
// Takes a list of dependency events; returns an event (presumably a marker after `deps`).
92 event_impl::ptr enqueue_marker(std::vector<event_impl::ptr> const& deps);
// Takes a list of events; returns one event (presumably representing the whole group).
93 event_impl::ptr group_events(std::vector<event_impl::ptr> const& deps);
// Returns a user event; NOTE(review): `set` presumably selects its initial state - confirm.
95 event_impl::ptr create_user_event(bool set);
// NOTE(review): presumably releases what _events_pool holds - confirm.
96 void release_events_pool();
99 void release_pending_memory();
// Takes a list of events; presumably blocks until they complete - confirm.
100 void wait_for_events(std::vector<event_impl::ptr> const& events);
// Logging entry point taking a numeric id and a message; definition not visible here.
102 void log(uint64_t id, std::string const& msg);
// Logging counts as enabled when the `log` field of the stored configuration is non-empty.
103 bool logging_enabled() const { return !_configuration.log.empty(); }
// Returns the _neo_driver flag. Marked const: it only reads the member.
104 bool is_neo_driver() const { return _neo_driver; }
// Data members. Declaration order is kept as-is because it fixes initialization order.
// Configuration this toolkit was created with (returned by get_configuration()).
106 configuration _configuration;
// Supplies the device used by device() and get_device_version().
107 ocl_builder _ocl_builder;
108 bool _user_context = false;
// Value reported by is_neo_driver().
109 bool _neo_driver = false;
110 cl::Context _context;
111 cl::CommandQueue _command_queue;
// Raw OpenCL platform handle; given a null default so it never holds an indeterminate
// value, matching the other plain members that carry in-class initializers.
112 cl_platform_id _platform_id = nullptr;
113 engine_info_internal _engine_info;
114 kernels_cache _kernels_cache;
// Binaries sets appended by store_binaries() and copied out by get_binaries().
115 kernels_binaries_container _binaries;
116 bool _serialize = false;
// Atomic counters, both starting at zero.
118 std::atomic<uint64_t> _queue_counter{ 0 };
119 std::atomic<uint64_t> _last_barrier{ 0 };
120 std::unique_ptr<events_pool> _events_pool;
121 cl::Event _last_barrier_ev;
// String searched by extension_supported().
123 std::string _extensions;
126 std::unique_ptr<ocl_logger> _logger;
128 // Declared void, so nothing is returned; the earlier note that this "returns whether a
// barrier has been added" does not match the signature. NOTE(review): presumably makes
// subsequently queued work wait on `deps` - confirm against the definition.
129 void sync_events(std::vector<event_impl::ptr> const& deps);
// Flag written by set_output_event(); defaults to false.
130 bool _output_event = false;
// Returns a reference to an output file stream; definition not visible here.
// NOTE(review): presumably the stream used for logging - confirm.
131 std::ofstream& open_log();
// Returns the CL_DEVICE_VERSION info string of the builder's device. Marked const: it
// only queries the device, in the same way the const device() accessor reaches it.
133 std::string get_device_version() const { return _ocl_builder.get_device().getInfo<CL_DEVICE_VERSION>(); }
// Takes the configuration by const reference; definition not visible here.
// NOTE(review): presumably sets up _command_queue from `config` - confirm.
135 void build_command_queues(const configuration& config);