2 // Copyright (c) 2016 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
18 #include "engine_impl.h"
19 #include "event_impl.h"
20 #include "program_impl.h"
21 #include "network_impl.h"
22 #include "gpu/ocl_toolkit.h"
23 #include "gpu/memory_gpu.h"
24 #include "gpu/ocl_user_event.h"
// Local shorthand for the GPU toolkit's configuration type (gpu::configuration).
28 using gpu_toolkit_config = gpu::configuration;
// Builds a gpu_toolkit_config from an engine_configuration by copying its
// fields one by one.
//
// conf is taken by value, so the whole configuration is copied on every call.
// NOTE(review): the function presumably ends by returning `result` — confirm.
30 gpu_toolkit_config convert_configuration(const engine_configuration conf)
32 gpu_toolkit_config result;
33 result.compiler_options = conf.compiler_options;
// The flag-like fields are compared against 0 to obtain the toolkit-side value.
34 result.enable_profiling = conf.enable_profiling != 0;
35 result.meaningful_kernels_names = conf.meaningful_kernels_names != 0;
36 result.dump_custom_program = conf.dump_custom_program != 0;
37 result.single_kernel_name = conf.single_kernel_name;
// NOTE(review): this is hard-coded to true and not read from conf, while the
// trailing TODO still says "enable when ... fixed" — the TODO looks stale or the
// value is unintended; confirm which one is correct.
38 result.host_out_of_order = true; //TODO: enable when barriers in driver will be fixed
39 result.log = conf.engine_log;
40 result.ocl_sources_dumps_dir = conf.sources_dumps_dir;
// Priority and throttle modes are converted to the cldnn_* enum types by cast.
41 result.priority_mode = static_cast<cldnn_priority_mode_type>(conf.priority_mode);
42 result.throttle_mode = static_cast<cldnn_throttle_mode_type>(conf.throttle_mode);
// The user-supplied context handle is reinterpreted as a cl::Context pointer.
43 result.user_context = static_cast<cl::Context*>(conf.context);
44 result.tuning_cache_path = conf.tuning_cache_path;
// Constructs the engine: initializes _configuration from conf and creates the
// GPU toolkit context from the converted configuration
// (gpu_toolkit::create(convert_configuration(conf))).
48 engine_impl::engine_impl(const engine_configuration& conf)
49 : _configuration(conf)
50 , _context(gpu_toolkit::create(convert_configuration(conf)))
// Destructor: explicitly releases the context's events pool via
// _context->release_events_pool(). The note below gives the reason: the engine
// is the main owner of the context and event_impl / gpu_toolkit depend on each
// other.
54 engine_impl::~engine_impl()
57 Engine, which is main owner of context deallocate events pool manually, because
58 of the event_impl <-> gpu_toolkit dependencies.
60 _context->release_events_pool();
// Allocates memory for the given layout by delegating to
// _memory_pool.get_memory(layout) and returns the resulting memory pointer.
63 memory_impl::ptr engine_impl::allocate_memory(layout layout)
65 return _memory_pool.get_memory(layout);
// Allocates memory for a primitive.
//
// When use_memory_pool() is true, the request is forwarded to the memory pool
// together with the primitive id, network id, dependency set and the reusable
// flag; otherwise only the layout is passed to the pool.
// Note: id and dependencies are taken by value, so they are copied per call.
68 memory_impl::ptr engine_impl::allocate_memory(layout layout, primitive_id id, uint32_t network_id, std::set<primitive_id> dependencies, bool reusable)
70 if (use_memory_pool())
71 return _memory_pool.get_memory(layout, id, network_id, dependencies, reusable);
72 return _memory_pool.get_memory(layout);
// Creates a new memory object with new_layout that wraps the buffer obtained
// from `memory` via get_buffer().
//
// Throws error(..., CLDNN_ERROR) when:
//   - memory was allocated by a different engine,
//   - new_layout is an image format but memory's layout is not,
//   - new_layout is not an image format but memory's layout is.
// A cl::Error caught here is rethrown as gpu::ocl_error.
// NOTE(review): the second initializer `false` in the returned value is passed
// to memory_impl::ptr — presumably "do not add an extra reference"; confirm.
75 memory_impl::ptr engine_impl::reinterpret_buffer(const memory_impl& memory, layout new_layout)
77 if (memory.get_engine() != this)
78 throw error("trying to reinterpret buffer allocated by a different engine", CLDNN_ERROR);
80 if (new_layout.format.is_image() && !memory.get_layout().format.is_image())
81 throw error("trying to reinterpret non-image buffer as image", CLDNN_ERROR);
83 if (!new_layout.format.is_image() && memory.get_layout().format.is_image())
84 throw error("trying to reinterpret image buffer as non-image buffer", CLDNN_ERROR);
// The casts below treat `memory` as gpu_image2d / gpu_buffer without a runtime
// type check; only the format checks above guard them.
// NOTE(review): the opening of the try block matching the catch below is
// presumably just above this point — confirm.
87 if (new_layout.format.is_image_2d())
88 return{ new gpu::gpu_image2d(this, new_layout, reinterpret_cast<const gpu::gpu_image2d&>(memory).get_buffer()), false };
90 return{ new gpu::gpu_buffer(this, new_layout, reinterpret_cast<const gpu::gpu_buffer&>(memory).get_buffer()), false };
92 catch (cl::Error const& err) {
93 throw gpu::ocl_error(err);
// Reports whether mem1 and mem2 refer to the same underlying buffer.
// The final comparison treats both arguments as gpu::gpu_buffer (via
// reinterpret_cast, no runtime type check) and compares their get_buffer()
// results for equality.
97 bool engine_impl::is_the_same_buffer(const memory_impl& mem1, const memory_impl& mem2)
// Both memories must belong to this engine.
// NOTE(review): the statement controlled by this check is presumably an early
// `return false` for foreign memory — confirm.
99 if (mem1.get_engine() != this || mem2.get_engine() != this)
104 return (reinterpret_cast<const gpu::gpu_buffer&>(mem1).get_buffer() == reinterpret_cast<const gpu::gpu_buffer&>(mem2).get_buffer());
// Creates a user event through the context, forwarding `set` unchanged to
// _context->create_user_event (presumably the event's initial state — confirm).
// A cl::Error raised by the call is rethrown as gpu::ocl_error.
107 event_impl::ptr engine_impl::create_user_event(bool set)
110 return _context->create_user_event(set);
112 catch (cl::Error const& err) {
113 throw gpu::ocl_error(err);
// Flushes the context by calling get_context()->flush().
117 void engine_impl::flush_network()
119 get_context()->flush();
// Asks the context to release its pending memory
// (get_context()->release_pending_memory()).
122 void engine_impl::release_pending_memory()
124 get_context()->release_pending_memory();
// Builds a program from a topology: constructs a new program_impl bound to
// this engine with the given build options and the is_internal /
// no_optimizations flags, and returns it wrapped in program_impl::ptr.
// NOTE(review): the trailing `false` passed to the ptr presumably means
// "do not add an extra reference" — confirm against the ptr type.
127 program_impl::ptr engine_impl::build_program(const topology_impl& topology, const build_options& options, bool is_internal, bool no_optimizations)
129 return{ new program_impl(*this, topology, options, is_internal, no_optimizations), false };
// Builds a program from an explicit set of program nodes: constructs a new
// program_impl bound to this engine with the given build options and
// is_internal flag, and returns it wrapped in program_impl::ptr.
// NOTE(review): the trailing `false` passed to the ptr presumably means
// "do not add an extra reference" — confirm against the ptr type.
132 program_impl::ptr engine_impl::build_program(const std::set<std::shared_ptr<program_node>>& nodes, const build_options& options, bool is_internal)
134 return{ new program_impl(*this, nodes, options, is_internal), false };
// Builds a network directly from a topology: constructs a new network_impl
// bound to this engine with the given build options and is_internal flag, and
// returns it wrapped in network_impl::ptr.
// NOTE(review): the trailing `false` passed to the ptr presumably means
// "do not add an extra reference" — confirm against the ptr type.
137 network_impl::ptr engine_impl::build_network(const topology_impl& topology, const build_options& options, bool is_internal)
139 return{ new network_impl(*this, topology, options, is_internal), false };
// Builds a network from an explicit set of program nodes: constructs a new
// network_impl bound to this engine with the given build options and
// is_internal flag, and returns it wrapped in network_impl::ptr.
// NOTE(review): the trailing `false` passed to the ptr presumably means
// "do not add an extra reference" — confirm against the ptr type.
142 network_impl::ptr engine_impl::build_network(const std::set<std::shared_ptr<program_node>>& nodes, const build_options& options, bool is_internal)
144 return{ new network_impl(*this, nodes, options, is_internal), false };
// Creates a network for an already built program: constructs a new
// network_impl from `program` and the is_internal flag and returns it wrapped
// in network_impl::ptr. Unlike build_network, the engine itself is not passed
// to the network_impl constructor here.
// NOTE(review): the trailing `false` passed to the ptr presumably means
// "do not add an extra reference" — confirm against the ptr type.
147 network_impl::ptr engine_impl::allocate_network(const program_impl& program, bool is_internal)
149 return{ new network_impl(program, is_internal), false };
// Waits on the given events by delegating to _context->wait_for_events(events).
// NOTE(review): confirm whether the call is guarded (e.g. skipped for an empty
// vector) before relying on it being unconditional.
152 void engine_impl::wait_for_events(std::vector<event_impl::ptr> const & events)
155 _context->wait_for_events(events);
// Returns the engine information reported by the context
// (_context->get_engine_info()).
158 gpu::engine_info_internal engine_impl::get_engine_info() const
160 return _context->get_engine_info();
// Compiles the kernels for `program`.
//
// If the program's serialize_network build option carries a non-empty
// serialization_network_name, the serialization flag is set to true on the
// kernels cache's context first. Afterwards everything held by the context's
// kernels cache is built with build_all().
163 void engine_impl::compile_program(program_impl& program)
165 if (!program.get_options().get<build_option_type::serialize_network>()->serialization_network_name.empty())
166 _context->get_kernels_cache().get_context().set_serialization_flag(true);
167 //TODO: better compilation logic instead of a simple 'compile all'?
168 _context->get_kernels_cache().build_all();
171 bool engine_impl::use_memory_pool() const
173 if (configuration().enable_memory_pool && get_context()->is_neo_driver())