inference-engine/thirdparty/clDNN/src/engine.cpp

   1 /*
   2 // Copyright (c) 2016 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 ///////////////////////////////////////////////////////////////////////////////////////////////////
  18 #include "engine_impl.h"
  19 #include "event_impl.h"
  20 #include "program_impl.h"
  21 #include "network_impl.h"
  22 #include "gpu/ocl_toolkit.h"
  23 #include "gpu/memory_gpu.h"
  24 #include "gpu/ocl_user_event.h"
  25
  26 namespace cldnn
  27 {
  28 using gpu_toolkit_config = gpu::configuration;
  29
  30 gpu_toolkit_config convert_configuration(const engine_configuration conf)
  31 {
  32     gpu_toolkit_config result;
  33     result.compiler_options = conf.compiler_options;
  34     result.enable_profiling = conf.enable_profiling != 0;
  35     result.meaningful_kernels_names = conf.meaningful_kernels_names != 0;
  36     result.dump_custom_program = conf.dump_custom_program != 0;
  37     result.single_kernel_name = conf.single_kernel_name;
  38     result.host_out_of_order = true; //TODO: enable when barriers in driver will be fixed
  39     result.log = conf.engine_log;
  40     result.ocl_sources_dumps_dir = conf.sources_dumps_dir;
  41     result.priority_mode = static_cast<cldnn_priority_mode_type>(conf.priority_mode);
  42     result.throttle_mode = static_cast<cldnn_throttle_mode_type>(conf.throttle_mode);
  43     result.user_context = static_cast<cl::Context*>(conf.context);
  44     result.tuning_cache_path = conf.tuning_cache_path;
  45     return result;
  46 }
  47
  48 engine_impl::engine_impl(const engine_configuration& conf)
  49     : _configuration(conf)
  50     , _context(gpu_toolkit::create(convert_configuration(conf)))
  51     , _memory_pool(*this)
  52 { }
  53
  54 engine_impl::~engine_impl()
  55 {
  56     /*
  57         Engine, which is main owner of context deallocate events pool manually, because
  58         of the event_impl <-> gpu_toolkit dependencies.
  59     */
  60     _context->release_events_pool();
  61 }
  62
  63 memory_impl::ptr engine_impl::allocate_memory(layout layout)
  64 {
  65     return _memory_pool.get_memory(layout);
  66 }
  67
  68 memory_impl::ptr engine_impl::allocate_memory(layout layout, primitive_id id, uint32_t network_id, std::set<primitive_id> dependencies, bool reusable)
  69 {
  70     if (use_memory_pool())
  71         return _memory_pool.get_memory(layout, id, network_id, dependencies, reusable);
  72     return _memory_pool.get_memory(layout);
  73 }
  74
  75 memory_impl::ptr engine_impl::reinterpret_buffer(const memory_impl& memory, layout new_layout)
  76 {
  77     if (memory.get_engine() != this)
  78         throw error("trying to reinterpret buffer allocated by a different engine", CLDNN_ERROR);
  79
  80     if (new_layout.format.is_image() && !memory.get_layout().format.is_image())
  81         throw error("trying to reinterpret non-image buffer as image", CLDNN_ERROR);
  82
  83     if (!new_layout.format.is_image() && memory.get_layout().format.is_image())
  84         throw error("trying to reinterpret image buffer as non-image buffer", CLDNN_ERROR);
  85
  86     try {
  87         if (new_layout.format.is_image_2d())
  88             return{ new gpu::gpu_image2d(this, new_layout, reinterpret_cast<const gpu::gpu_image2d&>(memory).get_buffer()), false };
  89         else
  90             return{ new gpu::gpu_buffer(this, new_layout, reinterpret_cast<const gpu::gpu_buffer&>(memory).get_buffer()), false };
  91     }
  92     catch (cl::Error const& err) {
  93         throw gpu::ocl_error(err);
  94     }
  95 }
  96
  97 bool engine_impl::is_the_same_buffer(const memory_impl& mem1, const memory_impl& mem2)
  98 {
  99     if (mem1.get_engine() != this || mem2.get_engine() != this)
 100         return false;
 101     if (&mem1 == &mem2)
 102         return true;
 103
 104     return (reinterpret_cast<const gpu::gpu_buffer&>(mem1).get_buffer() == reinterpret_cast<const gpu::gpu_buffer&>(mem2).get_buffer());
 105 }
 106
 107 event_impl::ptr engine_impl::create_user_event(bool set)
 108 {
 109     try {
 110         return _context->create_user_event(set);
 111     }
 112     catch (cl::Error const& err) {
 113         throw gpu::ocl_error(err);
 114     }
 115 }
 116
 117 void engine_impl::flush_network()
 118 {
 119     get_context()->flush();
 120 }
 121
 122 void engine_impl::release_pending_memory()
 123 {
 124     get_context()->release_pending_memory();
 125 }
 126
 127 program_impl::ptr engine_impl::build_program(const topology_impl& topology, const build_options& options, bool is_internal, bool no_optimizations)
 128 {
 129     return{ new program_impl(*this, topology, options, is_internal, no_optimizations), false };
 130 }
 131
 132 program_impl::ptr engine_impl::build_program(const std::set<std::shared_ptr<program_node>>& nodes, const build_options& options, bool is_internal)
 133 {
 134     return{ new program_impl(*this, nodes, options, is_internal), false };
 135 }
 136
 137 network_impl::ptr engine_impl::build_network(const topology_impl& topology, const build_options& options, bool is_internal)
 138 {
 139     return{ new network_impl(*this, topology, options, is_internal), false };
 140 }
 141
 142 network_impl::ptr engine_impl::build_network(const std::set<std::shared_ptr<program_node>>& nodes, const build_options& options, bool is_internal)
 143 {
 144     return{ new network_impl(*this, nodes, options, is_internal), false };
 145 }
 146
 147 network_impl::ptr engine_impl::allocate_network(const program_impl& program, bool is_internal)
 148 {
 149     return{ new network_impl(program, is_internal), false };
 150 }
 151
 152 void engine_impl::wait_for_events(std::vector<event_impl::ptr> const & events)
 153 {
 154     if (!events.empty())
 155         _context->wait_for_events(events);
 156 }
 157
 158 gpu::engine_info_internal engine_impl::get_engine_info() const
 159 {
 160     return _context->get_engine_info();
 161 }
 162
 163 void engine_impl::compile_program(program_impl& program)
 164 {
 165     if (!program.get_options().get<build_option_type::serialize_network>()->serialization_network_name.empty())
 166         _context->get_kernels_cache().get_context().set_serialization_flag(true);
 167     //TODO: better compilation logic instead of a simple 'compile all'?
 168     _context->get_kernels_cache().build_all();
 169 }
 170
 171 bool engine_impl::use_memory_pool() const
 172 {
 173     if (configuration().enable_memory_pool && get_context()->is_neo_driver())
 174     {
 175         return true;
 176     }
 177     return false;
 178 }
 179
 180 }