2 // Copyright (c) 2017 Intel Corporation
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
17 ///////////////////////////////////////////////////////////////////////////////////////////////////
22 #include "memory_pool.h"
23 #include "engine_impl.h"
24 #include "memory_impl.h"
25 #include "program_impl.h"
27 #include "program_node.h"
29 #include "gpu/memory_gpu.h"
32 memory_record::memory_record(memory_set users, refcounted_obj_ptr<memory_impl>& memory, uint32_t net_id) :
38 memory_impl::ptr memory_pool::alloc_memory(const layout& layout)
40 auto context = _engine->get_context();
42 if (layout.bytes_count() > context->get_engine_info().max_alloc_mem_size)
44 throw error("exceeded max size of memory object allocation", CLDNN_ALLOC_SIZE_EXCEEDED);
47 add_memory_used(layout.bytes_count());
49 if (_max_peak_memory_used > context->get_engine_info().max_global_mem_size)
51 throw error("exceeded global device memory", CLDNN_GLOBAL_SIZE_EXCEEDED);
55 if (layout.format.is_image_2d())
56 return{ new gpu::gpu_image2d(_engine, layout), false };
58 return{ new gpu::gpu_buffer(_engine, layout), false };
60 catch (const cl::Error& clErr)
64 case CL_MEM_OBJECT_ALLOCATION_FAILURE:
65 case CL_OUT_OF_RESOURCES:
66 case CL_OUT_OF_HOST_MEMORY:
67 case CL_INVALID_BUFFER_SIZE:
68 throw error("out of GPU resources", CLDNN_OUT_OF_RESOURCES);
70 throw error("GPU buffer allocation failed", CLDNN_ERROR);
74 memory_pool::~memory_pool()
77 bool memory_pool::has_conflict(const memory_set& a, const std::set<primitive_id>& b, uint32_t b_network_id)
79 std::set<primitive_id> a_same_network;
80 for (auto const& mem_usr : a)
82 if (mem_usr._network_id == b_network_id)
84 a_same_network.insert(mem_usr._id);
87 std::vector<primitive_id> intersection;
88 intersection.reserve(std::min(a_same_network.size(), b.size()));
89 set_intersection(a_same_network.begin(), a_same_network.end(), b.begin(), b.end(), std::back_inserter(intersection));
90 return !intersection.empty();
93 memory_impl::ptr memory_pool::get_from_non_padded_pool(const layout& layout, const primitive_id& id, uint32_t network_id, const std::set<primitive_id>& restrictions)
95 auto it = _non_padded_pool.lower_bound(layout.bytes_count());
96 while (it != _non_padded_pool.end())
98 if (!has_conflict(it->second._users, restrictions, network_id))
100 it->second._users.insert(memory_user( id, network_id ));
101 auto ret_mem = _engine->reinterpret_buffer(*it->second._memory, layout);
107 // didn't find anything for you? create new resource
108 auto mem = alloc_memory(layout);
110 _non_padded_pool.emplace(layout.bytes_count(), memory_record({ {id, network_id } }, mem, network_id));
111 // we don't want to store any resources with no parents so memory pool has to store weak pointer of _engine.
117 memory_impl::ptr memory_pool::get_from_padded_pool(const layout& layout, const primitive_id& id, uint32_t network_id, const std::set<primitive_id>& restrictions)
119 auto first_level_cache = _padded_pool.find(layout);
121 if (first_level_cache != _padded_pool.end())
123 for (auto& rec_list : first_level_cache->second)
125 if (layout.size.feature[0] <= rec_list._memory->get_layout().size.feature[0] &&
126 layout.size.batch[0] <= rec_list._memory->get_layout().size.batch[0] &&
127 !has_conflict(rec_list._users, restrictions, network_id))
129 rec_list._users.insert({ id, network_id });
130 auto ret_mem = _engine->reinterpret_buffer(*(rec_list._memory), layout);
134 auto mem = alloc_memory(layout);
135 first_level_cache->second.emplace_back(memory_record({ { id, network_id } }, mem, network_id));
136 // we don't want to store any resources with no parents so memory pool has to store weak pointer of _engine.
140 auto mem = alloc_memory(layout);
141 std::list<memory_record> list = { memory_record({ { id, network_id } },mem, network_id) };
142 _padded_pool.emplace(layout, std::move(list));
143 // we don't want to store any resources with no parents so memory pool has to store weak pointer of _engine.
/*
    This pool is not reusable within one network or its internal micro-networks,
    but these memory records can be reused between networks.
*/
151 memory_impl::ptr memory_pool::get_from_across_networks_pool(const layout& layout, const primitive_id& id, uint32_t network_id)
153 auto it = _no_reusable_pool.lower_bound(layout.bytes_count());
155 while (it != _no_reusable_pool.end())
157 if (it->second._network_id != network_id) // don't use non reusable resources within the same network
159 if (!has_conflict(it->second._users, {}, network_id))
161 it->second._users.insert(memory_user(id, network_id));
162 auto ret_mem = _engine->reinterpret_buffer(*it->second._memory, layout);
168 auto mem = alloc_memory(layout);
170 _no_reusable_pool.emplace(layout.bytes_count(), memory_record({ { id, network_id } }, mem, network_id));
171 // we don't want to store any resources with no parents so memory pool has to store weak pointer of _engine.
177 memory_impl::ptr memory_pool::get_memory(const layout& layout)
179 return alloc_memory(layout);
182 memory_impl::ptr memory_pool::get_memory(const layout& layout, const primitive_id& id, uint32_t network_id, const std::set<primitive_id>& restrictions, bool reusable_across_network)
184 if (reusable_across_network) //reusable within the same network
186 if (!layout.format.is_image() && layout.data_padding == padding{ { 0,0,0,0 }, 0 }) // non-padded buffers
188 return get_from_non_padded_pool(layout, id, network_id, restrictions);
190 else if (!layout.format.is_image()) // padded buffers
192 return get_from_padded_pool(layout, id, network_id, restrictions);
196 // not yet implemented
197 return alloc_memory(layout);
202 return get_from_across_networks_pool(layout, id, network_id);
206 void memory_pool::clear_pool()
208 _non_padded_pool.clear();
211 memory_pool::memory_pool(engine_impl& engine)
213 , _temp_memory_used(0)
214 , _max_peak_memory_used(0)
216 _engine->release(); // since engine is refcount object and there is circular dependency until context will be moved to memory pool we need
217 // to detach engine while destroying memory pool
220 void memory_pool::dump_memory_pool(const program_impl& program , std::string path, std::string dep)
225 log << "\nNon-padded pool:" <<endl;
226 log << "Size\tUsers:" << endl;
227 for (const auto& record : _non_padded_pool)
230 for (const auto& usr : record.second._users)
235 log << "\n--- Padded pool: ---" << endl;
236 log << "Size\tUsers:" << endl;
237 for (const auto& record : _padded_pool)
239 for (const auto& mem : record.second)
241 log << mem._memory->size();
242 for (const auto& usr : mem._users)
249 color_graph(program);
252 void memory_pool::color_graph(const program_impl& program)
255 for (const auto& record : _non_padded_pool)
257 for (const auto& usr : record.second._users)
259 if (program.has_node(usr._id))
260 program.get_node(usr._id).set_reused_memory_color(color);
265 for (const auto& list : _padded_pool)
267 for (const auto& record : list.second)
269 if(record._users.size() > 1) // one user doesn't mean reusing
270 for (const auto& usr : record._users)
272 if (program.has_node(usr._id))
273 program.get_node(usr._id).set_reused_memory_color(color);
280 void memory_pool::add_memory_used(size_t value)
282 _temp_memory_used += value;
283 if (_temp_memory_used > _max_peak_memory_used)
285 _max_peak_memory_used = _temp_memory_used;
289 void memory_pool::subtract_memory_used(size_t value)
291 _temp_memory_used -= value;