inference-engine/thirdparty/clDNN/src/memory_pool.cpp

   1 /*
   2 // Copyright (c) 2017 Intel Corporation
   3 //
   4 // Licensed under the Apache License, Version 2.0 (the "License");
   5 // you may not use this file except in compliance with the License.
   6 // You may obtain a copy of the License at
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // Unless required by applicable law or agreed to in writing, software
  11 // distributed under the License is distributed on an "AS IS" BASIS,
  12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 // See the License for the specific language governing permissions and
  14 // limitations under the License.
  15 */
  16
  17 ///////////////////////////////////////////////////////////////////////////////////////////////////
  18
#include <algorithm>
#include <fstream>
#include <utility>

#include "memory_pool.h"
#include "engine_impl.h"
#include "memory_impl.h"
#include "program_impl.h"

#include "program_node.h"

#include "gpu/memory_gpu.h"
  30 namespace cldnn
  31 {
  32     memory_record::memory_record(memory_set users, refcounted_obj_ptr<memory_impl>& memory, uint32_t net_id) :
  33         _users(users)
  34         , _memory(memory)
  35         , _network_id(net_id)
  36     {}
  37
  38     memory_impl::ptr memory_pool::alloc_memory(const layout& layout)
  39     {
  40         auto context = _engine->get_context();
  41
  42         if (layout.bytes_count() > context->get_engine_info().max_alloc_mem_size)
  43         {
  44             throw error("exceeded max size of memory object allocation", CLDNN_ALLOC_SIZE_EXCEEDED);
  45         }
  46
  47         add_memory_used(layout.bytes_count());
  48
  49         if (_max_peak_memory_used > context->get_engine_info().max_global_mem_size)
  50         {
  51             throw error("exceeded global device memory", CLDNN_GLOBAL_SIZE_EXCEEDED);
  52         }
  53
  54         try {
  55             if (layout.format.is_image_2d())
  56                 return{ new gpu::gpu_image2d(_engine, layout), false };
  57             else
  58                 return{ new gpu::gpu_buffer(_engine, layout), false };
  59         }
  60         catch (const cl::Error& clErr)
  61         {
  62             switch (clErr.err())
  63             {
  64             case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  65             case CL_OUT_OF_RESOURCES:
  66             case CL_OUT_OF_HOST_MEMORY:
  67             case CL_INVALID_BUFFER_SIZE:
  68                 throw error("out of GPU resources", CLDNN_OUT_OF_RESOURCES);
  69             default:
  70                 throw error("GPU buffer allocation failed", CLDNN_ERROR);
  71             }
  72         }
  73     }
  74     memory_pool::~memory_pool()
  75     { }
  76
  77     bool memory_pool::has_conflict(const memory_set& a, const std::set<primitive_id>& b, uint32_t b_network_id)
  78     {
  79         std::set<primitive_id> a_same_network;
  80         for (auto const& mem_usr : a)
  81         {
  82             if (mem_usr._network_id == b_network_id)
  83             {
  84                 a_same_network.insert(mem_usr._id);
  85             }
  86         }
  87         std::vector<primitive_id> intersection;
  88         intersection.reserve(std::min(a_same_network.size(), b.size()));
  89         set_intersection(a_same_network.begin(), a_same_network.end(), b.begin(), b.end(), std::back_inserter(intersection));
  90         return !intersection.empty();
  91     }
  92
  93     memory_impl::ptr memory_pool::get_from_non_padded_pool(const layout& layout, const primitive_id& id, uint32_t network_id, const std::set<primitive_id>& restrictions)
  94     {
  95         auto it = _non_padded_pool.lower_bound(layout.bytes_count());
  96         while (it != _non_padded_pool.end())
  97         {
  98             if (!has_conflict(it->second._users, restrictions, network_id))
  99             {
 100                 it->second._users.insert(memory_user( id, network_id ));
 101                 auto ret_mem = _engine->reinterpret_buffer(*it->second._memory, layout);
 102                 return ret_mem;
 103             }
 104             else
 105                 ++it;
 106         }
 107         // didn't find anything for you? create new resource
 108         auto mem = alloc_memory(layout);
 109         {
 110             _non_padded_pool.emplace(layout.bytes_count(), memory_record({ {id, network_id } }, mem, network_id));
 111             // we don't want to store any resources with no parents so memory pool has to store weak pointer of _engine.
 112             _engine->release();
 113         }
 114         return mem;
 115     }
 116
 117     memory_impl::ptr memory_pool::get_from_padded_pool(const layout& layout, const primitive_id& id, uint32_t network_id, const std::set<primitive_id>& restrictions)
 118     {
 119         auto first_level_cache = _padded_pool.find(layout);
 120
 121         if (first_level_cache != _padded_pool.end())
 122         {
 123             for (auto& rec_list : first_level_cache->second)
 124             {
 125                 if (layout.size.feature[0] <= rec_list._memory->get_layout().size.feature[0] &&
 126                     layout.size.batch[0] <= rec_list._memory->get_layout().size.batch[0] &&
 127                     !has_conflict(rec_list._users, restrictions, network_id))
 128                 {
 129                     rec_list._users.insert({ id, network_id });
 130                     auto ret_mem = _engine->reinterpret_buffer(*(rec_list._memory), layout);
 131                     return ret_mem;
 132                 }
 133             }
 134             auto mem = alloc_memory(layout);
 135             first_level_cache->second.emplace_back(memory_record({ { id, network_id } }, mem, network_id));
 136             // we don't want to store any resources with no parents so memory pool has to store weak pointer of _engine.
 137             _engine->release();
 138             return mem;
 139         }
 140         auto mem = alloc_memory(layout);
 141         std::list<memory_record> list = { memory_record({ { id, network_id } },mem, network_id) };
 142         _padded_pool.emplace(layout, std::move(list));
 143         // we don't want to store any resources with no parents so memory pool has to store weak pointer of _engine.
 144         _engine->release();
 145         return mem;
 146     }
 147
 148     /*
 149         This is not reusable within one network or it's internal micronetworks. But we can use this memory records between networks.
 150     */
 151     memory_impl::ptr memory_pool::get_from_across_networks_pool(const layout& layout, const primitive_id& id, uint32_t network_id)
 152     {
 153         auto it = _no_reusable_pool.lower_bound(layout.bytes_count());
 154
 155         while (it != _no_reusable_pool.end())
 156         {
 157             if (it->second._network_id != network_id) // don't use non reusable resources within the same network
 158             {
 159                 if (!has_conflict(it->second._users, {}, network_id))
 160                 {
 161                     it->second._users.insert(memory_user(id, network_id));
 162                     auto ret_mem = _engine->reinterpret_buffer(*it->second._memory, layout);
 163                     return ret_mem;
 164                 }
 165             }
 166             ++it;
 167         }
 168         auto mem = alloc_memory(layout);
 169         {
 170             _no_reusable_pool.emplace(layout.bytes_count(), memory_record({ { id, network_id } }, mem, network_id));
 171             // we don't want to store any resources with no parents so memory pool has to store weak pointer of _engine.
 172             _engine->release();
 173         }
 174         return mem;
 175     }
 176
 177     memory_impl::ptr memory_pool::get_memory(const layout& layout)
 178     {
 179         return alloc_memory(layout);
 180     }
 181
 182     memory_impl::ptr memory_pool::get_memory(const layout& layout, const primitive_id& id, uint32_t network_id, const std::set<primitive_id>& restrictions, bool reusable_across_network)
 183     {
 184         if (reusable_across_network) //reusable within the same network
 185         {
 186             if (!layout.format.is_image() && layout.data_padding == padding{ { 0,0,0,0 }, 0 }) // non-padded buffers
 187             {
 188                 return get_from_non_padded_pool(layout, id, network_id, restrictions);
 189             }
 190             else if (!layout.format.is_image()) // padded buffers
 191             {
 192                 return get_from_padded_pool(layout, id, network_id, restrictions);
 193             }
 194             else  // images
 195             {
 196                 // not yet implemented
 197                 return alloc_memory(layout);
 198             }
 199         }
 200         else
 201         {
 202             return get_from_across_networks_pool(layout, id, network_id);
 203         }
 204     }
 205
    // Drops every record stored in the non-padded pool.
    // NOTE(review): _padded_pool and _no_reusable_pool are left untouched here - confirm this is intended.
    void memory_pool::clear_pool()
    {
        _non_padded_pool.clear();
    }
 210
    // Creates an empty pool bound to 'engine' with both memory usage counters set to zero.
    memory_pool::memory_pool(engine_impl& engine)
        : _engine(&engine)
        , _temp_memory_used(0)
        , _max_peak_memory_used(0)
    {
        _engine->release(); // the engine is a ref-counted object and there is a circular dependency between it and the pool
                            // (until the context is moved into the memory pool), so the pool gives its reference back right away
    }
 219
 220     void memory_pool::dump_memory_pool(const program_impl& program , std::string path, std::string dep)
 221     {
 222         using namespace std;
 223         ofstream log(path);
 224
 225         log << "\nNon-padded pool:" <<endl;
 226         log << "Size\tUsers:" << endl;
 227         for (const auto& record : _non_padded_pool)
 228         {
 229             log << record.first;
 230             for (const auto& usr : record.second._users)
 231                 log << ", " << usr;
 232             log << endl;
 233         }
 234
 235         log << "\n--- Padded pool: ---" << endl;
 236         log << "Size\tUsers:" << endl;
 237         for (const auto& record : _padded_pool)
 238         {
 239             for (const auto& mem : record.second)
 240             {
 241                 log << mem._memory->size();
 242                 for (const auto& usr : mem._users)
 243                     log << ", " << usr;
 244                 log << endl;
 245             }
 246         }
 247         log << dep;
 248         log.close();
 249         color_graph(program);
 250     }
 251
 252     void memory_pool::color_graph(const program_impl& program)
 253     {
 254         uint32_t color = 0;
 255         for (const auto& record : _non_padded_pool)
 256         {
 257             for (const auto& usr : record.second._users)
 258             {
 259                 if (program.has_node(usr._id))
 260                     program.get_node(usr._id).set_reused_memory_color(color);
 261             }
 262             ++color;
 263         }
 264
 265         for (const auto& list : _padded_pool)
 266         {
 267             for (const auto& record : list.second)
 268             {
 269                 if(record._users.size() > 1) // one user doesn't mean reusing
 270                     for (const auto& usr : record._users)
 271                     {
 272                         if (program.has_node(usr._id))
 273                             program.get_node(usr._id).set_reused_memory_color(color);
 274                     }
 275                 ++color;
 276             }
 277         }
 278     }
 279
 280     void memory_pool::add_memory_used(size_t value)
 281     {
 282         _temp_memory_used += value;
 283         if (_temp_memory_used > _max_peak_memory_used)
 284         {
 285             _max_peak_memory_used = _temp_memory_used;
 286         }
 287     }
 288
    // Removes 'value' bytes from the running usage counter; the recorded peak is not changed.
    // NOTE(review): there is no guard against 'value' being larger than the current counter - if the counter
    // is an unsigned type this would wrap around; confirm callers only subtract what was added before.
    void memory_pool::subtract_memory_used(size_t value)
    {
        _temp_memory_used -= value;
    }
 293
 294 }
 295