// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once
#include "gna_mem_requests.hpp"

#include <algorithm>
#include <cstring>
#include <functional>
#include <list>
#include <memory>
#include <vector>
#ifdef GNA_HEAP_PROFILER
#include <iomanip>
#include <iostream>
#endif
/**
 * @brief Pads a memory size up to a multiple of the given number of bytes.
 * Please always use this padding macro for consistency.
 * @param memSize size (in bytes) of the memory to be padded
 * @param pad number of bytes to pad to
 * @return memory size (in bytes) padded up to a multiple of the given value
 */
#define ALIGN(memSize, pad) (static_cast<int>(((memSize) + (pad) - 1) / (pad)) * (pad))
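// Illustrative sanity checks of the padding arithmetic (not part of the
// original header): ALIGN rounds up to the next multiple of `pad` and leaves
// already-aligned sizes unchanged.
static_assert(ALIGN(100, 64) == 128, "100 bytes pad up to two 64-byte lines");
static_assert(ALIGN(128, 64) == 128, "aligned sizes are left unchanged");
static_assert(ALIGN(1, 4096) == 4096, "page alignment rounds up to a full page");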
namespace GNAPluginNS {
/**
 * @brief encapsulates various requests to allocate GNA-specific memory,
 * so that a single allocation call can be issued and the actual pointers in the requests configured afterwards
 * @tparam Allocator - a GNAAllocator in case of actual HW offloads
 */
template<class Allocator = std::allocator<uint8_t>>
class GNAMemory : public GNAMemRequestsQueue {
    std::vector<MemRequest> _future_heap;
    std::list<std::vector<char>> _local_storage;
    size_t _total = 0;
    size_t _rw_section_size = 0;
    size_t _ro_section_size = 0;
    Allocator _allocator;
    std::shared_ptr<uint8_t> heap;
    size_t _page_alignment = 1;
    class GNAMemRequestsReadOnlyQueue : public GNAMemRequestsQueue {
        std::reference_wrapper<GNAMemRequestsQueue> _that;

     public:
        explicit GNAMemRequestsReadOnlyQueue(GNAMemory & that) : _that(that) {
        }
        // requests pushed through this front end land in the read-only region
        rRegion regionType() const override {
            return REGION_RO;
        }
        std::vector<MemRequest> & futureHeap() override {
            return _that.get().futureHeap();
        }
        std::list<std::vector<char>> &localStorage() override {
            return _that.get().localStorage();
        }
    };
    GNAMemRequestsReadOnlyQueue readOnlyFrontEnd;

 public:
    explicit GNAMemory(size_t pageAlignment = 1)
        : readOnlyFrontEnd(*this), _page_alignment(pageAlignment) {}

    explicit GNAMemory(const Allocator &a, size_t pageAlignment = 1)
        : _allocator(a), readOnlyFrontEnd(*this), _page_alignment(pageAlignment) {}
    /**
     * @brief returns a front-end queue whose requests are placed in the read-only section
     */
    GNAMemRequestsQueue & readonly() {
        return readOnlyFrontEnd;
    }
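    // Typical request/commit flow (an illustrative sketch, not part of this
    // header; push_ptr() is assumed to come from the GNAMemRequestsQueue
    // interface declared in gna_mem_requests.hpp):
    //
    //   GNAMemory<> mem(4096);                   // page-align each section to 4 KB
    //   void *rwData = nullptr, *roData = nullptr;
    //   mem.push_ptr(&rwData, 1024);             // queued into the RW region
    //   mem.readonly().push_ptr(&roData, 512);   // queued into the RO region
    //   mem.commit();                            // one allocation; both pointers now valid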
    /**
     * @brief calculates the size required for all requests, allocates memory and updates pointers
     */
    void commit() {
        // 1st stage -- looking for expandable bind requests:
        for (auto &originated : _future_heap) {
            if (originated._type == REQUEST_BIND) continue;
            size_t offset = 0;
            iterate_binded(originated, [&](MemRequest & reference, MemRequest & binded) {
                if (&originated == &reference) {
                    // a new bind chain starts at the originating request itself
                    offset = 0;
                }
                offset += binded._offset;
                auto current = offset + ALIGN(binded._num_elements * binded._element_size, binded._alignment);
                auto original_no_pad = ALIGN(originated._num_elements * originated._element_size, originated._alignment);
                auto original_with_pad = ALIGN(originated._num_elements * originated._element_size + originated._padding,
                                               originated._alignment);
                // grow the originating request's padding so that every bound request fits inside its chunk
                originated._padding = ALIGN(std::max(original_with_pad, current), originated._alignment) - original_no_pad;
            });
        }
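        // Worked example (illustrative): an 8-byte request A (alignment 1,
        // padding 0) with a bind request B at _offset 16 covering 24 bytes
        // yields current = 16 + 24 = 40, so A._padding grows to 40 - 8 = 32
        // and A's chunk spans the full 40 bytes that B needs.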
        updateSectionsSizes();

        _total = _rw_section_size + _ro_section_size;
        // allocation, with the memory zeroed internally
        heap = allocate(_total);

        // 2nd stage -- assign addresses within the heap and execute the requests:
        auto setupOffsets = [&](std::function<bool(MemRequest & request)> filter, size_t offset) {
            for (auto &re : _future_heap) {
                if (re._type == REQUEST_BIND) continue;
                if (filter(re)) continue;

                auto sz = re._element_size * re._num_elements;

                if (re._ptr_out != nullptr) {
                    auto cptr = heap.get() + offset;
                    *reinterpret_cast<void **>(re._ptr_out) = cptr;
                    // std::cout << "ALLOCATED=" << cptr << ", size=" << re._element_size * re._num_elements << "\n";

                    // patch every request bound to this one: its pointer is this chunk plus its own offset
                    iterate_binded(re, [](MemRequest & reference, MemRequest & binded) {
                        *reinterpret_cast<void **>(binded._ptr_out) =
                            binded._offset + reinterpret_cast<uint8_t *>(*reinterpret_cast<void **>(reference._ptr_out));
                    });

                    // std::cout << "size=" << ALIGN(sz, re._alignment) << "\n" << std::flush;

                    switch (re._type) {
                        case REQUEST_ALLOCATE: break;
                        case REQUEST_STORE: {
                            if (re._ptr_in != nullptr) {
                                memcpy(cptr, re._ptr_in, sz);
                            } else {
                                // no input pointer -- replicate the inline pattern element by element
                                size_t of = 0;
                                for (int i = 0; i < re._num_elements; i++, of += re._element_size) {
                                    std::copy(std::begin(re._data), std::end(re._data), cptr + of);
                                }
                            }
                            break;
                        }
                        case REQUEST_INITIALIZER: {
                            re._initializer(cptr, sz);
                            break;
                        }
                        default: break;
                    }
                }

                offset += ALIGN(sz + re._padding, re._alignment);
            }
        };
        // lay out the RW section first, starting at offset 0 ...
        setupOffsets([](MemRequest & request) {
            return request._region != REGION_RW;
        }, 0);

        // ... then the RO section, right after the page-aligned RW section
        setupOffsets([](MemRequest & request) {
            return request._region != REGION_RO;
        }, _rw_section_size);
    }
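    // Resulting layout (illustrative): with pageAlignment = 4096 and 5000
    // bytes of RW requests, the RW section pads to 8192, the RO section
    // starts at heap.get() + 8192, and getTotalBytes() reports
    // 8192 + ALIGN(<RO bytes>, 4096).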
    size_t getRWBytes() {
        updateSectionsSizes();
        return _rw_section_size;
    }

    size_t getTotalBytes() {
        updateSectionsSizes();
        return _total;
    }

 protected:
    rRegion regionType() const override {
        return REGION_RW;
    }

    std::vector<MemRequest> & futureHeap() override {
        return _future_heap;
    }

    std::list<std::vector<char>> &localStorage() override {
        return _local_storage;
    }
    template<class T>
    void iterate_binded(MemRequest & reference, const T & visitor) {
        for (auto &re : _future_heap) {
            if (re._type == REQUEST_BIND && re._ptr_in == reference._ptr_out) {
                // std::cout << "  [binded=" << re._ptr_out << "]\n";
                visitor(reference, re);
                // TODO: no circular dependency checking, only tree-style dependency supported
                iterate_binded(re, visitor);
            }
        }
    }
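    // Traversal example (illustrative): if request B is bound to A's output
    // pointer and C to B's, iterate_binded(A, v) calls v(A, B), then recurses
    // and calls v(B, C). A cycle of binds would recurse forever -- hence the
    // tree-only caveat above.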
    std::shared_ptr<uint8_t> allocate(size_t bytes) {
        // capture `bytes` so the deleter can return the exact allocation to the allocator
        std::shared_ptr<uint8_t> sp(_allocator.allocate(bytes), [=](uint8_t *p) {
            _allocator.deallocate(p, bytes);
        });
        std::fill(sp.get(), sp.get() + bytes, 0);
        return sp;
    }
    void updateSectionsSizes() {
        // count the sizes of the read/write and read-only regions
        _rw_section_size = 0;
        _ro_section_size = 0;
        for (auto &re : _future_heap) {
            auto current = ALIGN(re._num_elements * re._element_size + re._padding, re._alignment);
#ifdef GNA_HEAP_PROFILER
            std::cout << "chunk: " << " region: " << re._region << ", " <<
                "type: " << (re._type == REQUEST_STORE ? "store " : re._type == REQUEST_BIND ? "bind " : "alloc ") <<
                std::setw(10) << re._num_elements << ", " <<
                static_cast<int>(re._element_size) << ", " <<
                re._padding << ", " <<
                re._offset << ", " <<
                re._alignment << std::endl;
#endif
            if (re._type == REQUEST_BIND) continue;

            if (re._region == REGION_RW) {
                _rw_section_size += current;
            } else {
                _ro_section_size += current;
            }
        }
        _rw_section_size = ALIGN(_rw_section_size, _page_alignment);
        _ro_section_size = ALIGN(_ro_section_size, _page_alignment);
    }
};

}  // namespace GNAPluginNS