// Copyright (C) 2018-2019 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once
#include "gna_mem_requests.hpp"

#include <algorithm>
#include <cstring>
#include <functional>
#include <list>
#include <memory>
#include <vector>
#ifdef GNA_HEAP_PROFILER
#include <iomanip>
#include <iostream>
#endif
/**
 * @brief Pads a memory size up to a multiple of the given number of bytes.
 * Please always use this padding macro for consistency.
 * @param memSize size (in bytes) of the memory to be padded
 * @param pad number of bytes to pad to
 * @return memory size (in bytes) padded up to a multiple of the given value
 */
#define ALIGN(memSize, pad) (static_cast<int>(((memSize) + (pad) - 1) / (pad)) * (pad))
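// Illustrative sanity checks of the padding arithmetic (not part of the
// original header): ALIGN rounds up to the next multiple of `pad` and leaves
// already-aligned sizes unchanged.
static_assert(ALIGN(100, 64) == 128, "100 bytes pad up to two 64-byte lines");
static_assert(ALIGN(128, 64) == 128, "aligned sizes are left unchanged");
static_assert(ALIGN(1, 4096) == 4096, "page alignment rounds up to a full page");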
namespace GNAPluginNS {
/**
 * @brief encapsulates various requests to allocate GNA-specific memory,
 * so that a single allocation call can be issued and the actual pointers in the requests configured afterwards
 * @tparam Allocator - a GNAAllocator in case of actual HW offloads
 */
template<class Allocator = std::allocator<uint8_t>>
class GNAMemory : public GNAMemRequestsQueue {
    std::vector<MemRequest> _future_heap;
    std::list<std::vector<char>> _local_storage;
    size_t _total = 0;
    size_t _rw_section_size = 0;
    size_t _ro_section_size = 0;
    Allocator _allocator;
    std::shared_ptr<uint8_t> heap;
    size_t _page_alignment = 1;
    class GNAMemRequestsReadOnlyQueue : public GNAMemRequestsQueue {
        std::reference_wrapper<GNAMemRequestsQueue> _that;

     public:
        explicit GNAMemRequestsReadOnlyQueue(GNAMemory & that) : _that(that) {
        }
        // requests pushed through this front end land in the read-only region
        rRegion regionType() const override {
            return REGION_RO;
        }
        std::vector<MemRequest> & futureHeap() override {
            return _that.get().futureHeap();
        }
        std::list<std::vector<char>> &localStorage() override {
            return _that.get().localStorage();
        }
    };
    GNAMemRequestsReadOnlyQueue readOnlyFrontEnd;

 public:
    explicit GNAMemory(size_t pageAlignment = 1)
        : readOnlyFrontEnd(*this), _page_alignment(pageAlignment) {}

    explicit GNAMemory(const Allocator &a, size_t pageAlignment = 1)
        : _allocator(a), readOnlyFrontEnd(*this), _page_alignment(pageAlignment) {}
    /**
     * @brief returns a front-end queue whose requests are placed in the read-only section
     */
    GNAMemRequestsQueue & readonly() {
        return readOnlyFrontEnd;
    }
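    // Typical request/commit flow (an illustrative sketch, not part of this
    // header; push_ptr() is assumed to come from the GNAMemRequestsQueue
    // interface declared in gna_mem_requests.hpp):
    //
    //   GNAMemory<> mem(4096);                   // page-align each section to 4 KB
    //   void *rwData = nullptr, *roData = nullptr;
    //   mem.push_ptr(&rwData, 1024);             // queued into the RW region
    //   mem.readonly().push_ptr(&roData, 512);   // queued into the RO region
    //   mem.commit();                            // one allocation; both pointers now valid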
    /**
     * @brief calculates the size required for all requests, allocates memory and updates pointers
     */
    void commit() {
        // 1st stage -- looking for expandable bind requests:
        for (auto &originated : _future_heap) {
            if (originated._type == REQUEST_BIND) continue;
            size_t offset = 0;
            iterate_binded(originated, [&](MemRequest & reference, MemRequest & binded) {
                if (&originated == &reference) {
                    // a new bind chain starts at the originating request itself
                    offset = 0;
                }
                offset += binded._offset;
                auto current = offset + ALIGN(binded._num_elements * binded._element_size, binded._alignment);
                auto original_no_pad = ALIGN(originated._num_elements * originated._element_size, originated._alignment);
                auto original_with_pad = ALIGN(originated._num_elements * originated._element_size + originated._padding,
                                               originated._alignment);
                // grow the originating request's padding so that every bound request fits inside its chunk
                originated._padding = ALIGN(std::max(original_with_pad, current), originated._alignment) - original_no_pad;
            });
        }
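        // Worked example (illustrative): an 8-byte request A (alignment 1,
        // padding 0) with a bind request B at _offset 16 covering 24 bytes
        // yields current = 16 + 24 = 40, so A._padding grows to 40 - 8 = 32
        // and A's chunk spans the full 40 bytes that B needs.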
        updateSectionsSizes();

        _total = _rw_section_size + _ro_section_size;
        // allocation, with the memory zeroed internally
        heap = allocate(_total);

        // 2nd stage -- assign addresses within the heap and execute the requests:
        auto setupOffsets = [&](std::function<bool(MemRequest & request)> filter, size_t offset) {
            for (auto &re : _future_heap) {
                if (re._type == REQUEST_BIND) continue;
                if (filter(re)) continue;

                auto sz = re._element_size * re._num_elements;

                if (re._ptr_out != nullptr) {
                    auto cptr = heap.get() + offset;
                    *reinterpret_cast<void **>(re._ptr_out) = cptr;
                    // std::cout << "ALLOCATED=" << cptr << ", size=" << re._element_size * re._num_elements << "\n";

                    // patch every request bound to this one: its pointer is this chunk plus its own offset
                    iterate_binded(re, [](MemRequest & reference, MemRequest & binded) {
                        *reinterpret_cast<void **>(binded._ptr_out) =
                            binded._offset + reinterpret_cast<uint8_t *>(*reinterpret_cast<void **>(reference._ptr_out));
                    });

                    // std::cout << "size=" << ALIGN(sz, re._alignment) << "\n" << std::flush;

                    switch (re._type) {
                        case REQUEST_ALLOCATE: break;
                        case REQUEST_STORE: {
                            if (re._ptr_in != nullptr) {
                                memcpy(cptr, re._ptr_in, sz);
                            } else {
                                // no input pointer -- replicate the inline pattern element by element
                                size_t of = 0;
                                for (int i = 0; i < re._num_elements; i++, of += re._element_size) {
                                    std::copy(std::begin(re._data), std::end(re._data), cptr + of);
                                }
                            }
                            break;
                        }
                        case REQUEST_INITIALIZER: {
                            re._initializer(cptr, sz);
                            break;
                        }
                        default: break;
                    }
                }

                offset += ALIGN(sz + re._padding, re._alignment);
            }
        };
        // lay out the RW section first, starting at offset 0 ...
        setupOffsets([](MemRequest & request) {
            return request._region != REGION_RW;
        }, 0);

        // ... then the RO section, right after the page-aligned RW section
        setupOffsets([](MemRequest & request) {
            return request._region != REGION_RO;
        }, _rw_section_size);
    }
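    // Resulting layout (illustrative): with pageAlignment = 4096 and 5000
    // bytes of RW requests, the RW section pads to 8192, the RO section
    // starts at heap.get() + 8192, and getTotalBytes() reports
    // 8192 + ALIGN(<RO bytes>, 4096).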
    size_t getRWBytes() {
        updateSectionsSizes();
        return _rw_section_size;
    }

    size_t getTotalBytes() {
        updateSectionsSizes();
        return _total;
    }

 protected:
    rRegion regionType() const override {
        return REGION_RW;
    }

    std::vector<MemRequest> & futureHeap() override {
        return _future_heap;
    }

    std::list<std::vector<char>> &localStorage() override {
        return _local_storage;
    }
    template<class T>
    void iterate_binded(MemRequest & reference, const T & visitor) {
        for (auto &re : _future_heap) {
            if (re._type == REQUEST_BIND && re._ptr_in == reference._ptr_out) {
                // std::cout << "  [binded=" << re._ptr_out << "]\n";
                visitor(reference, re);
                // TODO: no circular dependency checking, only tree-style dependency supported
                iterate_binded(re, visitor);
            }
        }
    }
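    // Traversal example (illustrative): if request B is bound to A's output
    // pointer and C to B's, iterate_binded(A, v) calls v(A, B), then recurses
    // and calls v(B, C). A cycle of binds would recurse forever -- hence the
    // tree-only caveat above.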
    std::shared_ptr<uint8_t> allocate(size_t bytes) {
        // capture `bytes` so the deleter can return the exact allocation to the allocator
        std::shared_ptr<uint8_t> sp(_allocator.allocate(bytes), [=](uint8_t *p) {
            _allocator.deallocate(p, bytes);
        });
        std::fill(sp.get(), sp.get() + bytes, 0);
        return sp;
    }
    void updateSectionsSizes() {
        // count the sizes of the read/write and read-only regions
        _rw_section_size = 0;
        _ro_section_size = 0;
        for (auto &re : _future_heap) {
            auto current = ALIGN(re._num_elements * re._element_size + re._padding, re._alignment);
#ifdef GNA_HEAP_PROFILER
            std::cout << "chunk: " << " region: " << re._region << ", " <<
                "type: " << (re._type == REQUEST_STORE ? "store " : re._type == REQUEST_BIND ? "bind " : "alloc ") <<
                std::setw(10) << re._num_elements << ", " <<
                static_cast<int>(re._element_size) << ", " <<
                re._padding << ", " <<
                re._offset << ", " <<
                re._alignment << std::endl;
#endif
            if (re._type == REQUEST_BIND) continue;

            if (re._region == REGION_RW) {
                _rw_section_size += current;
            } else {
                _ro_section_size += current;
            }
        }
        _rw_section_size = ALIGN(_rw_section_size, _page_alignment);
        _ro_section_size = ALIGN(_ro_section_size, _page_alignment);
    }
};

}  // namespace GNAPluginNS