// SPDX-License-Identifier: Apache-2.0
/**
 * Copyright (C) 2021 Parichay Kapoor <pk.kapoor@samsung.com>
 *
 * @file   tensor_pool.cpp
 * @brief  This is TensorPool for all requested tensors
 * @see    https://github.com/nnstreamer/nntrainer
 * @author Parichay Kapoor <pk.kapoor@samsung.com>
 * @author Jihoon Lee <jhoon.it.lee@samsung.com>
 * @bug    No known bugs except for NYI items
 *
 * @todo   add checks for request/updates that finalize is not done
 * @todo   check before allocate that finalize is done
 */
#include <cache_loader.h>
#include <cache_pool.h>
#include <memory_pool.h>
#include <nntrainer_log.h>
#include <tensor.h>
#include <tensor_pool.h>
#include <tensor_wrap_specs.h>
#include <util_func.h>

namespace nntrainer {
/**
 * @brief     Request tensor with the given spec
 *
 * @note returns empty tensor which will be filled when allocate is called.
 * @note we assume that the caller checks if the exec_order and lifespan are
 * compatible.
 */
Tensor *TensorPool::request(const std::string &name, const TensorDim &dim,
                            const std::vector<unsigned int> &exec_order,
                            TensorLifespan lifespan,
                            const Tensor::Initializer &init,
                            bool is_weight_grad) {
  return registerRequestSpec(
    {is_weight_grad, std::make_unique<Tensor>(dim, false, init, name),
     TensorPool::SourceDetails{0, lifespan, exec_order, {}}});
}
/**
 * @brief     Request tensor with the given spec
 *
 * @note returns empty tensor which will be filled when allocate is called.
 */
Tensor *TensorPool::placeholder(const std::string &name, const TensorDim &dim) {
  return request(name, dim, {}, TensorLifespan::UNMANAGED);
}
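/**
 * Illustrative usage sketch (names and dimensions are hypothetical, not part
 * of this file): managed tensors are requested with their execution orders,
 * while externally fed inputs are declared as placeholders and bound later
 * via fillPlaceholder().
 * @code
 *   TensorPool tp;
 *   Tensor *w = tp.request("w", TensorDim(1, 1, 2, 2), {0, 1},
 *                          TensorLifespan::MAX_LIFESPAN);
 *   Tensor *in = tp.placeholder("in", TensorDim(1, 1, 2, 2));
 *   // "in" stays UNMANAGED; tp.allocate() will not reserve memory for it.
 * @endcode
 */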
/**
 * @brief     Request tensor which has been already requested with the given
 * spec
 *
 * @note returns empty tensor which will be filled when allocate is called.
 * @note we assume that the caller checks if the exec_order and lifespan are
 * compatible.
 */
Tensor *TensorPool::view(const std::string &name, const std::string &reference,
                         const TensorDim &dim,
                         const std::vector<unsigned int> &exec_order,
                         TensorLifespan lifespan, const size_t offset) {
  auto &spec = getSourceSpec(reference);
  /** accumulate the offset of the reference; a reference which is itself a
   * view already carries an offset from its source */
  unsigned adjusted_offset = std::visit(
    [](const auto &s) {
      using T = std::decay_t<decltype(s)>;
      if constexpr (std::is_same_v<T, SourceDetails>) {
        return 0u;
      } else if constexpr (std::is_same_v<T, DependentDetails>) {
        return s.offset;
      }
      return 0u;
    },
    pool[name_map.at(reference)].details);
  adjusted_offset += offset;

  NNTR_THROW_IF(spec.tensor->getDim().getDataLen() <
                  adjusted_offset + dim.getDataLen(),
                std::invalid_argument)
    << "view tensor size + offset > source tensor size, view tensor size: "
    << dim.getDataLen() << " offset: " << adjusted_offset
    << " source tensor: " << spec.tensor->getDim().getDataLen()
    << " name: " << spec.tensor->getName();

  expandLifespan(spec, exec_order, lifespan);
  std::get<SourceDetails>(spec.details).dependents.push_back(pool.size());

  /** @note below invalidates spec reference */
  /** @note in case of view of view, internal datastructure saves the src to
   * view index, not view to view reference, in order to flatten depth */
  auto parent_idx = name_map.at(spec.tensor->getName());

  /** @note is_weight_grad defaults to false for a view */
  return registerRequestSpec(
    {false,
     std::make_unique<Tensor>(dim, false, Tensor::Initializer::NONE, name),
     TensorPool::DependentDetails{parent_idx, adjusted_offset}});
}
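/**
 * Illustrative sketch of view-of-view flattening (hypothetical names and
 * sizes): a second-level view is stored as a dependent of the original
 * source with the offsets summed, not as a dependent of the first view.
 * @code
 *   Tensor *src = tp.request("src", TensorDim(1, 1, 4, 4), {0},
 *                            TensorLifespan::ITERATION_LIFESPAN);
 *   Tensor *v0 = tp.view("v0", "src", TensorDim(1, 1, 2, 4), {0},
 *                        TensorLifespan::ITERATION_LIFESPAN, 0);
 *   // "v1" is registered with parent "src" and offset 0 + 8
 *   Tensor *v1 = tp.view("v1", "v0", TensorDim(1, 1, 1, 4), {0},
 *                        TensorLifespan::ITERATION_LIFESPAN, 8);
 * @endcode
 */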
/**
 * @brief finalize the requested tensors
 *
 * @details finalize the requested tensors, request memory for them and plan
 * layout for their allocations.
 */
void TensorPool::finalize(const MemoryPlanner &planner,
                          unsigned int start_order, unsigned int end_order) {
  mem_pool->clear();
  unsigned int bytes_requested = 0;
  /** if an execution order equals PERSIST_END_ORDER, the tensor is assumed
   * to have an extra execution order for gradient clipping;
   * persist_end_order tracks whether end_order has already been extended */
  bool persist_end_order = false;
  unsigned int old_end_order = end_order;
  for (auto &spec : pool) {
    auto details = std::get_if<SourceDetails>(&spec.details);
    if (!details || details->lifespan == TensorLifespan::UNMANAGED ||
        details->exec_order.empty()) {
      continue;
    }

    /**
     * 1. create the validity ranges for all the requested tensors.
     * validity_start/validity_end should be a value in the exec order of the
     * given tensor or a value out of range so as to not request memory for
     * this tensor.
     */
    unsigned int validity_start = end_order + 1;
    for (unsigned int idx = 0; idx < details->exec_order.size(); idx++) {
      if (details->exec_order[idx] >= start_order)
        validity_start = std::min(validity_start, details->exec_order[idx]);

      /** Clamp execution orders beyond the backwarding end order so they are
       * not treated as reachable.
       * e.g., for the input layer, backwarding is never reached but an
       * execution order is still assigned.
       */
      if (details->exec_order[idx] > old_end_order &&
          details->exec_order[idx] != PERSIST_END_ORDER) {
        details->exec_order[idx] = PERSIST_END_ORDER - 1;
      }
    }

    unsigned int validity_end = validity_start;
    for (unsigned int idx = 0; idx < details->exec_order.size(); idx++) {
      if (details->exec_order[idx] == PERSIST_END_ORDER) {
        if (!persist_end_order) {
          end_order = end_order + 1;
          persist_end_order = true;
        }
        validity_end = end_order;
        details->exec_order[idx] = validity_end;
        break;
      }

      if (details->exec_order[idx] <= end_order) {
        validity_end = std::max(validity_end, details->exec_order[idx]);
      }
    }

    /**
     * use lifespan to update the validity.
     * if the validity is long term, the tensor must stay valid for the
     * complete duration.
     */
    if (isTensorLongTerm(details->lifespan)) {
      validity_start = start_order;
      validity_end = end_order;
    }

    /** 2. for each tensor, skip the request if it is not in the provided
     * range */
    if (validity_end < start_order || validity_start > end_order)
      continue;

    /**
     * 3. requestMemory for all the tensors and set their tokens
     * @note +1 is to make the validity_end exclusive in the interval range
     */
    details->token = mem_pool->requestMemory(
      spec.tensor->bytes(), validity_start, validity_end + 1,
      details->exec_order, details->lifespan, spec.is_weight_grad);
#ifdef DEBUG
    if (details->token == 0)
      throw std::runtime_error("Received invalid token from memory pool");
#endif

    bytes_requested += spec.tensor->bytes();
  }

  /** 4. finalizeLayout for the memory pool. */
  if (bytes_requested > 0) {
    double efficiency = mem_pool->planLayout(planner);
    ml_logd("Memory layout efficiency = %lf", efficiency);
  }
}
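/**
 * Worked example of the validity computation above (assumed values): with
 * start_order = 0, end_order = 10 and exec_order = {2, 5, PERSIST_END_ORDER},
 * validity_start becomes 2 and the PERSIST_END_ORDER entry bumps end_order to
 * 11 (once, shared across all tensors), so validity_end = 11; requestMemory()
 * is then called with the half-open interval [2, 12).
 */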
/**
 * @brief Set the batch size for the inputs/outputs of the layers
 */
void TensorPool::setBatchSize(const std::string &name, unsigned int batch) {
  if (name_map.find(name) == name_map.end())
    throw std::invalid_argument("Requested tensor not found");

  pool[name_map[name]].tensor->updateBatch(batch);
}
/**
 * @brief Allocate memory for all the managed tensors
 */
void TensorPool::allocate() {
  if (minMemoryRequirement() == 0)
    return;
  mem_pool->allocate();

  /** set the pointers using the token for all the tensors */
  for (auto &spec : pool) {
    auto details = std::get_if<SourceDetails>(&spec.details);
    if (!details || details->token == 0) {
      continue;
    }
    spec.tensor->setData(mem_pool->getMemory(details->token), 0, true);
    /** propagate the freshly set data pointer to all dependent views */
    syncDependents(spec);
  }

  cache_loader->init();
}
/**
 * @brief Deallocate memory for all the managed tensors
 */
void TensorPool::deallocate() {
  cache_loader->finish();
  mem_pool->deallocate();

  /** nullify the data pointers for the tensors */
  for (auto &spec : pool) {
    spec.tensor->setData(nullptr);
  }
}
const std::vector<unsigned int> &
TensorPool::getExecutionOrder(const std::string &name) {
  return std::get<SourceDetails>(getSourceSpec(name).details).exec_order;
}
/**
 * @brief Expand the lifespan of the tensor with the given name
 */
TensorPool::RequestSpec &
TensorPool::expandLifespan(const std::string &name,
                           const std::vector<unsigned> &exec_order,
                           TensorLifespan lifespan) {
  auto &spec = getSourceSpec(name);
  expandLifespan(spec, exec_order, lifespan);
  return spec;
}
void TensorPool::expandLifespan(RequestSpec &spec,
                                const std::vector<unsigned int> &exec_order,
                                TensorLifespan lifespan) {
  auto &details = std::get<SourceDetails>(spec.details);
  NNTR_THROW_IF((details.lifespan != TensorLifespan::UNMANAGED &&
                 lifespan == TensorLifespan::UNMANAGED),
                std::invalid_argument)
    << "Extending lifespan to unmanaged is not possible for name: "
    << spec.tensor->getName();

  if (details.lifespan != TensorLifespan::UNMANAGED) {
    /// update the lifespan only if the current lifespan is managed
    details.lifespan =
      enum_class_or<TensorLifespan>(details.lifespan, lifespan);
  }
  details.exec_order.insert(details.exec_order.end(), exec_order.begin(),
                            exec_order.end());
}
void TensorPool::syncDependents(const RequestSpec &spec) {
  /// @note syncing dependents of dependents is invalid and will throw.
  auto &dependents = std::get<SourceDetails>(spec.details).dependents;
  for (auto &dep : dependents) {
    auto &dep_spec = pool.at(dep);
    auto offset = std::get<DependentDetails>(dep_spec.details).offset;

    dep_spec.tensor->setData(spec.tensor->getMemoryData(),
                             spec.tensor->getOffset() + offset);
  }
}
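/**
 * Worked example for syncDependents() (assumed values): if the source tensor
 * sits at offset 4 within its MemoryData and a dependent view was registered
 * with offset 6, the view's data pointer is bound to the same MemoryData at
 * offset 4 + 6 = 10.
 */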
Tensor *TensorPool::registerRequestSpec(RequestSpec &&spec) {
  auto &name = spec.tensor->getName();
  if (name_map.find(name) != name_map.end())
    throw std::invalid_argument("Cannot request tensor with same name");

  if (spec.tensor->empty())
    throw std::invalid_argument("Cannot request tensor with size 0");

  if (name.empty())
    throw std::invalid_argument("Cannot request tensor with empty name");

  pool.push_back(std::move(spec));
  name_map[name] = pool.size() - 1;

  return pool.back().tensor.get();
}
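/**
 * @note getSourceSpec() below walks the dependent chain: for a view (or a
 * view of a view), it returns the RequestSpec of the ultimate source tensor
 * that actually owns the memory.
 */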
TensorPool::RequestSpec &TensorPool::getSourceSpec(const std::string &name) {
  RequestSpec *rs = &pool.at(name_map.at(name));
  while (auto dep_details = std::get_if<DependentDetails>(&rs->details)) {
    rs = &pool.at(dep_details->parent_idx);
  }
  return *rs;
}
void TensorPool::fillPlaceholder(const std::string &name, const Tensor &t) {
  auto &spec = getSourceSpec(name);
  auto &details = std::get<SourceDetails>(spec.details);
  NNTR_THROW_IF(details.lifespan != TensorLifespan::UNMANAGED,
                std::invalid_argument)
    << "Cannot set external tensor for non-zero lifespan for " << name;

  NNTR_THROW_IF(t.size() == 0 && t.getData(), std::invalid_argument)
    << "Error: setting invalid external tensor size 0 for " << name;

  NNTR_THROW_IF(t.size() != 0 && t.size() < spec.tensor->size(),
                std::invalid_argument)
    << "Error: setting external tensor of smaller size for "
    << spec.tensor->getName() << " (maybe view of " << name << ")";

  spec.tensor->setData(t.getMemoryData(), t.getOffset());
  syncDependents(spec);
}
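/**
 * Illustrative usage (hypothetical names; assumes a Tensor constructor that
 * allocates immediately): a placeholder declared earlier is bound to a
 * user-owned buffer right before execution, and all of its views start
 * aliasing that buffer.
 * @code
 *   Tensor input(TensorDim(1, 1, 2, 2), true); // user-owned, allocated
 *   tp.fillPlaceholder("in", input);
 * @endcode
 */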
Tensor *TensorPool::extend(const std::string &name, const TensorDim &dim,
                           const std::vector<unsigned int> &exec_order,
                           TensorLifespan lifespan) {
  NNTR_THROW_IF(!tensorExist(name), std::invalid_argument)
    << "cannot extend tensor which does not exist, name: " << name;
  auto &spec = getSourceSpec(name);
  NNTR_THROW_IF(dim != spec.tensor->getDim(), std::invalid_argument)
    << "Cannot extend tensor with different dimension";
  spec.is_weight_grad = false;
  expandLifespan(spec, exec_order, lifespan);
  return getTensor(name);
}
Tensor *TensorPool::requestOrExtend(const std::string &name,
                                    const TensorDim &dim,
                                    const std::vector<unsigned int> &exec_order,
                                    TensorLifespan lifespan,
                                    const Tensor::Initializer &init) {
  NNTR_THROW_IF(lifespan == TensorLifespan::UNMANAGED, std::invalid_argument)
    << "unmanaged life span is not supported";

  if (tensorExist(name)) {
    Tensor *t = getTensor(name);
    NNTR_THROW_IF(t->getDim() != dim, std::invalid_argument)
      << "tensor dimension mismatch for requestOrExtend name: " << name;
    NNTR_THROW_IF(t->getInitializer() != init, std::invalid_argument)
      << "tensor initializer mismatch for requestOrExtend name: " << name;
    return extend(name, dim, exec_order, lifespan);
  } else {
    return request(name, dim, exec_order, lifespan, init);
  }
}
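/**
 * Illustrative usage (hypothetical names): requestOrExtend() lets two callers
 * share one tensor, e.g. a recurrent cell touching the same state at
 * different execution orders; the second call merges exec orders and
 * lifespan via extend() and returns the same tensor.
 * @code
 *   Tensor *h0 = tp.requestOrExtend("h", dim, {1},
 *                                   TensorLifespan::ITERATION_LIFESPAN,
 *                                   Tensor::Initializer::ZEROS);
 *   Tensor *h1 = tp.requestOrExtend("h", dim, {2},
 *                                   TensorLifespan::ITERATION_LIFESPAN,
 *                                   Tensor::Initializer::ZEROS);
 *   // h0 == h1
 * @endcode
 */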
void TensorPool::reidentifySource(const std::string &dest,
                                  const std::string &new_src,
                                  unsigned int offset) {
  /// source tensor of dest tensor becomes a view of new_src
  auto &old_spec = getSourceSpec(dest);
  auto &old_details = std::get<SourceDetails>(old_spec.details);

  /// 1. extend new_src with old src
  auto &new_spec = getSourceSpec(new_src);
  expandLifespan(new_spec, old_details.exec_order, old_details.lifespan);
  auto &new_dependents = std::get<SourceDetails>(new_spec.details).dependents;
  new_dependents.insert(new_dependents.end(), old_details.dependents.begin(),
                        old_details.dependents.end());

  /// 2. calculate base offset from the new_src
  auto new_parent_idx = name_map.at(new_src);
  unsigned base_offset = std::visit(
    [](const auto &s) {
      using T = std::decay_t<decltype(s)>;
      if constexpr (std::is_same_v<T, SourceDetails>) {
        return 0u;
      } else if constexpr (std::is_same_v<T, DependentDetails>) {
        return s.offset;
      }
      return 0u;
    },
    pool[new_parent_idx].details);
  base_offset += offset;

  /// 3. transform parent idx/offset of old src's dependents based on the
  /// base offset
  for (auto &dep : old_details.dependents) {
    auto &dep_spec = pool.at(dep);
    auto &details = std::get<DependentDetails>(dep_spec.details);
    details.offset += base_offset;
    details.parent_idx = new_parent_idx;
  }

  /// 4. replace old src's details with dependent details pointing to new_src
  old_spec.details = DependentDetails{new_parent_idx, base_offset};
}
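/**
 * Sketch of the effect of reidentifySource() (hypothetical pool indices): if
 * "dest" resolved to source S with dependents {d1, d2} and "new_src" resolves
 * to N, then after the call S, d1 and d2 are all DependentDetails on N, with
 * every offset shifted by base_offset.
 */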
bool TensorPool::tensorExist(const std::string &name) {
  /// @todo consider using a helper function for the check
  return name_map.count(name);
}
/**
 * @brief Check if the lifespan leads to long term validity
 */
bool TensorPool::isTensorLongTerm(const TensorLifespan &lifespan) {
  switch (lifespan) {
  case TensorLifespan::EPOCH_LIFESPAN:
    [[fallthrough]];
  case TensorLifespan::MAX_LIFESPAN:
    return true;
  case TensorLifespan::FORWARD_FUNC_LIFESPAN:
    [[fallthrough]];
  case TensorLifespan::BACKWARD_FUNC_LIFESPAN:
    [[fallthrough]];
  case TensorLifespan::ITERATION_LIFESPAN:
    [[fallthrough]];
  case TensorLifespan::UNMANAGED:
    [[fallthrough]];
  default:
    return false;
  }
}
void TensorPool::flushCache() {
  if (auto pool = dynamic_cast<CachePool *>(mem_pool.get()))
    pool->flush();
}

void TensorPool::flushCacheExcept(unsigned int order) {
  if (auto pool = dynamic_cast<CachePool *>(mem_pool.get()))
    pool->flushExcept(order);
}
void TensorPool::loadCacheExec(unsigned int order) {
  if (dynamic_cast<CachePool *>(mem_pool.get()))
    cache_loader->load(order);
}

int TensorPool::loadCacheExecAsync(
  unsigned int order, TaskExecutor::CompleteCallback complete_callback) {
  if (dynamic_cast<CachePool *>(mem_pool.get()))
    return cache_loader->loadAsync(order, complete_callback);
  else
    return -1; /**< no cache pool in use, so there is nothing to load */
}

void TensorPool::loadCacheCancel(int id) {
  if (dynamic_cast<CachePool *>(mem_pool.get()) == nullptr)
    return;

  cache_loader->cancelAsync(id);
}

} // namespace nntrainer