nntrainer/tensor/tensor_pool.cpp

   1 // SPDX-License-Identifier: Apache-2.0
   2 /**
   3  * Copyright (C) 2021 Parichay Kapoor <pk.kapoor@samsung.com>
   4  *
   5  * @file   tensor_pool.cpp
   6  * @date   19 Aug 2021
   7  * @brief  This is TensorPool for all requested tensors
   8  * @see    https://github.com/nnstreamer/nntrainer
   9  * @author Parichay Kapoor <pk.kapoor@samsung.com>
  10  * @author Jihoon Lee <jhoon.it.lee@samsung.com>
  11  * @bug    No known bugs except for NYI items
  12  *
  13  * @todo   add checks for request/updates that finalize is not done
  14  * @todo   check before allocate that finalize is done
  15  */
  16
  17 #include <memory_pool.h>
  18 #include <nntrainer_log.h>
  19 #include <tensor.h>
  20 #include <tensor_pool.h>
  21 #include <tensor_wrap_specs.h>
  22 #include <util_func.h>
  23
  24 namespace nntrainer {
  25
  26 /**
  27  * @brief     Request tensor with the given spec
  28  *
  29  * @note returns empty tensor which will be filled when allocate is called.
  30  * @note we assume that the caller checks if the exec_order and lifespan are
  31  * compatible.
  32  */
  33 Tensor *TensorPool::request(const std::string &name, const TensorDim &dim,
  34                             const std::vector<unsigned int> &exec_order,
  35                             TensorLifespan lifespan,
  36                             const Tensor::Initializer &init,
  37                             bool is_weight_grad) {
  38   return registerRequestSpec(
  39     {is_weight_grad, std::make_unique<Tensor>(dim, false, init, name),
  40      TensorPool::SourceDetails{0, lifespan, exec_order, {}}});
  41 }
  42
  43 /**
  44  * @brief     Request tensor with the given spec
  45  *
  46  * @note returns empty tensor which will be filled when allocate is called.
  47  */
  48 Tensor *TensorPool::placeholder(const std::string &name, const TensorDim &dim) {
  49   return request(name, dim, {}, TensorLifespan::UNMANAGED);
  50 }
  51
  52 /**
  53  * @brief     Request tensor which has been already requested with the given
  54  * spec
  55  *
  56  * @note returns empty tensor which will be filled when allocate is called.
  57  * @note we assume that the caller checks if the exec_order and lifespan are
  58  * compatible.
  59  */
  60 Tensor *TensorPool::view(const std::string &name, const std::string &reference,
  61                          const TensorDim &dim,
  62                          const std::vector<unsigned int> &exec_order,
  63                          TensorLifespan lifespan, const size_t offset) {
  64   auto &spec = getSourceSpec(reference);
  65   unsigned adjusted_offset = std::visit(
  66     [](const auto &s) {
  67       using T = std::decay_t<decltype(s)>;
  68       if constexpr (std::is_same_v<T, SourceDetails>) {
  69         return 0u;
  70       } else if constexpr (std::is_same_v<T, DependentDetails>) {
  71         return s.offset;
  72       }
  73       return 0u;
  74     },
  75     pool[name_map.at(reference)].details);
  76   adjusted_offset += offset;
  77
  78   NNTR_THROW_IF(spec.tensor->getDim().getDataLen() <
  79                   adjusted_offset + dim.getDataLen(),
  80                 std::invalid_argument)
  81     << "view tensor size + offset > source tensor size, view tensor size: "
  82     << dim.getDataLen() << " offset: " << adjusted_offset
  83     << " source tensor: " << spec.tensor->getDim().getDataLen()
  84     << " name: " << spec.tensor->getName();
  85
  86   expandLifespan(spec, exec_order, lifespan);
  87   std::get<SourceDetails>(spec.details).dependents.push_back(pool.size());
  88
  89   /** @note below invalidates spec reference */
  90   /** @note in case of view of view, internal datastructure saves the src to
  91    * view index, not view to view reference in order to flatten depth */
  92   auto parent_idx = name_map.at(spec.tensor->getName());
  93
  94   /** @note default is_weight_grad for view is false. view is for the
  95    * activation. */
  96   return registerRequestSpec(
  97     {false,
  98      std::make_unique<Tensor>(dim, false, Tensor::Initializer::NONE, name),
  99      TensorPool::DependentDetails{parent_idx, adjusted_offset}});
 100 }
 101
 102 /**
 103  * @brief finalize the requested tensors
 104  *
 105  * @details finalize the requested tensors, request memory for them and plan
 106  * layout for their allocations.
 107  */
 108 void TensorPool::finalize(const MemoryPlanner &planner,
 109                           unsigned int start_order, unsigned int end_order) {
 110   mem_pool->clear();
 111   unsigned int bytes_requested = 0;
 112   /** if execution order is PERSIST_END_ORDER, then we think it has another
 113    * execution order for gradient clipping
 114    *  persist_end_order is for checking if the end order is updated */
 115   bool persist_end_order = false;
 116   unsigned int old_end_order = end_order;
 117   for (auto &spec : pool) {
 118     auto details = std::get_if<SourceDetails>(&spec.details);
 119     if (!details || details->lifespan == TensorLifespan::UNMANAGED ||
 120         details->exec_order.empty()) {
 121       continue;
 122     }
 123     details->token = 0;
 124
 125     /**
 126      * 1. create the validity ranges for the all the requested tensors.
 127      * validity_start/validity_end should be a value in the exec order of the
 128      * given tensor or a value out of range so as to not request memory for this
 129      * tensor
 130      */
 131     unsigned int validity_start = end_order + 1;
 132     for (unsigned int idx = 0; idx < details->exec_order.size(); idx++) {
 133       if (details->exec_order[idx] >= start_order)
 134         validity_start = std::min(validity_start, details->exec_order[idx]);
 135       /** This is to enforce not to reach if the execution order is greater than
 136        *  backwarding end order.
 137        *  e.g., for the input layer, the backwarding is not reached but the
 138        * exeuction order is assigned.
 139        * */
 140       if (details->exec_order[idx] > old_end_order &&
 141           details->exec_order[idx] != PERSIST_END_ORDER) {
 142         details->exec_order[idx] = PERSIST_END_ORDER - 1;
 143       }
 144     }
 145
 146     unsigned int validity_end = validity_start;
 147     for (unsigned int idx = 0; idx < details->exec_order.size(); idx++) {
 148       if (details->exec_order[idx] == PERSIST_END_ORDER) {
 149         if (!persist_end_order) {
 150           end_order = end_order + 1;
 151           persist_end_order = true;
 152         }
 153         validity_end = end_order;
 154         details->exec_order[idx] = validity_end;
 155         break;
 156       }
 157
 158       if (details->exec_order[idx] <= end_order) {
 159         validity_end = std::max(validity_end, details->exec_order[idx]);
 160       }
 161     }
 162
 163     /**
 164      * use lifespan to update the validity.
 165      * if the validity is long term, the tensor must stay valid for the
 166      * complete duration.
 167      */
 168     if (isTensorLongTerm(details->lifespan)) {
 169       validity_start = start_order;
 170       validity_end = end_order;
 171     }
 172
 173     /** 2. for each tensor request if it is in the provided range */
 174     if (validity_end < start_order || validity_start > end_order)
 175       continue;
 176
 177     /**
 178      * 3. requestMemory for all the tensors and set their tokens
 179      * @note +1 is to make the validity_end exlusive in the interval range
 180      */
 181     details->token = mem_pool->requestMemory(
 182       spec.tensor->bytes(), validity_start, validity_end + 1,
 183       details->exec_order, details->lifespan, spec.is_weight_grad);
 184 #ifdef DEBUG
 185     if (details->token == 0)
 186       throw std::runtime_error("Received invalid token from memory pool");
 187 #endif
 188
 189     bytes_requested += spec.tensor->bytes();
 190   }
 191
 192   /** 4. finalizeLayout for the memory pool. */
 193   if (bytes_requested > 0) {
 194     double efficiency = mem_pool->planLayout(planner);
 195     ml_logd("Memory layout efficiency = %lf", efficiency);
 196   }
 197 }
 198
 199 /**
 200  * @brief Set the batch size for the inputs/outputs of the layers
 201  */
 202 void TensorPool::setBatchSize(const std::string &name, unsigned int batch) {
 203   if (name_map.find(name) == name_map.end())
 204     throw std::invalid_argument("Requested tensor not found");
 205
 206   pool[name_map[name]].tensor->updateBatch(batch);
 207 }
 208
 209 /**
 210  * @brief Allocate memory for all the managed tensors
 211  */
 212 void TensorPool::allocate() {
 213   if (minMemoryRequirement() == 0)
 214     return;
 215   mem_pool->allocate();
 216
 217   /** set the pointers using the token for all the tensors */
 218   for (auto &spec : pool) {
 219     auto details = std::get_if<SourceDetails>(&spec.details);
 220     if (!details || details->token == 0) {
 221       continue;
 222     }
 223     spec.tensor->setData(mem_pool->getMemory(details->token), 0, true);
 224     syncDependents(spec);
 225   }
 226
 227   if (cache_loader)
 228     cache_loader->init();
 229 }
 230
 231 /**
 232  * @brief Deallocate memory for all the managed tensors
 233  */
 234 void TensorPool::deallocate() {
 235   if (cache_loader)
 236     cache_loader->finish();
 237
 238   mem_pool->deallocate();
 239
 240   /** nullify the data pointers for the tensors */
 241   for (auto &spec : pool) {
 242     spec.tensor->setData(nullptr);
 243   }
 244 }
 245
 246 const std::vector<unsigned int> &
 247 TensorPool::getExecutionOrder(const std::string &name) {
 248   return std::get<SourceDetails>(getSourceSpec(name).details).exec_order;
 249 }
 250
 251 /**
 252  * @brief     Expand the lifespan of the tensor with the given name
 253  *
 254  */
 255 TensorPool::RequestSpec &
 256 TensorPool::expandLifespan(const std::string &name,
 257                            const std::vector<unsigned> &exec_order,
 258                            TensorLifespan lifespan) {
 259   auto &spec = getSourceSpec(name);
 260   expandLifespan(spec, exec_order, lifespan);
 261   return spec;
 262 }
 263
 264 void TensorPool::expandLifespan(RequestSpec &spec,
 265                                 const std::vector<unsigned int> &exec_order,
 266                                 TensorLifespan lifespan) {
 267   auto &details = std::get<SourceDetails>(spec.details);
 268   NNTR_THROW_IF((details.lifespan != TensorLifespan::UNMANAGED &&
 269                  lifespan == TensorLifespan::UNMANAGED),
 270                 std::invalid_argument)
 271     << "Extending to lifespan to unmanaged is not possible for name: "
 272     << spec.tensor->getName();
 273
 274   if (details.lifespan != TensorLifespan::UNMANAGED) {
 275     /// update only if lifespan is unmanaged
 276     details.lifespan =
 277       enum_class_or<TensorLifespan>(details.lifespan, lifespan);
 278   }
 279   details.exec_order.insert(details.exec_order.end(), exec_order.begin(),
 280                             exec_order.end());
 281 }
 282
 283 void TensorPool::syncDependents(const RequestSpec &spec) {
 284   /// @note syncing dependents of dependents is invalid and will throw.
 285   auto &dependents = std::get<SourceDetails>(spec.details).dependents;
 286   for (auto &dep : dependents) {
 287     auto &dep_spec = pool.at(dep);
 288     auto offset = std::get<DependentDetails>(dep_spec.details).offset;
 289
 290     dep_spec.tensor->setData(spec.tensor->getMemoryData(),
 291                              spec.tensor->getOffset() + offset);
 292   }
 293 }
 294
 295 Tensor *TensorPool::registerRequestSpec(RequestSpec &&spec) {
 296   auto &name = spec.tensor->getName();
 297   if (name_map.find(name) != name_map.end())
 298     throw std::invalid_argument("Cannot request tensor with same name");
 299
 300   if (spec.tensor->empty())
 301     throw std::invalid_argument("Cannot request tensor with size 0");
 302
 303   if (name.empty())
 304     throw std::invalid_argument("Cannot request tensor with empty name");
 305
 306   pool.push_back(std::move(spec));
 307   name_map[name] = pool.size() - 1;
 308
 309   return pool.back().tensor.get();
 310 }
 311
 312 TensorPool::RequestSpec &TensorPool::getSourceSpec(const std::string &name) {
 313   RequestSpec *rs = &pool.at(name_map.at(name));
 314   while (auto dep_details = std::get_if<DependentDetails>(&rs->details)) {
 315     rs = &pool.at(dep_details->parent_idx);
 316   }
 317
 318   return *rs;
 319 }
 320
 321 void TensorPool::fillPlaceholder(const std::string &name, const Tensor &t) {
 322   auto &spec = getSourceSpec(name);
 323   auto &details = std::get<SourceDetails>(spec.details);
 324   NNTR_THROW_IF(details.lifespan != TensorLifespan::UNMANAGED,
 325                 std::invalid_argument)
 326     << "Cannot set external tensor for non-zero lifespan for " << name;
 327
 328   NNTR_THROW_IF(t.size() == 0 && t.getData(), std::invalid_argument)
 329     << "Error: setting invalid external tensor size 0 for " << name;
 330
 331   NNTR_THROW_IF(t.size() != 0 && t.size() < spec.tensor->size(),
 332                 std::invalid_argument)
 333     << "Error: setting external tensor of smaller size for "
 334     << spec.tensor->getName() << "(maybe view of " << name << ")";
 335
 336   spec.tensor->setData(t.getMemoryData(), t.getOffset());
 337   syncDependents(spec);
 338 }
 339
 340 Tensor *TensorPool::extend(const std::string &name, const TensorDim &dim,
 341                            const std::vector<unsigned int> &exec_order,
 342                            TensorLifespan lifespan) {
 343   NNTR_THROW_IF(!tensorExist(name), std::invalid_argument)
 344     << " cannot extend tensor which does not exist, name: " << name;
 345   auto &spec = getSourceSpec(name);
 346   NNTR_THROW_IF(dim != spec.tensor->getDim(), std::invalid_argument)
 347     << "Cannot extend tensor with different dimension";
 348   spec.is_weight_grad = false;
 349   expandLifespan(spec, exec_order, lifespan);
 350   return getTensor(name);
 351 }
 352
 353 Tensor *TensorPool::requestOrExtend(const std::string &name,
 354                                     const TensorDim &dim,
 355                                     const std::vector<unsigned int> &exec_order,
 356                                     TensorLifespan lifespan,
 357                                     const Tensor::Initializer &init) {
 358   NNTR_THROW_IF(lifespan == TensorLifespan::UNMANAGED, std::invalid_argument)
 359     << "unmanaged life span is not supported";
 360
 361   if (tensorExist(name)) {
 362     Tensor *t = getTensor(name);
 363     NNTR_THROW_IF(t->getDim() != dim, std::invalid_argument)
 364       << "tensor dimension mismatch for requestOrExtend name: " << name;
 365     NNTR_THROW_IF(t->getInitializer() != init, std::invalid_argument)
 366       << "tensor initializer mismatch for requestOrExtend name: " << name;
 367     return extend(name, dim, exec_order, lifespan);
 368   } else {
 369     return request(name, dim, exec_order, lifespan, init);
 370   }
 371 }
 372
 373 void TensorPool::reidentifySource(const std::string &dest,
 374                                   const std::string &new_src,
 375                                   unsigned int offset) {
 376   /// @todo add test
 377   /// source tensor of dest tensor becomes a view of new_src
 378   auto &old_spec = getSourceSpec(dest);
 379   auto &old_details = std::get<SourceDetails>(old_spec.details);
 380
 381   /// 1. extend new_src with old src
 382   auto &new_spec = getSourceSpec(new_src);
 383   expandLifespan(new_spec, old_details.exec_order, old_details.lifespan);
 384   auto &new_dependents = std::get<SourceDetails>(new_spec.details).dependents;
 385   new_dependents.insert(new_dependents.end(), old_details.dependents.begin(),
 386                         old_details.dependents.end());
 387
 388   /// 2. calcaulate base offset from the new_src
 389   auto new_parent_idx = name_map.at(new_src);
 390   unsigned base_offset = std::visit(
 391     [](const auto &s) {
 392       using T = std::decay_t<decltype(s)>;
 393       if constexpr (std::is_same_v<T, SourceDetails>) {
 394         return 0u;
 395       } else if constexpr (std::is_same_v<T, DependentDetails>) {
 396         return s.offset;
 397       }
 398       return 0u;
 399     },
 400     pool[new_parent_idx].details);
 401   base_offset += offset;
 402
 403   /// 3. transform parent idx/offset of old src's dependents base on the offset
 404   for (auto &dep : old_details.dependents) {
 405     auto &dep_spec = pool.at(dep);
 406     auto &details = std::get<DependentDetails>(dep_spec.details);
 407     details.offset += base_offset;
 408     details.parent_idx = new_parent_idx;
 409   }
 410
 411   /// 4. replace old details to dependent srcs
 412   old_spec.details = DependentDetails{new_parent_idx, base_offset};
 413 }
 414
 415 bool TensorPool::tensorExist(const std::string &name) {
 416   /// @todo consider use a helper function to check, eg) something like
 417   /// getTensor()
 418   return name_map.count(name);
 419 }
 420
 421 /**
 422  * @brief     Check if the lifespan leads to long term valitidy
 423  *
 424  */
 425 bool TensorPool::isTensorLongTerm(const TensorLifespan &lifespan) {
 426   switch (lifespan) {
 427   case TensorLifespan::EPOCH_LIFESPAN:
 428     [[fallthrough]];
 429   case TensorLifespan::MAX_LIFESPAN:
 430     return true;
 431   case TensorLifespan::FORWARD_FUNC_LIFESPAN:
 432     [[fallthrough]];
 433   case TensorLifespan::BACKWARD_FUNC_LIFESPAN:
 434     [[fallthrough]];
 435   case TensorLifespan::ITERATION_LIFESPAN:
 436     [[fallthrough]];
 437   case TensorLifespan::UNMANAGED:
 438     [[fallthrough]];
 439   default:
 440     return false;
 441   }
 442 }
 443
 444 void TensorPool::flushCache() {
 445   if (auto pool = dynamic_cast<CachePool *>(mem_pool.get()))
 446     pool->flush();
 447 }
 448
 449 void TensorPool::flushCacheExcept(unsigned int order) {
 450   if (auto pool = dynamic_cast<CachePool *>(mem_pool.get()))
 451     pool->flushExcept(order);
 452 }
 453
 454 void TensorPool::loadCacheExec(unsigned int order) {
 455   if (dynamic_cast<CachePool *>(mem_pool.get()))
 456     cache_loader->load(order);
 457 }
 458
 459 int TensorPool::loadCacheExecAsync(
 460   unsigned int order, TaskExecutor::CompleteCallback complete_callback) {
 461   if (dynamic_cast<CachePool *>(mem_pool.get()))
 462     return cache_loader->loadAsync(order, complete_callback);
 463   else
 464     return -1;
 465 }
 466
 467 void TensorPool::loadCacheCancel(int id) {
 468   if (dynamic_cast<CachePool *>(mem_pool.get()) == nullptr)
 469     return;
 470
 471   cache_loader->cancelAsync(id);
 472 }
 473
 474 } // namespace nntrainer