// SPDX-License-Identifier: Apache-2.0
/**
 * Copyright (C) 2020 Parichay Kapoor <pk.kapoor@samsung.com>
 *
 * @brief  This is NNtrainer manager for all weights, i/o and intermediate
 * tensors
 * @see    https://github.com/nnstreamer/nntrainer
 * @author Parichay Kapoor <pk.kapoor@samsung.com>
 * @author Jihoon Lee <jhoon.it.lee@samsung.com>
 * @bug    No known bugs except for NYI items
 *
 * @details The manager assumes that the layer inputs/outputs are tracked by
 * the manager in the order of execution. If this order is not maintained,
 * the optimizations cannot be performed and will result in wrong values.
 */
#ifndef __MANAGER_H__
#define __MANAGER_H__
#ifdef __cplusplus

#include "tensor_wrap_specs.h"

#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include <basic_planner.h>
#include <graph_node.h>
#include <tensor_pool.h>
#include <var_grad.h>
#include <weight.h>

namespace nntrainer {
/**
 * @class   MMapedMemory
 * @brief   Memory Handler, that has mmaped memory with a file descriptor
 */
class MMapedMemory {
public:
  /**
   * @brief Construct a new MMapedMemory object
   *
   * @param size byte size of the memory chunk
   * @param allocate_fd_ map a shared memory object to a file
   */
  MMapedMemory(size_t size, bool allocate_fd_ = false);
  /**
   * @brief Destroy the MMapedMemory object
   */
  ~MMapedMemory() noexcept;

  /**
   * @brief Copy constructor (deleted)
   */
  MMapedMemory(const MMapedMemory &) = delete;

  /**
   * @brief Copy assignment operator (deleted)
   */
  MMapedMemory &operator=(const MMapedMemory &) = delete;

  /**
   * @brief Get the file descriptor.
   * Will return -1 except on Android.
   * @todo make this available for other platforms
   *
   * @return -1 if the fd is not allocated (or unable to allocate)
   */
  int getFd() noexcept { return fd; }
  /**
   * @brief get the size of managed memory
   */
  size_t size() noexcept { return buf_size; }

  /**
   * @brief get a typed buffer from the memory
   *
   * @tparam T type to specify the buffer; the return is reinterpreted to T*
   * @return T* typed buffer, nullptr if empty
   */
  template <typename T> T *typedBuffer() noexcept {
    return reinterpret_cast<T *>(buf);
  }

  /**
   * @brief get the raw buffer
   */
  void *data() noexcept { return typedBuffer<void>(); }

private:
  int fd;           /**< fd to access the shared_memory */
  void *buf;        /**< buffer object when use_shared_memory */
  size_t buf_size;  /**< buffer size */
  bool allocate_fd; /**< option to choose to allocate an fd */
};
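
/**
 * A minimal usage sketch (illustrative only): the handler owns the mapped
 * memory and reinterprets it on demand; the fd is only meaningful on Android
 * builds, per getFd() above.
 * @code
 * MMapedMemory mem(1024 * sizeof(float)); // request 1024 floats worth of bytes
 * float *buf = mem.typedBuffer<float>();  // same memory viewed as float*
 * if (buf != nullptr) {
 *   buf[0] = 1.0f; // memory is valid for mem.size() bytes
 * }
 * @endcode
 */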
/**
 * @class   Manager
 * @brief   manager of nntrainer
 */
class Manager {
public:
  /**
   * @brief Tensor Group Type
   * @note this is not a mutually exclusive list; a tensor might be identified
   * as input as well as output
   */
  enum TensorGroupType {
    INPUT = 0,   /**< Input of an operation */
    OUTPUT = 1,  /**< Output of an operation */
    WEIGHT = 2,  /**< Weight of an operation */
    TENSORS = 3, /**< Extra states of an operation */
  };

  constexpr inline static unsigned NUM_TENSOR_GROUP_TYPE =
    4; /**< number of tensor group types */
  /**
   * @brief Constructor of Manager
   */
  Manager() : enable_optimizations(true) {}

  /**
   * @brief Constructor of Manager with memory swap
   *
   * @param enable_swap enable swapping tensors to disk
   * @param swap_path path to store the swapped tensors
   */
  Manager(bool enable_swap, const std::string &swap_path = "") :
    weight_pool(enable_swap, swap_path, "weight_pool"),
    tensor_pool(enable_swap, swap_path, "tensor_pool"),
    enable_optimizations(true) {}
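
  /**
   * A minimal construction sketch (illustrative only; the swap path is a
   * hypothetical example): with swap enabled, both pools may offload tensor
   * data to files under the given path.
   * @code
   * Manager plain;                           // no swap, optimizations enabled
   * Manager swapped(true, "/tmp/nntr_swap"); // weight/tensor pools may swap
   * @endcode
   */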
  /**
   * @brief Copy construct a new Manager object (deleted)
   */
  Manager(const Manager &) = delete;

  /**
   * @brief Copy assign a new Manager object (deleted)
   */
  Manager &operator=(const Manager &) = delete;

  /**
   * @brief Move construct a new Manager object
   */
  Manager(Manager &&) noexcept = default;

  /**
   * @brief Move assign a new Manager object
   *
   * @return Manager& reference to the newly assigned manager
   */
  Manager &operator=(Manager &&) noexcept = default;

  /**
   * @brief Destructor of Manager
   */
  ~Manager() = default;
  /**
   * @brief Create weights with the given spec
   * @todo The max_exec_order can be reduced to the max exec order which
   * updates the gradient
   *
   * @param node Graph node to extract node identifiers/info
   * @param weights_spec Specification for the weights
   * @param trainable make the weight trainable if true
   * @param shared_names names to refer to when the weights are borrowed from
   * the original source; pass an empty vector if not shared
   *
   * @return created weights list
   */
  std::vector<Weight *>
  requestWeights(const GraphNode &node,
                 const std::vector<Weight::Spec> &weights_spec, bool trainable,
                 const std::vector<std::string> &shared_names);
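
  /**
   * A minimal usage sketch (illustrative only; getWeightsSpec() is a
   * hypothetical accessor, not part of this API): weights are requested per
   * node during graph compilation and owned by the manager afterwards.
   * @code
   * std::vector<Weight::Spec> specs = layer.getWeightsSpec(); // hypothetical
   * std::vector<Weight *> weights =
   *   manager.requestWeights(node, specs, true, {});
   * @endcode
   */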
  /**
   * @brief Create tensors with the given spec
   *
   * @param node Graph node to extract node identifiers/info
   * @param tensors_spec Specification for the tensors
   * @param trainable make the tensors trainable if true
   * @param shared_names if the tensors are shared, the names are needed
   *
   * @return created tensors list
   */
  std::vector<Var_Grad *> requestTensors(
    const GraphNode &node, const std::vector<Var_Grad::Spec> &tensors_spec,
    bool trainable, const std::vector<std::string> &shared_names = {});
  /**
   * @brief Create optimizer variables for a weight
   *
   * @param dims dimensions of the variables to create
   * @param name name of the weight the variables belong to
   * @param lifespan lifespan of the requested variables
   * @param initializer initializer for the variables
   *
   * @return created tensors list
   */
  std::vector<Tensor *> requestWeightOptimizerVariables(
    const std::vector<TensorDim> &dims, const std::string &name,
    const TensorLifespan &lifespan,
    Tensor::Initializer initializer = Tensor::Initializer::NONE);
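
  /**
   * A minimal usage sketch (illustrative only; the weight name is
   * hypothetical): an optimizer such as Adam would request two extra
   * variables (first/second moment) per weight, each with the weight's
   * dimension.
   * @code
   * std::vector<Tensor *> opt_vars = manager.requestWeightOptimizerVariables(
   *   {weight_dim, weight_dim}, "fc0:weight", TensorLifespan::MAX_LIFESPAN,
   *   Tensor::Initializer::ZEROS);
   * @endcode
   */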
  /**
   * @brief Create input tensors with the given dimensions
   *
   * @param node Graph node to extract node identifiers/info
   * @param inputs_dim dimensions of the inputs to create
   * @param outputs_name Name of the already requested output tensors
   *
   * @return created tensors list
   *
   * @details create Var_Grads to be used as input of GraphNode with the
   * inputs_dim as their spec. If the outputs_name is provided, the returned
   * Var_Grad share tensors with the already allocated Var_Grad for outputs,
   * named with outputs_name. In this case, the input_dim and the shape of the
   * output tensors must match. If the outputs_name is empty, new tensors are
   * requested.
   */
  std::vector<Var_Grad *>
  requestInputs(const GraphNode &node, const std::vector<TensorDim> &inputs_dim,
                const std::vector<std::string> &outputs_name = {});
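
  /**
   * A minimal usage sketch (illustrative only; the output name is
   * hypothetical): connecting a node's inputs to the previously requested
   * outputs of its predecessor lets the two share the underlying tensors
   * instead of allocating new ones.
   * @code
   * std::vector<Var_Grad *> ins =
   *   manager.requestInputs(node, {TensorDim(32, 1, 1, 100)},
   *                         {"prev_layer:output0"}); // hypothetical name
   * @endcode
   */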
  /**
   * @brief Get all the weights which match the given condition
   *
   * @return the weights satisfying the given condition
   */
  std::vector<Weight *>
  getWeights(const std::function<bool(const Weight *)> &condition = nullptr);
  /**
   * @brief Get the min/max of a tensor execution order
   *
   * @param name name of the tensor
   * @param is_weight check if this should be queried in the weight pool
   * @return std::pair<unsigned int, unsigned int> min and max execution order
   */
  std::pair<unsigned int, unsigned int>
  getMinMaxTensorExecutionOrder(const std::string &name, bool is_weight);

  /**
   * @brief Get the second max of a tensor execution order
   *
   * @param name name of the tensor
   * @param is_weight check if this should be queried in the weight pool
   * @return 2nd max execution order value
   */
  unsigned int getSecondMaxTensorExecutionOrder(const std::string &name,
                                                bool is_weight);
  /**
   * @brief check if the given execution order is the first access
   *
   * @param name tensor name
   * @param current_execution current execution order
   * @param is_weight check if this should be queried in the weight pool
   * @return bool true if the given execution order is the first access
   */
  bool isFirstAccess(const std::string &name, unsigned current_execution,
                     bool is_weight = false);

  /**
   * @brief check if the given execution order is the last access
   *
   * @param name tensor name
   * @param current_execution current execution order
   * @param is_weight check if this should be queried in the weight pool
   * @return bool true if the given execution order is the last access
   */
  bool isLastAccess(const std::string &name, unsigned current_execution,
                    bool is_weight = false);

  /**
   * @brief check if the given execution order is the second last access
   *
   * @param name tensor name
   * @param current_execution current execution order
   * @param is_weight check if this should be queried in the weight pool
   * @return bool true if the given execution order is the second last access
   */
  bool isSecondLastAccess(const std::string &name, unsigned current_execution,
                          bool is_weight = false);
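
  /**
   * A minimal usage sketch (illustrative only; the tensor name and exec_order
   * are placeholders): the access-order queries can drive lifetime decisions,
   * e.g. releasing a tensor right after its final use in the iteration.
   * @code
   * auto minmax = manager.getMinMaxTensorExecutionOrder("t0", false);
   * // minmax.first: first access, minmax.second: last access
   * if (manager.isLastAccess("t0", exec_order)) {
   *   // safe to evict/flush "t0" after this execution order
   * }
   * @endcode
   */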
  /**
   * @brief Check if the manager has allocated tensors
   *
   * @return true if tensors allocated, else false
   */
  bool isAllocated() const { return tensor_pool.isAllocated(); }
  /**
   * @brief Set the batch size for the inputs/outputs of the layers
   */
  void setBatchSize(unsigned int batch) {
    /**
     * All the tensors must be deallocated first and then allocated again by
     * the caller.
     */
    for (auto &in : inputs_v2)
      in->setBatchSize(batch);
    for (auto &out : outputs_v2)
      out->setBatchSize(batch);
  }
  /**
   * @brief Set the batch size for the given tensor
   *
   * @note this does not work for weights as they are supposed to be
   * independent of the batch size.
   */
  void setBatchSize(const std::string &name, unsigned int batch) {
    tensor_pool.setBatchSize(name, batch);
  }
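
  /**
   * A minimal usage sketch (illustrative only; max_exec_order is a
   * placeholder): per the note above, tensors must be deallocated before the
   * batch size changes and re-allocated afterwards.
   * @code
   * manager.deallocateTensors();              // weights stay allocated
   * manager.setBatchSize(64);                 // resize layer inputs/outputs
   * manager.allocateTensors(max_exec_order);  // re-plan and re-allocate
   * @endcode
   */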
  /**
   * @brief Allocate memory for all the managed tensors
   *
   * @param[in] max_exec_order_ The maximum order of execution to determine
   * the memory layout
   *
   * @note Any requested tensor which is not used inside the max_exec_order is
   * not initialized and will not be allocated. The initialization uses a
   * memory planner to plan the layout of all the tensors which are used at
   * least once before the max_exec_order.
   */
  void allocateTensors(unsigned int max_exec_order_);
  /**
   * @brief Deallocate memory for all the managed tensors
   */
  void deallocateTensors(bool dealloc_weights = false);
  /**
   * @brief Allocate memory for all the managed weights
   *
   * @param[in] max_exec_order_ The maximum order of execution to determine
   * the memory layout
   *
   * @note Any requested tensor which is not used inside the max_exec_order is
   * not initialized and will not be allocated. The initialization uses a
   * memory planner to plan the layout of all the tensors which are used at
   * least once before the max_exec_order.
   *
   * @note this will make requests to the tensor pool and allocate the
   * corresponding weights
   */
  void allocateWeights(unsigned int max_exec_order_);
  /**
   * @brief Deallocate memory for all the weights
   */
  void deallocateWeights();
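
  /**
   * A minimal lifecycle sketch (illustrative only; max_exec_order is a
   * placeholder): weights and non-weight tensors are allocated/deallocated
   * independently, so weights can survive repeated tensor (re)allocations.
   * @code
   * manager.allocateWeights(max_exec_order);
   * manager.allocateTensors(max_exec_order);
   * // ... run iterations ...
   * manager.deallocateTensors();      // keep weights
   * manager.deallocateTensors(true);  // also drop weights
   * @endcode
   */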
  /**
   * @brief Set optimizations for manager
   *
   * @param val true to enable, else false
   */
  void setOptimizations(bool val) { enable_optimizations = val; }
  /**
   * @brief Update externally dependent tensors
   *
   * @param name Name of the tensor
   * @param t External tensor
   */
  void fillPlaceholder(const std::string &name, const Tensor &t) {
    tensor_pool.fillPlaceholder(name, t);
  }
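
  /**
   * A minimal usage sketch (illustrative only; the tensor name is
   * hypothetical): placeholders such as model inputs and labels are backed
   * by externally provided tensors right before execution.
   * @code
   * Tensor input(TensorDim(32, 1, 1, 100)); // a batch of user data
   * manager.fillPlaceholder("input0", input);
   * @endcode
   */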
  /**
   * @brief Get the tensor of the given name
   *
   * @return ptr to the tensor with the given name
   * @throws if no tensor is found with the given name
   */
  Tensor *getTensor(const std::string &name) {
    try {
      return tensor_pool.getTensor(name);
    } catch (...) {
      return weight_pool.getTensor(name);
    }
  }
  /**
   * @brief request a Tensor with the weight specification
   *
   * @param spec specification
   * @param identify_as identify the tensor as part of a group
   * @return Tensor* requested tensor
   */
  Tensor *requestTensor(const WeightSpecV2 &spec, TensorGroupType identify_as);
  /**
   * @brief request a Tensor with the variable + gradient specification
   *
   * @param spec specification
   * @param identify_as identify the tensor as part of a group
   * @param exec_order execution order to refer to
   * @param scope common scope to attach in front of the current specification
   * name
   * @param expose_var if true, expose the variable tensor out of the graph;
   * the tensor stays valid up to max_exec_order when allocation happens
   * @param expose_grad if true, expose the gradient tensor out of the graph;
   * the tensor stays valid up to max_exec_order when allocation happens
   * @return Var_Grad* requested tensor
   */
  Var_Grad *requestTensor(const VarGradSpecV2 &spec,
                          TensorGroupType identify_as,
                          const GraphNode::ExecutionOrder &exec_order,
                          const std::string &scope = "",
                          bool expose_var = false, bool expose_grad = false);
  /**
   * @brief request a vector of tensors with the variable + gradient
   * specification
   *
   * @param specs specifications
   * @param identify_as identify the tensors as part of a group
   * @param exec_order execution order to refer to
   * @param scope common scope to attach in front of the current specification
   * names
   * @param expose_var if true, expose the variable tensors out of the graph;
   * the tensors stay valid up to max_exec_order when allocation happens
   * @param expose_grad if true, expose the gradient tensors out of the graph;
   * the tensors stay valid up to max_exec_order when allocation happens
   * @return created tensors list
   */
  std::vector<Var_Grad *> requestTensors(
    const std::vector<VarGradSpecV2> &specs, TensorGroupType identify_as,
    const GraphNode::ExecutionOrder &exec_order, const std::string &scope = "",
    bool expose_var = false, bool expose_grad = false);
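
  /**
   * A minimal usage sketch (illustrative only; spec construction is omitted):
   * V2 requests identify each tensor with a group so the manager can
   * book-keep inputs, outputs, weights and extra states separately.
   * @code
   * // given a VarGradSpecV2 `spec` built elsewhere (construction omitted),
   * Var_Grad *state = manager.requestTensor(
   *   spec, Manager::TensorGroupType::TENSORS, node.getExecutionOrder(),
   *   node.getName());
   * @endcode
   */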
  /**
   * @brief flush cache data
   */
  void flushCache();

  /**
   * @brief flush cache data except the given order
   *
   * @param order execution order to except
   */
  void flushCacheExcept(unsigned int order);
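
  /**
   * A minimal usage sketch (illustrative only; exec_order is a placeholder):
   * when swap is enabled, cached tensor data can be flushed so that only what
   * the current execution order needs stays resident.
   * @code
   * manager.flushCacheExcept(exec_order); // keep tensors for this order
   * manager.flushCache();                 // flush everything
   * @endcode
   */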
private:
  /** @todo: merge this list to one */
  std::vector<std::unique_ptr<Weight>>
    weights_v2; /**< weights for the layers */
  std::vector<std::unique_ptr<Var_Grad>>
    inputs_v2; /**< inputs for the layers */
  std::vector<std::unique_ptr<Var_Grad>>
    outputs_v2; /**< outputs for the layers */
  std::vector<std::unique_ptr<Var_Grad>>
    tensors_v2; /**< extra tensors required by the layers */

  std::array<std::vector<std::unique_ptr<Var_Grad>>, NUM_TENSOR_GROUP_TYPE>
    tensor_book; /**< reference to the tensor book kept */

  TensorPool weight_pool; /**< tensor pool to request weights */
  TensorPool tensor_pool; /**< tensor pool to request tensors */

  bool enable_optimizations; /**< to enable memory optimizations */
  /**
   * @brief Finalize the given tensor pool
   *
   * @param pool Tensor pool to finalize
   * @param start Start execution order
   * @param end End execution order
   */
  void finalizeTensorPool(TensorPool &pool, unsigned int start,
                          unsigned int end);
};
} // namespace nntrainer

#endif /* __cplusplus */
#endif /* __MANAGER_H__ */