// SPDX-License-Identifier: Apache-2.0
/**
 * Copyright (C) 2020 Parichay Kapoor <pk.kapoor@samsung.com>
 *
 * @brief  This is NNtrainer manager for all weights, i/o and intermediate
 * tensors
 * @see    https://github.com/nnstreamer/nntrainer
 * @author Parichay Kapoor <pk.kapoor@samsung.com>
 * @author Jihoon Lee <jhoon.it.lee@samsung.com>
 * @bug    No known bugs except for NYI items
 *
 * @details The manager assumes that the layer inputs/outputs are tracked by
 * the manager in the order of execution. If this order is not maintained,
 * the optimizations cannot be performed and will result in wrong values.
 */
#ifndef __MANAGER_H__
#define __MANAGER_H__
#ifdef __cplusplus

#include "tensor_wrap_specs.h"

#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include <basic_planner.h>
#include <graph_node.h>
#include <tensor_pool.h>
#include <var_grad.h>
#include <weight.h>

namespace nntrainer {
/**
 * @class   MMapedMemory
 * @brief   Memory Handler, that has mmaped memory with a file descriptor
 */
class MMapedMemory {
public:
  /**
   * @brief Construct a new MMapedMemory object
   *
   * @param size byte size of the memory chunk
   * @param allocate_fd_ map a shared memory object to a file
   */
  MMapedMemory(size_t size, bool allocate_fd_ = false);
  /**
   * @brief Destroy the MMapedMemory object
   */
  ~MMapedMemory() noexcept;

  /**
   * @brief Copy constructor (deleted)
   */
  MMapedMemory(const MMapedMemory &) = delete;

  /**
   * @brief Copy assignment operator (deleted)
   */
  MMapedMemory &operator=(const MMapedMemory &) = delete;

  /**
   * @brief Get the file descriptor.
   * Will return -1 except on Android.
   * @todo make this available for other platforms
   *
   * @return -1 if the fd is not allocated (or unable to allocate)
   */
  int getFd() noexcept { return fd; }
  /**
   * @brief get the size of managed memory
   */
  size_t size() noexcept { return buf_size; }

  /**
   * @brief get a typed buffer from the memory
   *
   * @tparam T type to specify the buffer; the return is reinterpreted to T*
   * @return T* typed buffer, nullptr if empty
   */
  template <typename T> T *typedBuffer() noexcept {
    return reinterpret_cast<T *>(buf);
  }

  /**
   * @brief get the raw buffer
   */
  void *data() noexcept { return typedBuffer<void>(); }

private:
  int fd;           /**< fd to access the shared_memory */
  void *buf;        /**< buffer object when use_shared_memory */
  size_t buf_size;  /**< buffer size */
  bool allocate_fd; /**< option to choose to allocate an fd */
};
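
/**
 * A minimal usage sketch (illustrative only): the handler owns the mapped
 * memory and reinterprets it on demand; the fd is only meaningful on Android
 * builds, per getFd() above.
 * @code
 * MMapedMemory mem(1024 * sizeof(float)); // request 1024 floats worth of bytes
 * float *buf = mem.typedBuffer<float>();  // same memory viewed as float*
 * if (buf != nullptr) {
 *   buf[0] = 1.0f; // memory is valid for mem.size() bytes
 * }
 * @endcode
 */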
/**
 * @class   Manager
 * @brief   manager of nntrainer
 */
class Manager {
public:
  /**
   * @brief Tensor Group Type
   * @note this is not a mutually exclusive list; a tensor might be identified
   * as input as well as output
   */
  enum TensorGroupType {
    INPUT = 0,   /**< Input of an operation */
    OUTPUT = 1,  /**< Output of an operation */
    WEIGHT = 2,  /**< Weight of an operation */
    TENSORS = 3, /**< Extra states of an operation */
  };

  constexpr inline static unsigned NUM_TENSOR_GROUP_TYPE =
    4; /**< number of tensor group types */
  /**
   * @brief Constructor of Manager
   */
  Manager() : enable_optimizations(true) {}

  /**
   * @brief Constructor of Manager with memory swap
   *
   * @param enable_swap enable swapping tensors to disk
   * @param swap_path path to store the swapped tensors
   */
  Manager(bool enable_swap, const std::string &swap_path = "") :
    weight_pool(enable_swap, swap_path, "weight_pool"),
    tensor_pool(enable_swap, swap_path, "tensor_pool"),
    enable_optimizations(true) {}
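
  /**
   * A minimal construction sketch (illustrative only; the swap path is a
   * hypothetical example): with swap enabled, both pools may offload tensor
   * data to files under the given path.
   * @code
   * Manager plain;                           // no swap, optimizations enabled
   * Manager swapped(true, "/tmp/nntr_swap"); // weight/tensor pools may swap
   * @endcode
   */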
  /**
   * @brief Copy construct a new Manager object (deleted)
   */
  Manager(const Manager &) = delete;

  /**
   * @brief Copy assign a new Manager object (deleted)
   */
  Manager &operator=(const Manager &) = delete;

  /**
   * @brief Move construct a new Manager object
   */
  Manager(Manager &&) noexcept = default;

  /**
   * @brief Move assign a new Manager object
   *
   * @return Manager& reference to the newly assigned manager
   */
  Manager &operator=(Manager &&) noexcept = default;

  /**
   * @brief Destructor of Manager
   */
  ~Manager() = default;
  /**
   * @brief Create weights with the given spec
   * @todo The max_exec_order can be reduced to the max exec order which
   * updates the gradient
   *
   * @param node Graph node to extract node identifiers/info
   * @param weights_spec Specification for the weights
   * @param trainable make the weight trainable if true
   * @param shared_names names to refer to when the weights are borrowed from
   * the original source; pass an empty vector if not shared
   *
   * @return created weights list
   */
  std::vector<Weight *>
  requestWeights(const GraphNode &node,
                 const std::vector<Weight::Spec> &weights_spec, bool trainable,
                 const std::vector<std::string> &shared_names);
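
  /**
   * A minimal usage sketch (illustrative only; getWeightsSpec() is a
   * hypothetical accessor, not part of this API): weights are requested per
   * node during graph compilation and owned by the manager afterwards.
   * @code
   * std::vector<Weight::Spec> specs = layer.getWeightsSpec(); // hypothetical
   * std::vector<Weight *> weights =
   *   manager.requestWeights(node, specs, true, {});
   * @endcode
   */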
  /**
   * @brief Create tensors with the given spec
   *
   * @param node Graph node to extract node identifiers/info
   * @param tensors_spec Specification for the tensors
   * @param trainable make the tensors trainable if true
   * @param shared_names if the tensors are shared, the names are needed
   *
   * @return created tensors list
   */
  std::vector<Var_Grad *> requestTensors(
    const GraphNode &node, const std::vector<Var_Grad::Spec> &tensors_spec,
    bool trainable, const std::vector<std::string> &shared_names = {});
  /**
   * @brief Create optimizer variables for a weight
   *
   * @param dims dimensions of the variables to create
   * @param name name of the weight the variables belong to
   * @param lifespan lifespan of the requested variables
   * @param initializer initializer for the variables
   *
   * @return created tensors list
   */
  std::vector<Tensor *> requestWeightOptimizerVariables(
    const std::vector<TensorDim> &dims, const std::string &name,
    const TensorLifespan &lifespan,
    Tensor::Initializer initializer = Tensor::Initializer::NONE);
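
  /**
   * A minimal usage sketch (illustrative only; the weight name is
   * hypothetical): an optimizer such as Adam would request two extra
   * variables (first/second moment) per weight, each with the weight's
   * dimension.
   * @code
   * std::vector<Tensor *> opt_vars = manager.requestWeightOptimizerVariables(
   *   {weight_dim, weight_dim}, "fc0:weight", TensorLifespan::MAX_LIFESPAN,
   *   Tensor::Initializer::ZEROS);
   * @endcode
   */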
  /**
   * @brief Create input tensors with the given dimensions
   *
   * @param node Graph node to extract node identifiers/info
   * @param inputs_dim dimensions of the inputs to create
   * @param outputs_name Name of the already requested output tensors
   *
   * @return created tensors list
   *
   * @details create Var_Grads to be used as input of GraphNode with the
   * inputs_dim as their spec. If the outputs_name is provided, the returned
   * Var_Grad share tensors with the already allocated Var_Grad for outputs,
   * named with outputs_name. In this case, the input_dim and the shape of the
   * output tensors must match. If the outputs_name is empty, new tensors are
   * requested.
   */
  std::vector<Var_Grad *>
  requestInputs(const GraphNode &node, const std::vector<TensorDim> &inputs_dim,
                const std::vector<std::string> &outputs_name = {});
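
  /**
   * A minimal usage sketch (illustrative only; the output name is
   * hypothetical): connecting a node's inputs to the previously requested
   * outputs of its predecessor lets the two share the underlying tensors
   * instead of allocating new ones.
   * @code
   * std::vector<Var_Grad *> ins =
   *   manager.requestInputs(node, {TensorDim(32, 1, 1, 100)},
   *                         {"prev_layer:output0"}); // hypothetical name
   * @endcode
   */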
  /**
   * @brief Get all the weights which match the given condition
   *
   * @return the weights satisfying the given condition
   */
  std::vector<Weight *>
  getWeights(const std::function<bool(const Weight *)> &condition = nullptr);
  /**
   * @brief Get the min/max of a tensor execution order
   *
   * @param name name of the tensor
   * @param is_weight check if this should be queried in the weight pool
   * @return std::pair<unsigned int, unsigned int> min and max execution order
   */
  std::pair<unsigned int, unsigned int>
  getMinMaxTensorExecutionOrder(const std::string &name, bool is_weight);

  /**
   * @brief Get the second max of a tensor execution order
   *
   * @param name name of the tensor
   * @param is_weight check if this should be queried in the weight pool
   * @return 2nd max execution order value
   */
  unsigned int getSecondMaxTensorExecutionOrder(const std::string &name,
                                                bool is_weight);
  /**
   * @brief check if the given execution order is the first access
   *
   * @param name tensor name
   * @param current_execution current execution order
   * @param is_weight check if this should be queried in the weight pool
   * @return bool true if the given execution order is the first access
   */
  bool isFirstAccess(const std::string &name, unsigned current_execution,
                     bool is_weight = false);

  /**
   * @brief check if the given execution order is the last access
   *
   * @param name tensor name
   * @param current_execution current execution order
   * @param is_weight check if this should be queried in the weight pool
   * @return bool true if the given execution order is the last access
   */
  bool isLastAccess(const std::string &name, unsigned current_execution,
                    bool is_weight = false);

  /**
   * @brief check if the given execution order is the second last access
   *
   * @param name tensor name
   * @param current_execution current execution order
   * @param is_weight check if this should be queried in the weight pool
   * @return bool true if the given execution order is the second last access
   */
  bool isSecondLastAccess(const std::string &name, unsigned current_execution,
                          bool is_weight = false);
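
  /**
   * A minimal usage sketch (illustrative only; the tensor name and exec_order
   * are placeholders): the access-order queries can drive lifetime decisions,
   * e.g. releasing a tensor right after its final use in the iteration.
   * @code
   * auto minmax = manager.getMinMaxTensorExecutionOrder("t0", false);
   * // minmax.first: first access, minmax.second: last access
   * if (manager.isLastAccess("t0", exec_order)) {
   *   // safe to evict/flush "t0" after this execution order
   * }
   * @endcode
   */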
  /**
   * @brief Check if the manager has allocated tensors
   *
   * @return true if tensors allocated, else false
   */
  bool isAllocated() const { return tensor_pool.isAllocated(); }
  /**
   * @brief Set the batch size for the inputs/outputs of the layers
   */
  void setBatchSize(unsigned int batch) {
    /**
     * All the tensors must be deallocated first and then allocated again by
     * the caller.
     */
    for (auto &in : inputs_v2)
      in->setBatchSize(batch);
    for (auto &out : outputs_v2)
      out->setBatchSize(batch);
  }
  /**
   * @brief Set the batch size for the given tensor
   *
   * @note this does not work for weights as they are supposed to be
   * independent of the batch size.
   */
  void setBatchSize(const std::string &name, unsigned int batch) {
    tensor_pool.setBatchSize(name, batch);
  }
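
  /**
   * A minimal usage sketch (illustrative only; max_exec_order is a
   * placeholder): per the note above, tensors must be deallocated before the
   * batch size changes and re-allocated afterwards.
   * @code
   * manager.deallocateTensors();              // weights stay allocated
   * manager.setBatchSize(64);                 // resize layer inputs/outputs
   * manager.allocateTensors(max_exec_order);  // re-plan and re-allocate
   * @endcode
   */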
  /**
   * @brief Allocate memory for all the managed tensors
   *
   * @param[in] max_exec_order_ The maximum order of execution to determine
   * the memory layout
   *
   * @note Any requested tensor which is not used inside the max_exec_order is
   * not initialized and will not be allocated. The initialization uses a
   * memory planner to plan the layout of all the tensors which are used at
   * least once before the max_exec_order.
   */
  void allocateTensors(unsigned int max_exec_order_);
  /**
   * @brief Deallocate memory for all the managed tensors
   */
  void deallocateTensors(bool dealloc_weights = false);
  /**
   * @brief Allocate memory for all the managed weights
   *
   * @param[in] max_exec_order_ The maximum order of execution to determine
   * the memory layout
   *
   * @note Any requested tensor which is not used inside the max_exec_order is
   * not initialized and will not be allocated. The initialization uses a
   * memory planner to plan the layout of all the tensors which are used at
   * least once before the max_exec_order.
   *
   * @note this will make requests to the tensor pool and allocate the
   * corresponding weights
   */
  void allocateWeights(unsigned int max_exec_order_);
  /**
   * @brief Deallocate memory for all the weights
   */
  void deallocateWeights();
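
  /**
   * A minimal lifecycle sketch (illustrative only; max_exec_order is a
   * placeholder): weights and non-weight tensors are allocated/deallocated
   * independently, so weights can survive repeated tensor (re)allocations.
   * @code
   * manager.allocateWeights(max_exec_order);
   * manager.allocateTensors(max_exec_order);
   * // ... run iterations ...
   * manager.deallocateTensors();      // keep weights
   * manager.deallocateTensors(true);  // also drop weights
   * @endcode
   */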
  /**
   * @brief Set optimizations for manager
   *
   * @param val true to enable, else false
   */
  void setOptimizations(bool val) { enable_optimizations = val; }
  /**
   * @brief Update externally dependent tensors
   *
   * @param name Name of the tensor
   * @param t External tensor
   */
  void fillPlaceholder(const std::string &name, const Tensor &t) {
    tensor_pool.fillPlaceholder(name, t);
  }
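
  /**
   * A minimal usage sketch (illustrative only; the tensor name is
   * hypothetical): placeholders such as model inputs and labels are backed
   * by externally provided tensors right before execution.
   * @code
   * Tensor input(TensorDim(32, 1, 1, 100)); // a batch of user data
   * manager.fillPlaceholder("input0", input);
   * @endcode
   */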
  /**
   * @brief Get the tensor of the given name
   *
   * @return ptr to the tensor with the given name
   * @throws if no tensor is found with the given name
   */
  Tensor *getTensor(const std::string &name) {
    try {
      return tensor_pool.getTensor(name);
    } catch (...) {
      return weight_pool.getTensor(name);
    }
  }
  /**
   * @brief request a Tensor with the weight specification
   *
   * @param spec specification
   * @param identify_as identify the tensor as part of a group
   * @return Tensor* requested tensor
   */
  Tensor *requestTensor(const WeightSpecV2 &spec, TensorGroupType identify_as);
  /**
   * @brief request a Tensor with the variable + gradient specification
   *
   * @param spec specification
   * @param identify_as identify the tensor as part of a group
   * @param exec_order execution order to refer to
   * @param scope common scope to attach in front of the current specification
   * name
   * @param expose_var if true, expose the variable tensor out of the graph;
   * the tensor stays valid up to max_exec_order when allocation happens
   * @param expose_grad if true, expose the gradient tensor out of the graph;
   * the tensor stays valid up to max_exec_order when allocation happens
   * @return Var_Grad* requested tensor
   */
  Var_Grad *requestTensor(const VarGradSpecV2 &spec,
                          TensorGroupType identify_as,
                          const GraphNode::ExecutionOrder &exec_order,
                          const std::string &scope = "",
                          bool expose_var = false, bool expose_grad = false);
  /**
   * @brief request a vector of tensors with the variable + gradient
   * specification
   *
   * @param specs specifications
   * @param identify_as identify the tensors as part of a group
   * @param exec_order execution order to refer to
   * @param scope common scope to attach in front of the current specification
   * names
   * @param expose_var if true, expose the variable tensors out of the graph;
   * the tensors stay valid up to max_exec_order when allocation happens
   * @param expose_grad if true, expose the gradient tensors out of the graph;
   * the tensors stay valid up to max_exec_order when allocation happens
   * @return created tensors list
   */
  std::vector<Var_Grad *> requestTensors(
    const std::vector<VarGradSpecV2> &specs, TensorGroupType identify_as,
    const GraphNode::ExecutionOrder &exec_order, const std::string &scope = "",
    bool expose_var = false, bool expose_grad = false);
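
  /**
   * A minimal usage sketch (illustrative only; spec construction is omitted):
   * V2 requests identify each tensor with a group so the manager can
   * book-keep inputs, outputs, weights and extra states separately.
   * @code
   * // given a VarGradSpecV2 `spec` built elsewhere (construction omitted),
   * Var_Grad *state = manager.requestTensor(
   *   spec, Manager::TensorGroupType::TENSORS, node.getExecutionOrder(),
   *   node.getName());
   * @endcode
   */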
  /**
   * @brief flush cache data
   */
  void flushCache();

  /**
   * @brief flush cache data except the given order
   *
   * @param order execution order to except
   */
  void flushCacheExcept(unsigned int order);
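
  /**
   * A minimal usage sketch (illustrative only; exec_order is a placeholder):
   * when swap is enabled, cached tensor data can be flushed so that only what
   * the current execution order needs stays resident.
   * @code
   * manager.flushCacheExcept(exec_order); // keep tensors for this order
   * manager.flushCache();                 // flush everything
   * @endcode
   */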
private:
  /** @todo: merge this list to one */
  std::vector<std::unique_ptr<Weight>>
    weights_v2; /**< weights for the layers */
  std::vector<std::unique_ptr<Var_Grad>>
    inputs_v2; /**< inputs for the layers */
  std::vector<std::unique_ptr<Var_Grad>>
    outputs_v2; /**< outputs for the layers */
  std::vector<std::unique_ptr<Var_Grad>>
    tensors_v2; /**< extra tensors required by the layers */

  std::array<std::vector<std::unique_ptr<Var_Grad>>, NUM_TENSOR_GROUP_TYPE>
    tensor_book; /**< reference to the tensor book kept */

  TensorPool weight_pool; /**< tensor pool to request weights */
  TensorPool tensor_pool; /**< tensor pool to request tensors */

  bool enable_optimizations; /**< to enable memory optimizations */
  /**
   * @brief Finalize the given tensor pool
   *
   * @param pool Tensor pool to finalize
   * @param start Start execution order
   * @param end End execution order
   */
  void finalizeTensorPool(TensorPool &pool, unsigned int start,
                          unsigned int end);
};
} // namespace nntrainer

#endif /* __cplusplus */
#endif /* __MANAGER_H__ */