From 6f01a34c92bcc58af113ffc20d52d0ba10885dbd Mon Sep 17 00:00:00 2001 From: Jihoon Lee Date: Wed, 19 Jan 2022 19:40:02 +0900 Subject: [PATCH] [Tensor Pool] Add expose/persist concept This patch adds the expose/persist concept to the tensor pool and manager. When a tensor is exposed, this means that the tensor is guaranteed to remain valid until max_exec, where max_exec is the value passed to allocateTensors(max_exec); **Self evaluation:** 1. Build test: [X]Passed [ ]Failed [ ]Skipped 2. Run test: [X]Passed [ ]Failed [ ]Skipped Signed-off-by: Jihoon Lee --- nntrainer/graph/network_graph.cpp | 3 +-- nntrainer/tensor/manager.cpp | 28 ++++++++++++++++------------ nntrainer/tensor/manager.h | 4 +--- nntrainer/tensor/tensor_pool.cpp | 8 +++++++- nntrainer/tensor/tensor_pool.h | 3 +++ 5 files changed, 28 insertions(+), 18 deletions(-) diff --git a/nntrainer/graph/network_graph.cpp b/nntrainer/graph/network_graph.cpp index 638974e2..6101a844 100644 --- a/nntrainer/graph/network_graph.cpp +++ b/nntrainer/graph/network_graph.cpp @@ -799,8 +799,7 @@ NetworkGraph::finalizeContext(const std::shared_ptr &lnode, lnode->configureRunContext( // TODO: update weights spec for trainable based on layer trainable prop tensor_manager->requestWeights(gnode, init_context.getWeightsSpec(), - lnode->getTrainable(), shared_weight_names, - graph_exec_end), + lnode->getTrainable(), shared_weight_names), inputs, outputs, tensor_manager->requestTensors(gnode, init_context.getTensorsSpec(), shared_tensor_names)); diff --git a/nntrainer/tensor/manager.cpp b/nntrainer/tensor/manager.cpp index e9e8ed5d..fb3fdc2d 100644 --- a/nntrainer/tensor/manager.cpp +++ b/nntrainer/tensor/manager.cpp @@ -144,7 +144,8 @@ void Manager::deallocateWeights() { weight_pool.deallocate(); } static Tensor *requestTensor_(const TensorSpecV2 &spec, const GraphNode::ExecutionOrder &exec_order, - const std::string &scope, TensorPool &tp) { + const std::string &scope, TensorPool &tp, + bool expose) { using RT = 
TensorSpecV2::RequestType; using LS = TensorLifespan; NNTR_THROW_IF(spec.request_type == RT::MAYBE_MODIFYING_VIEW, @@ -155,6 +156,9 @@ static Tensor *requestTensor_(const TensorSpecV2 &spec, auto [forward, calc_grad, calc_deriv] = exec_order; std::vector order = spec.additional_exec_order; + if (expose) { + order.push_back(TensorPool::PERSIST_END_ORDER); + } const auto name = scope + ":" + spec.name; @@ -202,12 +206,12 @@ Var_Grad *Manager::requestTensor(const VarGradSpecV2 &spec, << "Currently, input and tensors group type is not yet implemented, use " "requestInputs() requestTensors() instead"; - Tensor *var = - requestTensor_(spec.variable_spec, exec_order, scope, tensor_pool); - Tensor *grad = - spec.gradient_spec - ? requestTensor_(*spec.gradient_spec, exec_order, scope, tensor_pool) - : nullptr; + Tensor *var = requestTensor_(spec.variable_spec, exec_order, scope, + tensor_pool, expose_var); + Tensor *grad = spec.gradient_spec + ? requestTensor_(*spec.gradient_spec, exec_order, scope, + tensor_pool, expose_grad) + : nullptr; /// @note as only supporting identify_as == TensorGroupType::output, only /// saves to outputs for now @@ -347,10 +351,10 @@ void Manager::initializeTensorsTrain(unsigned int max_exec_order_) { */ std::vector Manager::requestWeights( const GraphNode &node, const std::vector &weights_spec, - bool trainable, const std::vector &shared_names, - const unsigned int max_exec_order) { + bool trainable, const std::vector &shared_names) { const auto [forwarding_order, calcGradient_order, calcDerivative_order] = node.getExecutionOrder(); + std::vector var_exec_order( {forwarding_order, calcGradient_order, calcDerivative_order}); std::vector default_grad_exec_order( @@ -372,7 +376,7 @@ std::vector Manager::requestWeights( * applied to the weight. 
*/ if (Weight::isGradientClipByGlobalNorm(clip_by_global_norm)) - grad_exec_order.push_back(max_exec_order); + grad_exec_order.push_back(TensorPool::PERSIST_END_ORDER); Tensor *var = nullptr, *grad = nullptr; bool is_dependent = !shared_names.empty(); @@ -529,9 +533,9 @@ Manager::requestInputs(const GraphNode &node, inputs_v2.emplace_back(std::make_unique( requestTensor_(var_spec, node.getExecutionOrder(), node.getName(), - tensor_pool), + tensor_pool, false), requestTensor_(grad_spec, node.getExecutionOrder(), node.getName(), - tensor_pool))); + tensor_pool, false))); } ret.reserve(inputs_dim.size()); diff --git a/nntrainer/tensor/manager.h b/nntrainer/tensor/manager.h index a93af9b5..7308bad4 100644 --- a/nntrainer/tensor/manager.h +++ b/nntrainer/tensor/manager.h @@ -172,15 +172,13 @@ public: * @param trainable make the weight trainable if true * @param shared_names name to refer to when the weights are borrowed from the * original source. if not shared pass empty vector - * @param max_exec_order the maximum execution order * * @return created weights list */ std::vector requestWeights(const GraphNode &node, const std::vector &weights_spec, bool trainable, - const std::vector &shared_names, - const unsigned int max_exec_order); + const std::vector &shared_names); /** * @brief Create tensors with the given spec diff --git a/nntrainer/tensor/tensor_pool.cpp b/nntrainer/tensor/tensor_pool.cpp index 3b4db9ff..648db6ce 100644 --- a/nntrainer/tensor/tensor_pool.cpp +++ b/nntrainer/tensor/tensor_pool.cpp @@ -126,8 +126,14 @@ void TensorPool::finalize(const MemoryPlanner &planner, unsigned int validity_end = validity_start; for (unsigned int idx = 0; idx < details->exec_order.size(); idx++) { - if (details->exec_order[idx] <= end_order) + if (details->exec_order[idx] == PERSIST_END_ORDER) { + validity_end = end_order; + break; + } + + if (details->exec_order[idx] <= end_order) { validity_end = std::max(validity_end, details->exec_order[idx]); + } } /** diff --git 
a/nntrainer/tensor/tensor_pool.h b/nntrainer/tensor/tensor_pool.h index 271463d2..c3e590e3 100644 --- a/nntrainer/tensor/tensor_pool.h +++ b/nntrainer/tensor/tensor_pool.h @@ -17,6 +17,7 @@ #ifdef __cplusplus #include +#include #include #include #include @@ -35,6 +36,8 @@ namespace nntrainer { class TensorPool { public: + static constexpr unsigned PERSIST_END_ORDER = + std::numeric_limits::max(); /** * @brief Constructor of TensorPool */ -- 2.34.1