From f38657aef2b0dd428ac5205f44295ed4e8953b47 Mon Sep 17 00:00:00 2001
From: Jiho Chu
Date: Tue, 20 Dec 2022 10:54:39 +0900
Subject: [PATCH] [Tensor] Remove calcGrad step for non-trainable layer

This patch implements the trainable property behavior. If a layer is
set as non-trainable, it does not need to execute the calcGrad step, so
we can remove it from the execution order and also skip the gradient
calculation.

Signed-off-by: Jiho Chu
---
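Note (illustration only, not part of the commit): a minimal standalone
sketch of the intended behavior. DummyLayer, gradExecOrder and
backwarding below are simplified stand-ins for nntrainer's
LayerNode/Manager internals, not the real API. A non-trainable layer
gets no calcGradient entry in its gradient execution order, and its
calcGradient step is skipped during backwarding.

  #include <iostream>
  #include <vector>

  // Simplified stand-in for a layer node that carries a trainable property.
  struct DummyLayer {
    const char *name;
    bool trainable;

    void calcGradient() { std::cout << name << ": calcGradient\n"; }
    void calcDerivative() { std::cout << name << ": calcDerivative\n"; }
  };

  // The gradient exec order receives the calcGradient step only when the
  // layer is trainable, mirroring how the exec-order vectors are extended.
  std::vector<unsigned int> gradExecOrder(const DummyLayer &l,
                                          unsigned int calcGradient_order,
                                          unsigned int calcDerivative_order) {
    std::vector<unsigned int> order{calcDerivative_order};
    if (l.trainable)
      order.insert(order.begin(), calcGradient_order);
    return order;
  }

  // Backwarding skips the calcGradient step entirely for non-trainable layers.
  void backwarding(std::vector<DummyLayer> &layers) {
    for (auto it = layers.rbegin(); it != layers.rend(); ++it) {
      if (it->trainable)
        it->calcGradient();
      it->calcDerivative();
    }
  }

  int main() {
    std::vector<DummyLayer> net{{"fc0", true}, {"fc1", false}, {"fc2", true}};

    // fc1 is non-trainable, so its gradient exec order has a single entry.
    std::cout << "fc1 grad exec order size: "
              << gradExecOrder(net[1], 1, 2).size() << "\n";

    backwarding(net); // fc1 runs calcDerivative only
    return 0;
  }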

 nntrainer/graph/network_graph.cpp |  2 +-
 nntrainer/models/neuralnet.cpp    |  8 +++++---
 nntrainer/tensor/manager.cpp      | 25 +++++++++++++++----------
 nntrainer/tensor/manager.h        |  8 ++++----
 4 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/nntrainer/graph/network_graph.cpp b/nntrainer/graph/network_graph.cpp
index 3eb929c..6bdf553 100644
--- a/nntrainer/graph/network_graph.cpp
+++ b/nntrainer/graph/network_graph.cpp
@@ -824,7 +824,7 @@ NetworkGraph::finalizeContext(const std::shared_ptr<LayerNode> &lnode,
                                    lnode->getTrainable(), shared_weight_names),
     inputs, outputs,
     tensor_manager->requestTensors(gnode, init_context.getTensorsSpec(),
-                                   shared_tensor_names));
+                                   lnode->getTrainable(), shared_tensor_names));
 
   return outputs;
 }
diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp
index 0367042..7faa69a 100644
--- a/nntrainer/models/neuralnet.cpp
+++ b/nntrainer/models/neuralnet.cpp
@@ -313,12 +313,14 @@ void NeuralNetwork::backwarding(int iteration,
     PROFILE_MEM_ANNOTATE("CalcGradient: " + node->getName());
 
     bool apply_gradient = true;
+
     /** If gradient optimization mode, then calculate gradient first */
     if (dynamic_training_opt.isGradientMode())
       node->calcGradient();
 
     /**
-     * If optimization off, or gradient must be applied, then this will be true
+     * If optimization off, or gradient must be applied, then this will be
+     * true
      * @todo This apply gradient should be passed to the each weight and later
      * be queried when updating gradient at once. (after moving apply_gradient
      * out of this function)
@@ -326,8 +328,8 @@ void NeuralNetwork::backwarding(int iteration,
      */
     // auto &layer = node->getObject();
     // apply_gradient = dynamic_training_opt.checkIfApply(
-    //   layer->getWeightsRef(), layer->net_input[0], layer->net_hidden[0], opt,
-    //   iteration);
+    //   layer->getWeightsRef(), layer->net_input[0], layer->net_hidden[0],
+    //   opt, iteration);
 
     /** If gradient must be applied and its not gradient mode, calculate
      * gradient
diff --git a/nntrainer/tensor/manager.cpp b/nntrainer/tensor/manager.cpp
index ad6be0e..ac70455 100644
--- a/nntrainer/tensor/manager.cpp
+++ b/nntrainer/tensor/manager.cpp
@@ -354,10 +354,9 @@ std::vector<Weight *> Manager::requestWeights(
   const auto [forwarding_order, calcGradient_order, calcDerivative_order] =
     node.getExecutionOrder();
 
-  std::vector<unsigned int> var_exec_order(
-    {forwarding_order, calcGradient_order, calcDerivative_order});
-  std::vector<unsigned int> default_grad_exec_order(
-    {calcGradient_order, calcDerivative_order});
+  std::vector<unsigned int> default_var_exec_order(
+    {forwarding_order, calcDerivative_order});
+  std::vector<unsigned int> default_grad_exec_order({calcDerivative_order});
 
   TensorLifespan var_ls = TensorLifespan::MAX_LIFESPAN;
   TensorLifespan grad_ls = TensorLifespan::BACKWARD_FUNC_LIFESPAN;
@@ -368,7 +367,14 @@ std::vector<Weight *> Manager::requestWeights(
   for (unsigned int i = 0; i < weights_spec.size(); ++i) {
     auto &[dim, t_initializer, w_reg, w_reg_const, decay, clip_by_global_norm,
            need_gradient, name] = weights_spec.at(i);
+    auto var_exec_order = default_var_exec_order;
     auto grad_exec_order = default_grad_exec_order;
+
+    if (trainable) {
+      var_exec_order.insert(var_exec_order.begin(), calcGradient_order);
+      grad_exec_order.insert(grad_exec_order.begin(), calcGradient_order);
+    }
+
     /**
      * If the weight is supposed to be clip by global norm, extend its exec
      * order with the max exec order where it will be used for clipping and then
@@ -417,10 +423,9 @@ std::vector<Weight *> Manager::requestWeights(
  * @brief Create weights with the given spec
  *
  */
-std::vector<Var_Grad *>
-Manager::requestTensors(const GraphNode &node,
-                        const std::vector<Var_Grad::Spec> &tensors_spec,
-                        const std::vector<std::string> &shared_names) {
+std::vector<Var_Grad *> Manager::requestTensors(
+  const GraphNode &node, const std::vector<Var_Grad::Spec> &tensors_spec,
+  bool trainable, const std::vector<std::string> &shared_names) {
   const auto [forwarding_order, calcGradient_order, calcDerivative_order] =
     node.getExecutionOrder();
 
@@ -438,7 +443,8 @@ Manager::requestTensors(const GraphNode &node,
     var_exec_order.push_back(forwarding_order);
 
     /** usage for tensors gradient in backwarding */
-    if (enum_class_logical_and(tspan, TensorLifespan::CALC_GRAD_LIFESPAN)) {
+    if (trainable &&
+        enum_class_logical_and(tspan, TensorLifespan::CALC_GRAD_LIFESPAN)) {
       var_exec_order.push_back(calcGradient_order);
       grad_exec_order.push_back(calcGradient_order);
     }
@@ -460,7 +466,6 @@ Manager::requestTensors(const GraphNode &node,
                                          dim, grad_exec_order, tspan,
                                          Tensor::Initializer::ZEROS);
       }
-
     } else {
       var = tensor_pool.request(name, dim, var_exec_order, tspan, t_init);
diff --git a/nntrainer/tensor/manager.h b/nntrainer/tensor/manager.h
index adf9624..455baa4 100644
--- a/nntrainer/tensor/manager.h
+++ b/nntrainer/tensor/manager.h
@@ -193,14 +193,14 @@ public:
    *
    * @param node Graph node to extract node identifiers/info
    * @param tensors_spec Specification for the tensors
+   * @param trainable make the weight trainable if true
    * @param shared_names if tensor is shared, name is needed
    *
    * @return created tensors list
    */
-  std::vector<Var_Grad *>
-  requestTensors(const GraphNode &node,
-                 const std::vector<Var_Grad::Spec> &tensors_spec,
-                 const std::vector<std::string> &shared_names = {});
+  std::vector<Var_Grad *> requestTensors(
+    const GraphNode &node, const std::vector<Var_Grad::Spec> &tensors_spec,
+    bool trainable, const std::vector<std::string> &shared_names = {});
 
   /**
    * @brief Create tensors with the given spec
-- 
2.7.4