[Tensor] Remove calcGrad step for trainable layer
author Jiho Chu <jiho.chu@samsung.com>
Tue, 20 Dec 2022 01:54:39 +0000 (10:54 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Tue, 7 Feb 2023 22:55:08 +0000 (07:55 +0900)
This patch implements the trainable property behavior.

If a layer is set as non-trainable, it does not need to execute the
calcGrad step, so that step can be removed from its execution order
and the gradient calculation can be skipped.

Signed-off-by: Jiho Chu <jiho.chu@samsung.com>
nntrainer/graph/network_graph.cpp
nntrainer/models/neuralnet.cpp
nntrainer/tensor/manager.cpp
nntrainer/tensor/manager.h
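
The standalone sketch below (not the actual nntrainer code; ExecutionOrder, weightVarExecOrder and weightGradExecOrder are hypothetical names) illustrates the idea behind the manager.cpp change: a non-trainable layer's weights only need the forwarding and calcDerivative orders, while a trainable layer's weights additionally get the calcGradient order for both the variable and its gradient.

// Minimal sketch, assuming simplified types; mirrors the exec-order logic
// introduced in Manager::requestWeights, not the real API.
#include <iostream>
#include <vector>

struct ExecutionOrder {
  unsigned int forwarding;
  unsigned int calcGradient;
  unsigned int calcDerivative;
};

std::vector<unsigned int> weightVarExecOrder(const ExecutionOrder &eo,
                                             bool trainable) {
  // The weight variable is always read during forwarding and calcDerivative.
  std::vector<unsigned int> order{eo.forwarding, eo.calcDerivative};
  // Only a trainable weight is also touched during calcGradient.
  if (trainable)
    order.insert(order.begin() + 1, eo.calcGradient);
  return order;
}

std::vector<unsigned int> weightGradExecOrder(const ExecutionOrder &eo,
                                              bool trainable) {
  // The gradient is consumed in calcDerivative; it is only written in
  // calcGradient when the layer is trainable.
  std::vector<unsigned int> order{eo.calcDerivative};
  if (trainable)
    order.insert(order.begin(), eo.calcGradient);
  return order;
}

int main() {
  ExecutionOrder eo{/*forwarding=*/3, /*calcGradient=*/7, /*calcDerivative=*/8};

  for (bool trainable : {true, false}) {
    auto var = weightVarExecOrder(eo, trainable);
    auto grad = weightGradExecOrder(eo, trainable);
    std::cout << (trainable ? "trainable" : "non-trainable") << " var:";
    for (auto o : var)
      std::cout << ' ' << o;
    std::cout << " | grad:";
    for (auto o : grad)
      std::cout << ' ' << o;
    std::cout << '\n';
  }
  return 0;
}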

index 3eb929c..6bdf553 100644 (file)
@@ -824,7 +824,7 @@ NetworkGraph::finalizeContext(const std::shared_ptr<LayerNode> &lnode,
                                    lnode->getTrainable(), shared_weight_names),
     inputs, outputs,
     tensor_manager->requestTensors(gnode, init_context.getTensorsSpec(),
-                                   shared_tensor_names));
+                                   lnode->getTrainable(), shared_tensor_names));
 
   return outputs;
 }
index 0367042..7faa69a 100644 (file)
@@ -313,12 +313,14 @@ void NeuralNetwork::backwarding(int iteration,
     PROFILE_MEM_ANNOTATE("CalcGradient: " + node->getName());
 
     bool apply_gradient = true;
+
     /** If gradient optimization mode, then calculate gradient first */
     if (dynamic_training_opt.isGradientMode())
       node->calcGradient();
 
     /**
-     * If optimization off, or gradient must be applied, then this will be true
+     * If optimization off, or gradient must be applied, then this will be
+     * true
      * @todo This apply gradient should be passed to the each weight and later
      * be queried when updating gradient at once. (after moving apply_gradient
      * out of this function)
@@ -326,8 +328,8 @@ void NeuralNetwork::backwarding(int iteration,
      */
     // auto &layer = node->getObject();
     // apply_gradient = dynamic_training_opt.checkIfApply(
-    //   layer->getWeightsRef(), layer->net_input[0], layer->net_hidden[0], opt,
-    //   iteration);
+    //   layer->getWeightsRef(), layer->net_input[0], layer->net_hidden[0],
+    //   opt, iteration);
 
     /** If gradient must be applied and its not gradient mode, calculate
      * gradient
index ad6be0e..ac70455 100644 (file)
@@ -354,10 +354,9 @@ std::vector<Weight *> Manager::requestWeights(
   const auto [forwarding_order, calcGradient_order, calcDerivative_order] =
     node.getExecutionOrder();
 
-  std::vector<unsigned int> var_exec_order(
-    {forwarding_order, calcGradient_order, calcDerivative_order});
-  std::vector<unsigned int> default_grad_exec_order(
-    {calcGradient_order, calcDerivative_order});
+  std::vector<unsigned int> default_var_exec_order(
+    {forwarding_order, calcDerivative_order});
+  std::vector<unsigned int> default_grad_exec_order({calcDerivative_order});
 
   TensorLifespan var_ls = TensorLifespan::MAX_LIFESPAN;
   TensorLifespan grad_ls = TensorLifespan::BACKWARD_FUNC_LIFESPAN;
@@ -368,7 +367,14 @@ std::vector<Weight *> Manager::requestWeights(
   for (unsigned int i = 0; i < weights_spec.size(); ++i) {
     auto &[dim, t_initializer, w_reg, w_reg_const, decay, clip_by_global_norm,
            need_gradient, name] = weights_spec.at(i);
+    auto var_exec_order = default_var_exec_order;
     auto grad_exec_order = default_grad_exec_order;
+
+    if (trainable) {
+      var_exec_order.insert(var_exec_order.begin(), calcGradient_order);
+      grad_exec_order.insert(grad_exec_order.begin(), calcGradient_order);
+    }
+
     /**
      * If the weight is supposed to be clip by global norm, extend its exec
      * order with the max exec order where it will be used for clipping and then
@@ -417,10 +423,9 @@ std::vector<Weight *> Manager::requestWeights(
  * @brief     Create weights with the given spec
  *
  */
-std::vector<Var_Grad *>
-Manager::requestTensors(const GraphNode &node,
-                        const std::vector<Var_Grad::Spec> &tensors_spec,
-                        const std::vector<std::string> &shared_names) {
+std::vector<Var_Grad *> Manager::requestTensors(
+  const GraphNode &node, const std::vector<Var_Grad::Spec> &tensors_spec,
+  bool trainable, const std::vector<std::string> &shared_names) {
   const auto [forwarding_order, calcGradient_order, calcDerivative_order] =
     node.getExecutionOrder();
 
@@ -438,7 +443,8 @@ Manager::requestTensors(const GraphNode &node,
       var_exec_order.push_back(forwarding_order);
 
     /** usage for tensors gradient in backwarding */
-    if (enum_class_logical_and(tspan, TensorLifespan::CALC_GRAD_LIFESPAN)) {
+    if (trainable &&
+        enum_class_logical_and(tspan, TensorLifespan::CALC_GRAD_LIFESPAN)) {
       var_exec_order.push_back(calcGradient_order);
       grad_exec_order.push_back(calcGradient_order);
     }
@@ -460,7 +466,6 @@ Manager::requestTensors(const GraphNode &node,
                                            dim, grad_exec_order, tspan,
                                            Tensor::Initializer::ZEROS);
       }
-
     } else {
       var = tensor_pool.request(name, dim, var_exec_order, tspan, t_init);
 
index adf9624..455baa4 100644 (file)
@@ -193,14 +193,14 @@ public:
    *
    * @param node Graph node to extract node identifiers/info
    * @param tensors_spec Specification for the tensors
+   * @param trainable make the weight trainable if true
    * @param shared_names if tensor is shared, name is needed
    *
    * @return created tensors list
    */
-  std::vector<Var_Grad *>
-  requestTensors(const GraphNode &node,
-                 const std::vector<Var_Grad::Spec> &tensors_spec,
-                 const std::vector<std::string> &shared_names = {});
+  std::vector<Var_Grad *> requestTensors(
+    const GraphNode &node, const std::vector<Var_Grad::Spec> &tensors_spec,
+    bool trainable, const std::vector<std::string> &shared_names = {});
 
   /**
    * @brief     Create tensors with the given spec