[Tensor] Remove calcGrad step for trainable layer
author Jiho Chu <jiho.chu@samsung.com>
Tue, 20 Dec 2022 01:54:39 +0000 (10:54 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Tue, 7 Feb 2023 22:55:08 +0000 (07:55 +0900)
This patch implements the trainable property behavior.

If a layer is set as non-trainable, it does not need to execute the
calcGrad step, so that step can be removed from its execution order
and the gradient calculation can be skipped.

Signed-off-by: Jiho Chu <jiho.chu@samsung.com>
nntrainer/graph/network_graph.cpp
nntrainer/models/neuralnet.cpp
nntrainer/tensor/manager.cpp
nntrainer/tensor/manager.h
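
The standalone sketch below (not the actual nntrainer code; ExecutionOrder, weightVarExecOrder and weightGradExecOrder are hypothetical names) illustrates the idea behind the manager.cpp change: a non-trainable layer's weights only need the forwarding and calcDerivative orders, while a trainable layer's weights additionally get the calcGradient order for both the variable and its gradient.

// Minimal sketch, assuming simplified types; mirrors the exec-order logic
// introduced in Manager::requestWeights, not the real API.
#include <iostream>
#include <vector>

struct ExecutionOrder {
  unsigned int forwarding;
  unsigned int calcGradient;
  unsigned int calcDerivative;
};

std::vector<unsigned int> weightVarExecOrder(const ExecutionOrder &eo,
                                             bool trainable) {
  // The weight variable is always read during forwarding and calcDerivative.
  std::vector<unsigned int> order{eo.forwarding, eo.calcDerivative};
  // Only a trainable weight is also touched during calcGradient.
  if (trainable)
    order.insert(order.begin() + 1, eo.calcGradient);
  return order;
}

std::vector<unsigned int> weightGradExecOrder(const ExecutionOrder &eo,
                                              bool trainable) {
  // The gradient is consumed in calcDerivative; it is only written in
  // calcGradient when the layer is trainable.
  std::vector<unsigned int> order{eo.calcDerivative};
  if (trainable)
    order.insert(order.begin(), eo.calcGradient);
  return order;
}

int main() {
  ExecutionOrder eo{/*forwarding=*/3, /*calcGradient=*/7, /*calcDerivative=*/8};

  for (bool trainable : {true, false}) {
    auto var = weightVarExecOrder(eo, trainable);
    auto grad = weightGradExecOrder(eo, trainable);
    std::cout << (trainable ? "trainable" : "non-trainable") << " var:";
    for (auto o : var)
      std::cout << ' ' << o;
    std::cout << " | grad:";
    for (auto o : grad)
      std::cout << ' ' << o;
    std::cout << '\n';
  }
  return 0;
}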

index 3eb929c..6bdf553 100644 (file)
@@ -824,7 +824,7 @@ NetworkGraph::finalizeContext(const std::shared_ptr<LayerNode> &lnode,
                                    lnode->getTrainable(), shared_weight_names),
     inputs, outputs,
     tensor_manager->requestTensors(gnode, init_context.getTensorsSpec(),
-                                   shared_tensor_names));
+                                   lnode->getTrainable(), shared_tensor_names));
 
   return outputs;
 }
index 0367042..7faa69a 100644 (file)
@@ -313,12 +313,14 @@ void NeuralNetwork::backwarding(int iteration,
     PROFILE_MEM_ANNOTATE("CalcGradient: " + node->getName());
 
     bool apply_gradient = true;
+
     /** If gradient optimization mode, then calculate gradient first */
     if (dynamic_training_opt.isGradientMode())
       node->calcGradient();
 
     /**
-     * If optimization off, or gradient must be applied, then this will be true
+     * If optimization off, or gradient must be applied, then this will be
+     * true
      * @todo This apply gradient should be passed to the each weight and later
      * be queried when updating gradient at once. (after moving apply_gradient
      * out of this function)
@@ -326,8 +328,8 @@ void NeuralNetwork::backwarding(int iteration,
      */
     // auto &layer = node->getObject();
     // apply_gradient = dynamic_training_opt.checkIfApply(
-    //   layer->getWeightsRef(), layer->net_input[0], layer->net_hidden[0], opt,
-    //   iteration);
+    //   layer->getWeightsRef(), layer->net_input[0], layer->net_hidden[0],
+    //   opt, iteration);
 
     /** If gradient must be applied and its not gradient mode, calculate
      * gradient
index ad6be0e..ac70455 100644 (file)
@@ -354,10 +354,9 @@ std::vector<Weight *> Manager::requestWeights(
   const auto [forwarding_order, calcGradient_order, calcDerivative_order] =
     node.getExecutionOrder();
 
-  std::vector<unsigned int> var_exec_order(
-    {forwarding_order, calcGradient_order, calcDerivative_order});
-  std::vector<unsigned int> default_grad_exec_order(
-    {calcGradient_order, calcDerivative_order});
+  std::vector<unsigned int> default_var_exec_order(
+    {forwarding_order, calcDerivative_order});
+  std::vector<unsigned int> default_grad_exec_order({calcDerivative_order});
 
   TensorLifespan var_ls = TensorLifespan::MAX_LIFESPAN;
   TensorLifespan grad_ls = TensorLifespan::BACKWARD_FUNC_LIFESPAN;
@@ -368,7 +367,14 @@ std::vector<Weight *> Manager::requestWeights(
   for (unsigned int i = 0; i < weights_spec.size(); ++i) {
     auto &[dim, t_initializer, w_reg, w_reg_const, decay, clip_by_global_norm,
            need_gradient, name] = weights_spec.at(i);
+    auto var_exec_order = default_var_exec_order;
     auto grad_exec_order = default_grad_exec_order;
+
+    if (trainable) {
+      var_exec_order.insert(var_exec_order.begin(), calcGradient_order);
+      grad_exec_order.insert(grad_exec_order.begin(), calcGradient_order);
+    }
+
     /**
      * If the weight is supposed to be clip by global norm, extend its exec
      * order with the max exec order where it will be used for clipping and then
@@ -417,10 +423,9 @@ std::vector<Weight *> Manager::requestWeights(
  * @brief     Create weights with the given spec
  *
  */
-std::vector<Var_Grad *>
-Manager::requestTensors(const GraphNode &node,
-                        const std::vector<Var_Grad::Spec> &tensors_spec,
-                        const std::vector<std::string> &shared_names) {
+std::vector<Var_Grad *> Manager::requestTensors(
+  const GraphNode &node, const std::vector<Var_Grad::Spec> &tensors_spec,
+  bool trainable, const std::vector<std::string> &shared_names) {
   const auto [forwarding_order, calcGradient_order, calcDerivative_order] =
     node.getExecutionOrder();
 
@@ -438,7 +443,8 @@ Manager::requestTensors(const GraphNode &node,
       var_exec_order.push_back(forwarding_order);
 
     /** usage for tensors gradient in backwarding */
-    if (enum_class_logical_and(tspan, TensorLifespan::CALC_GRAD_LIFESPAN)) {
+    if (trainable &&
+        enum_class_logical_and(tspan, TensorLifespan::CALC_GRAD_LIFESPAN)) {
       var_exec_order.push_back(calcGradient_order);
       grad_exec_order.push_back(calcGradient_order);
     }
@@ -460,7 +466,6 @@ Manager::requestTensors(const GraphNode &node,
                                            dim, grad_exec_order, tspan,
                                            Tensor::Initializer::ZEROS);
       }
-
     } else {
       var = tensor_pool.request(name, dim, var_exec_order, tspan, t_init);
 
index adf9624..455baa4 100644 (file)
@@ -193,14 +193,14 @@ public:
    *
    * @param node Graph node to extract node identifiers/info
    * @param tensors_spec Specification for the tensors
+   * @param trainable make the weight trainable if true
    * @param shared_names if tensor is shared, name is needed
    *
    * @return created tensors list
    */
-  std::vector<Var_Grad *>
-  requestTensors(const GraphNode &node,
-                 const std::vector<Var_Grad::Spec> &tensors_spec,
-                 const std::vector<std::string> &shared_names = {});
+  std::vector<Var_Grad *> requestTensors(
+    const GraphNode &node, const std::vector<Var_Grad::Spec> &tensors_spec,
+    bool trainable, const std::vector<std::string> &shared_names = {});
 
   /**
    * @brief     Create tensors with the given spec