[Fix] Setting batch after finalize/initialize

author Jihoon Lee <jhoon.it.lee@samsung.com>

Tue, 30 Nov 2021 03:54:18 +0000 (12:54 +0900)

committer Jijoong Moon <jijoong.moon@samsung.com>

Fri, 3 Dec 2021 11:38:55 +0000 (20:38 +0900)
author Jihoon Lee <jhoon.it.lee@samsung.com>
Tue, 30 Nov 2021 03:54:18 +0000 (12:54 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Fri, 3 Dec 2021 11:38:55 +0000 (20:38 +0900)
diff --git a/nntrainer/graph/network_graph.cpp b/nntrainer/graph/network_graph.cpp

index 1957bbc21946e17ef0c48d7f57e1ed0ac057d84e..da70eff8fd9eb8bec84997b6bbd022b4a0718628 100644 (file)
--- a/nntrainer/graph/network_graph.cpp
+++ b/nntrainer/graph/network_graph.cpp
@@ -13,7 +13,6 @@
   */
  
  #include <cmath>
-#include <sstream>
  #include <stdexcept>
  #include <string>
  
@@ -262,10 +261,12 @@ void NetworkGraph::setBatchSize(unsigned int batch_size) {
      deallocateTensors();
  
    for (auto iter = cbegin(); iter != cend(); iter++) {
-    (*iter)->setBatch(batch_size);
      if ((*iter)->isFinalized()) {
+      /// resize tensors spec
+      /// @todo remove below, if custom tensor needs to change dimension
+      /// according to the tensor, it must be done explicitly, or at least have
+      /// a property to control the behavior
        const RunLayerContext &context = (*iter)->getRunContext();
-      // resize tensors spec
        for (unsigned int idx = 0; idx < context.getNumTensors(); idx++) {
          auto const &ts = context.getTensor(idx);
          tensor_manager->setBatchSize(ts.getName(), ts.getDim().batch());
@@ -275,6 +276,8 @@ void NetworkGraph::setBatchSize(unsigned int batch_size) {
                                         ts_grad.getDim().batch());
          }
        }
+      /// override setting batch as per request
+      (*iter)->setBatch(batch_size);
      }
    }
    /// resize input and output spec
@@ -688,7 +691,7 @@ NetworkGraph::finalizeContext(const std::shared_ptr<LayerNode> &lnode,
  
    /**
     * Request manager for either a pre-allocated input as output or a newly
-   * allocated input. This is necesary for manager to know when this output
+   * allocated input. This is necessary for manager to know when this output
     * node is going to be used with in-place optimizations.
     */
    const std::vector<Var_Grad *> &outputs =
diff --git a/nntrainer/layers/bn_layer.cpp b/nntrainer/layers/bn_layer.cpp

index f9bae352370d8482fb383bbdc1468459f2e4c4c2..6ec276f408c29c6c592ed186750dae1165619f9d 100644 (file)
--- a/nntrainer/layers/bn_layer.cpp
+++ b/nntrainer/layers/bn_layer.cpp
@@ -126,7 +126,7 @@ void BatchNormalizationLayer::finalize(InitLayerContext &context) {
     * as the output of this layer need not be stored all the time.
     */
    wt_idx[BNParams::t_full] =
-    context.requestTensor(in_dim, "tesnor_full", Tensor::Initializer::NONE,
+    context.requestTensor(in_dim, "tensor_full", Tensor::Initializer::NONE,
                            false, TensorLifespan::BACKWARD_FUNC_LIFESPAN);
    /**
     * caches variance + epsilon as well.
@@ -256,6 +256,18 @@ void BatchNormalizationLayer::setBatch(RunLayerContext &context,
                                         unsigned int batch) {
    context.updateTensor(wt_idx[BNParams::deviation], batch);
    context.updateTensor(wt_idx[BNParams::t_full], batch);
+
+  /// reset divider
+  divider = 1;
+  auto input_dim = context.getInput(0).getDim();
+  for (auto axis : axes_to_reduce) {
+    if (axis == 0) {
+      /// @note the input dim batch is not updated yet; it would be more
+      /// sensible to update the batch before any node reaches this spot
+      divider *= batch;
+    }
+    divider *= input_dim.getTensorDim(axis);
+  }
  }
  
  } /* namespace nntrainer */
diff --git a/nntrainer/layers/grucell.cpp b/nntrainer/layers/grucell.cpp

index 2355abbf0fbd4e9aaf4cb564f27fc359d1d8366a..aef70c33362c74e962916f02ba8c97b992850742 100644 (file)
--- a/nntrainer/layers/grucell.cpp
+++ b/nntrainer/layers/grucell.cpp
@@ -409,7 +409,11 @@ void GRUCellLayer::setBatch(RunLayerContext &context, unsigned int batch) {
    context.updateTensor(wt_idx[GRUCellParams::hidden_state],
                         max_timestep * batch);
    context.updateTensor(wt_idx[GRUCellParams::zrg], max_timestep * batch);
-  context.updateTensor(wt_idx[GRUCellParams::dropout_mask], batch);
+
+  const float dropout_rate = std::get<props::DropOutRate>(grucell_props);
+  if (dropout_rate > epsilon) {
+    context.updateTensor(wt_idx[GRUCellParams::dropout_mask], batch);
+  }
  }
  
  } // namespace nntrainer
diff --git a/nntrainer/layers/layer_node.cpp b/nntrainer/layers/layer_node.cpp

index 9e4fd91ebde4bbf4d3112e7dffd1afe926ffab91..3793d93b2d60b3aab7f176180521af7cba34b14b 100644 (file)
--- a/nntrainer/layers/layer_node.cpp
+++ b/nntrainer/layers/layer_node.cpp
@@ -611,20 +611,10 @@ void LayerNode::calcGradient() {
   * @brief Set the batch for the layer
   */
  void LayerNode::setBatch(unsigned int batch) {
-  /** @todo we won't going to need Layer::setBatch(InitLayerContext), remove it
-   */
-  if (hasInputShapeProperty()) {
-    auto &input_shapes =
-      std::get<std::vector<props::InputShape>>(*layer_node_props);
-    for (auto &input_shape : input_shapes) {
-      input_shape.get().batch(batch);
-    }
-  }
+  NNTR_THROW_IF(!run_context, std::invalid_argument)
+    << " setting batch not supported before initialization";
  
-  if (run_context) {
-    run_context->setBatch(batch);
-    getLayer()->setBatch(*run_context, batch);
-  }
+  getLayer()->setBatch(*run_context, batch);
  }
  
  /**
diff --git a/nntrainer/layers/lstmcell.cpp b/nntrainer/layers/lstmcell.cpp

index 1b5a4e9916aebc0c084ef6ed38be784ea688e59c..4daf12ef20e4473e4555dde7daf7305f4038cc7f 100644 (file)
--- a/nntrainer/layers/lstmcell.cpp
+++ b/nntrainer/layers/lstmcell.cpp
@@ -378,7 +378,11 @@ void LSTMCellLayer::setBatch(RunLayerContext &context, unsigned int batch) {
    context.updateTensor(wt_idx[LSTMParams::hidden_state], batch * max_timestep);
    context.updateTensor(wt_idx[LSTMParams::mem_cell], batch * max_timestep);
    context.updateTensor(wt_idx[LSTMParams::fgio], batch * max_timestep);
-  context.updateTensor(wt_idx[LSTMParams::dropout_mask], batch);
+
+  const float dropout_rate = std::get<props::DropOutRate>(lstm_props);
+  if (dropout_rate > epsilon) {
+    context.updateTensor(wt_idx[LSTMParams::dropout_mask], batch);
+  }
  }
  
  } // namespace nntrainer
diff --git a/nntrainer/layers/rnncell.cpp b/nntrainer/layers/rnncell.cpp

index 2bb56c4dea21caef5058b68c3310b64496b50cc9..2096fd1201448f0475dfa4d338d086f756fde44a 100644 (file)
--- a/nntrainer/layers/rnncell.cpp
+++ b/nntrainer/layers/rnncell.cpp
@@ -12,6 +12,7 @@
   */
  
  #include <cmath>
+#include <common_properties.h>
  
  #include <layer_context.h>
  #include <nntrainer_error.h>
@@ -248,7 +249,12 @@ void RNNCellLayer::setBatch(RunLayerContext &context, unsigned int batch) {
    const unsigned int max_timestep = std::get<props::MaxTimestep>(rnncell_props);
    context.updateTensor(wt_idx[RNNCellParams::hidden_state],
                         batch * max_timestep);
-  context.updateTensor(wt_idx[RNNCellParams::dropout_mask], batch);
+
+  const float dropout_rate = std::get<props::DropOutRate>(rnncell_props);
+  if (dropout_rate > epsilon) {
+    /// @note default value of wt_idx[dropout_mask] is 0
+    context.updateTensor(wt_idx[RNNCellParams::dropout_mask], batch);
+  }
  }
  
  } // namespace nntrainer
diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp

index ce592e369acea51e062503300aa825959b5de6f7..5eafc8044e3652c0b535c3163e50f473c6444984 100644 (file)
--- a/nntrainer/models/neuralnet.cpp
+++ b/nntrainer/models/neuralnet.cpp
@@ -159,9 +159,6 @@ int NeuralNetwork::initialize() {
  
    ml_logd("initializing neural network, layer size: %d", n_layers);
  
-  model_graph.setBatchSize(
-    std::get<props::TrainingBatchSize>(model_flex_props));
-
    auto &input_conn_prop =
      std::get<std::vector<props::InputConnection>>(model_props);
    auto &label_layer_prop =
@@ -181,6 +178,9 @@ int NeuralNetwork::initialize() {
      std::vector<Connection>(label_layers.begin(), label_layers.end()));
    NN_RETURN_STATUS();
  
+  model_graph.setBatchSize(
+    std::get<props::TrainingBatchSize>(model_flex_props));
+
    // initialize optimizer and related variables
    if (opt) {
      /** TODO: update request of optimizer to be of same format as
author	Jihoon Lee <jhoon.it.lee@samsung.com>
	Tue, 30 Nov 2021 03:54:18 +0000 (12:54 +0900)
committer	Jijoong Moon <jijoong.moon@samsung.com>
	Fri, 3 Dec 2021 11:38:55 +0000 (20:38 +0900)
nntrainer/graph/network_graph.cpp		patch \| blob \| history
nntrainer/layers/bn_layer.cpp		patch \| blob \| history
nntrainer/layers/grucell.cpp		patch \| blob \| history
nntrainer/layers/layer_node.cpp		patch \| blob \| history
nntrainer/layers/lstmcell.cpp		patch \| blob \| history
nntrainer/layers/rnncell.cpp		patch \| blob \| history
nntrainer/models/neuralnet.cpp		patch \| blob \| history