Change loading meta information behavior

author Jihoon Lee <jhoon.it.lee@samsung.com>

Fri, 11 Mar 2022 15:42:54 +0000 (00:42 +0900)

committer Jijoong Moon <jijoong.moon@samsung.com>

Mon, 14 Mar 2022 04:10:36 +0000 (13:10 +0900)
author Jihoon Lee <jhoon.it.lee@samsung.com>
Fri, 11 Mar 2022 15:42:54 +0000 (00:42 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Mon, 14 Mar 2022 04:10:36 +0000 (13:10 +0900)
diff --git a/nntrainer/layers/layer_context.cpp b/nntrainer/layers/layer_context.cpp

index 6c15112..d565611 100644 (file)
--- a/nntrainer/layers/layer_context.cpp
+++ b/nntrainer/layers/layer_context.cpp
@@ -172,9 +172,6 @@ Tensor &RunLayerContext::getWeightGrad(unsigned int idx) const {
   */
  Tensor &RunLayerContext::getWeightOptVar(unsigned int idx,
                                           unsigned int jdx) const {
-  if (!weights[idx]->hasGradient())
-    throw std::invalid_argument(
-      "Requesting gradient for a non-trainable weight.");
    return weights[idx]->getOptimizerVariableRef(jdx);
  }
  
@@ -185,9 +182,6 @@ Tensor &RunLayerContext::getWeightOptVar(unsigned int idx,
   * @return int Number of the weight optimizer variable
   */
  unsigned int RunLayerContext::getNumWeightOptVar(unsigned int idx) const {
-  if (!weights[idx]->hasGradient())
-    throw std::invalid_argument(
-      "Requesting gradient for a non-trainable weight.");
    return weights[idx]->getNumOptVariable();
  }
  
diff --git a/nntrainer/layers/layer_node.cpp b/nntrainer/layers/layer_node.cpp

index f9e6d50..0820c59 100644 (file)
--- a/nntrainer/layers/layer_node.cpp
+++ b/nntrainer/layers/layer_node.cpp
@@ -421,12 +421,9 @@ void LayerNode::read(std::ifstream &file, bool opt_var) {
    if (opt_var) {
      for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
        if (run_context->isGradientLastAccess(i) && getTrainable()) {
-        // @note read optimizer variables
-        if (run_context->weightHasGradient(i)) {
-          for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i);
-               ++j) {
-            run_context->getWeightOptVar(i, j).read(file);
-          }
+        /// @note read optimizer variables
+        for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i); ++j) {
+          run_context->getWeightOptVar(i, j).read(file);
          }
        }
      }
@@ -466,6 +463,19 @@ void LayerNode::save(std::ofstream &file, bool opt_var) const {
    }
  }
  
+void LayerNode::clearOptVar() {
+  NNTR_THROW_IF(!run_context, std::runtime_error)
+    << __func__ << " layer needs to be finalized first!";
+  for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
+    if (run_context->isGradientLastAccess(i) && getTrainable()) {
+      /// @note reset optimizer variables to their initial state
+      for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i); ++j) {
+        run_context->getWeightOptVar(i, j).initialize();
+      }
+    }
+  }
+}
+
  /**
   * @brief     Finalize creating the layer node
   */
@@ -624,7 +634,8 @@ void LayerNode::setBatch(unsigned int batch) {
   * @brief   If the current layer can support in-place
   */
  bool LayerNode::supportInPlace() const {
-  ///@note below is a quick fix, we need to have a guard that this shouldn't be
+  ///@note below is a quick fix, we need to have a guard that this shouldn't
+  /// be
    /// query until realizeProps has been finalized ( which means we will need
    /// another end point to fixate this property )
    if (getDistribute()) {
diff --git a/nntrainer/layers/layer_node.h b/nntrainer/layers/layer_node.h

index ede2006..9a50f22 100644 (file)
--- a/nntrainer/layers/layer_node.h
+++ b/nntrainer/layers/layer_node.h
@@ -584,6 +584,12 @@ public:
    void save(std::ofstream &file, bool opt_var = false) const;
  
    /**
+   * @brief clear optimizer variable to initial state
+   *
+   */
+  void clearOptVar();
+
+  /**
     * @brief     get loss for the layer
     * @return    loss of the layer
     */
diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp

index 8eaba4d..b7a30e0 100644 (file)
--- a/nntrainer/models/neuralnet.cpp
+++ b/nntrainer/models/neuralnet.cpp
@@ -71,8 +71,6 @@ NeuralNetwork::NeuralNetwork(AppContext app_context_) :
    initialized(false),
    compiled(false),
    loadedFromConfig(false),
-  loadedWeight(false),
-  bin_file_pos(0),
    app_context(app_context_) {}
  
  int NeuralNetwork::loadFromConfig(const std::string &config) {
@@ -189,6 +187,8 @@ int NeuralNetwork::initialize() {
      std::get<props::TrainingBatchSize>(model_flex_props));
  
    // initialize optimizer and related variables
+  /// @todo: initialize should take a mode; if the mode is train but no
+  /// optimizer is given, this should be a hard error
    if (opt) {
      /** TODO: update request of optimizer to be of same format as
       * Layer::requestTensor */
@@ -205,11 +205,7 @@ int NeuralNetwork::initialize() {
  
    initialized = true;
  
-  // @note we need check loadedWeight for the case of multiple call of load to
-  // load weight. Only the weight needs to be loaded here. Becuase the buffer
-  // for the optimizer is not allocated yet.
-  // loadedWeight check is just for the duplicate load of weight.
-  if (!load_path.empty() && !loadedWeight) {
+  if (!load_path.empty()) {
      load(load_path, ml::train::ModelFormat::MODEL_FORMAT_BIN);
    }
  
@@ -328,14 +324,12 @@ void NeuralNetwork::save(const std::string &file_path,
    switch (format) {
    case ml::train::ModelFormat::MODEL_FORMAT_BIN: {
      auto model_file = checkedOpenStream<std::ofstream>(
-      file_path, std::ios::out | std::ios::binary);
+      file_path, std::ios::out | std::ios::binary | std::ios::trunc);
      for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
        (*iter)->save(model_file);
      }
-
-    opt->save(model_file);
-
-    if (istrequal(opt->getType(), "adam")) {
+    if (opt && istrequal(opt->getType(), "adam")) {
+      model_file.write("adam", 4);
        for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
             iter++) {
          (*iter)->save(model_file, true);
@@ -381,22 +375,13 @@ void NeuralNetwork::load(const std::string &file_path,
  
      auto model_file = checkedOpenStream<std::ifstream>(
        file_path, std::ios::in | std::ios::binary);
-    if (!loadedWeight) {
-      for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
-           iter++) {
-        (*iter)->read(model_file);
-      }
-      loadedWeight = true;
-      bin_file_pos = model_file.tellg();
-      load_path = file_path;
-      return;
+    for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
+      (*iter)->read(model_file);
      }
      try {
        /// this is assuming that the failure is allowed at the end of the file
        /// read. so, after this line, additional read shouldn't be called
-      model_file.seekg(bin_file_pos);
-
-      if (istrequal(opt->getType(), "adam")) {
+      if (opt && istrequal(opt->getType(), "adam")) {
          char opt_type[4];
          model_file.read(opt_type, 4);
          if (istrequal(opt_type, "adam")) {
@@ -412,7 +397,8 @@ void NeuralNetwork::load(const std::string &file_path,
        checkedRead(model_file, (char *)&iter, sizeof(iter),
                    "[NeuralNetwork::readModel] failed to read iteration");
      } catch (...) {
-      std::cerr << "failed to read epoch idx, proceeding with default index\n";
+      std::cerr << "failed to read additional data like optimizer variable, "
+                   "iteration, proceeding with default\n";
      }
  
      ml_logi("read modelfile: %s", file_path.c_str());
@@ -644,11 +630,6 @@ int NeuralNetwork::train(const std::vector<std::string> &values) {
    status = allocate(ExecutionMode::TRAIN);
    NN_RETURN_STATUS();
  
-  // @note Need to be here to read the optimizer variables
-  if (!load_path.empty()) {
-    load(load_path, ml::train::ModelFormat::MODEL_FORMAT_BIN);
-  }
-
    status = train_run();
    NN_RETURN_STATUS();
  
@@ -668,10 +649,14 @@ int NeuralNetwork::train_run() {
    int status = ML_ERROR_NONE;
  
    if (!std::get<props::ContinueTrain>(model_flex_props)) {
-    epoch_idx = 0;
      iter = 0;
+    for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
+      (*iter)->clearOptVar();
+    }
    }
  
+  epoch_idx = 0;
+
    auto batch_size = std::get<props::TrainingBatchSize>(model_flex_props);
  
    auto const &outputs = model_graph.getOutputTensors();
@@ -845,8 +830,6 @@ void swap(NeuralNetwork &lhs, NeuralNetwork &rhs) {
      swap(lhs.graph_representation, rhs.graph_representation);
      swap(lhs.compiled, rhs.compiled);
      swap(lhs.loadedFromConfig, rhs.loadedFromConfig);
-    swap(lhs.loadedWeight, rhs.loadedWeight);
-    swap(lhs.bin_file_pos, rhs.bin_file_pos);
    }
  }
  
diff --git a/nntrainer/models/neuralnet.h b/nntrainer/models/neuralnet.h

index b12a476..8b4a19a 100644 (file)
--- a/nntrainer/models/neuralnet.h
+++ b/nntrainer/models/neuralnet.h
@@ -530,10 +530,6 @@ private:
  
    bool loadedFromConfig; /**< Check if config is loaded to prevent load twice */
  
-  bool loadedWeight; /**< Check if weight is loaded to prevent load twice */
-
-  uint64_t bin_file_pos; /**< save file position to load later*/
-
    RunStats validation; /** validation statistics of the model */
    RunStats training;   /** training statistics of the model */
    RunStats testing;    /** testing statistics of the model */
diff --git a/nntrainer/tensor/manager.cpp b/nntrainer/tensor/manager.cpp

index 7b212e0..4a88081 100644 (file)
--- a/nntrainer/tensor/manager.cpp
+++ b/nntrainer/tensor/manager.cpp
@@ -597,13 +597,15 @@ bool Manager::isSecondLastAccess(const std::string &name,
  std::vector<Tensor *> Manager::requestWeightOptimizerVariables(
    const std::vector<TensorDim> &dims, const std::string &name,
    const TensorLifespan &lifespan, Tensor::Initializer initializer) {
-  auto const &exec_order = weight_pool.getExecutionOrder(name);
+  auto const exec_order = weight_pool.getExecutionOrder(name);
  
    std::vector<Tensor *> ret;
    ret.reserve(dims.size());
  
+  /// @note this assumes weight optimizer variables are treated as weights;
+  /// if not, there is room to optimize the behavior below
    for (unsigned int idx = 0; idx < dims.size(); idx++)
-    ret.push_back(tensor_pool.request(name + ":opt" + std::to_string(idx),
+    ret.push_back(weight_pool.request(name + ":opt" + std::to_string(idx),
                                        dims[idx], exec_order, lifespan,
                                        initializer));
author	Jihoon Lee <jhoon.it.lee@samsung.com>
	Fri, 11 Mar 2022 15:42:54 +0000 (00:42 +0900)
committer	Jijoong Moon <jijoong.moon@samsung.com>
	Mon, 14 Mar 2022 04:10:36 +0000 (13:10 +0900)
nntrainer/layers/layer_context.cpp		patch \| blob \| history
nntrainer/layers/layer_node.cpp		patch \| blob \| history
nntrainer/layers/layer_node.h		patch \| blob \| history
nntrainer/models/neuralnet.cpp		patch \| blob \| history
nntrainer/models/neuralnet.h		patch \| blob \| history
nntrainer/tensor/manager.cpp		patch \| blob \| history