[save/load] save/load the optimizer parameters

author Parichay Kapoor <pk.kapoor@samsung.com>

Tue, 16 Jun 2020 10:12:04 +0000 (19:12 +0900)

committer Jijoong Moon <jijoong.moon@samsung.com>

Wed, 17 Jun 2020 10:19:22 +0000 (19:19 +0900)
author Parichay Kapoor <pk.kapoor@samsung.com>
Tue, 16 Jun 2020 10:12:04 +0000 (19:12 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Wed, 17 Jun 2020 10:19:22 +0000 (19:19 +0900)
diff --git a/nntrainer/include/fc_layer.h b/nntrainer/include/fc_layer.h

index 52ae6a0..0cd1739 100644 (file)
--- a/nntrainer/include/fc_layer.h
+++ b/nntrainer/include/fc_layer.h
@@ -94,6 +94,14 @@ public:
    int setProperty(std::vector<std::string> values);
  
    /**
+   * @brief     Optimizer Setter
+   * @param[in] opt Optimizer
+   * @retval #ML_ERROR_NONE Successful.
+   * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+   */
+  int setOptimizer(Optimizer &opt);
+
+  /**
     * @brief     Property Enumeration
     *            1. bias zero : bool
     *            4. activation : bool
diff --git a/nntrainer/include/neuralnet.h b/nntrainer/include/neuralnet.h

index ac7c249..61f0502 100644 (file)
--- a/nntrainer/include/neuralnet.h
+++ b/nntrainer/include/neuralnet.h
@@ -145,12 +145,12 @@ public:
    int backwarding(Tensor input, Tensor expected_output, int iteration);
  
    /**
-   * @brief     save W & B into file
+   * @brief     save model and training parameters into file
     */
    void saveModel();
  
    /**
-   * @brief     read W & B from file
+   * @brief     read model and training parameters from file
     */
    void readModel();
  
@@ -259,6 +259,7 @@ public:
      batch_size = 7,
      epochs = 8,
      model_file = 9,
+    continue_train = 10,
    };
  
  private:
@@ -314,6 +315,7 @@ private:
  
    /**
     * @brief     Optimizer
+   * @note      This gets copied into each layer, do not use this directly
     */
    Optimizer opt;
  
@@ -333,9 +335,14 @@ private:
    std::shared_ptr<DataBuffer> data_buffer;
  
    /**
+   * @brief    Continue training from the previous state of optimizer and iterations
+   */
+  bool continue_train;
+
+  /**
     * @brief     Number of iterations trained
     */
-  int iter;
+  uint64_t iter;
  };
  
  } /* namespace nntrainer */
diff --git a/nntrainer/include/optimizer.h b/nntrainer/include/optimizer.h

index 5be118d..8a7671b 100644 (file)
--- a/nntrainer/include/optimizer.h
+++ b/nntrainer/include/optimizer.h
@@ -57,13 +57,17 @@ typedef struct WeightDecayParam_ {
  /**
   * @brief     type for the Optimizor to save hyper-parameter
   */
-typedef struct {
+typedef struct _OptParam {
    float learning_rate;
    double beta1;
    double beta2;
    double epsilon;
    float decay_rate;
    float decay_steps;
+  bool continue_train;  /** Continue training with previous tensors for adam */
+
+  _OptParam() : learning_rate(0.0), beta1(0.0), beta2(0.0), epsilon(0.0),
+    decay_rate(0.0), decay_steps(0.0), continue_train(false) {}
  } OptParam;
  
  class Optimizer {
@@ -71,12 +75,12 @@ public:
    /**
     * @brief     Constructor of Optimizer Class
     */
-  Optimizer();
+  Optimizer() : type(OptType::unknown), popt() {}
  
    /**
     * @brief     Destructor of Optimizer Class
     */
-  ~Optimizer(){};
+  ~Optimizer() {}
  
    /**
     * @brief     set Optimizer Type
@@ -135,8 +139,8 @@ public:
    /**
     * @brief     initialize optimizer. Initialize Weight if it is adam
     * @param[in] d TensorDim
-   * @param[in] setTensor true if the layer need wieght update.
-   *            Input Layer and Batch Noramlization layer won't need it.
+   * @param[in] setTensor true if the layer need weight update.
+   *            Input Layer and Batch Normalization layer won't need it.
     *            Therefore, it sets false.
     * @retval #ML_ERROR_NONE Successful.
     * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
@@ -172,8 +176,21 @@ public:
      beta1 = 3,
      beta2 = 4,
      epsilon = 5,
+    continue_train = 6,
    };
  
+  /**
+   * @brief     Read Training optimizer parameters from file
+   * @param[in] file input stream file
+   */
+  void read(std::ifstream &file);
+
+  /**
+   * @brief     Save Training optimizer parameters to file
+   * @param[in] file output stream file
+   */
+  void save(std::ofstream &file);
+
  private:
    /**
     * @brief Optimizer Type
diff --git a/nntrainer/include/tensor.h b/nntrainer/include/tensor.h

index fee33e1..91376e1 100644 (file)
--- a/nntrainer/include/tensor.h
+++ b/nntrainer/include/tensor.h
@@ -351,6 +351,14 @@ public:
    int getBatch() const { return dim.batch(); };
  
    /**
+   * @brief     Get size of the data
+   * @retval    size_t Size in bytes
+   */
+  size_t getSize() const {
+    return dim.getDataLen() * sizeof(decltype(data)::value_type);
+  }
+
+  /**
     * @brief     Set the element value
     * @param[in] batch batch location
     * @param[in] c channel location
diff --git a/nntrainer/src/fc_layer.cpp b/nntrainer/src/fc_layer.cpp

index b00a9d5..254ff91 100644 (file)
--- a/nntrainer/src/fc_layer.cpp
+++ b/nntrainer/src/fc_layer.cpp
@@ -110,6 +110,14 @@ int FullyConnectedLayer::setProperty(std::vector<std::string> values) {
    return status;
  }
  
+int FullyConnectedLayer::setOptimizer(Optimizer &opt) {
+  int status = Layer::setOptimizer(opt);
+  if (status != ML_ERROR_NONE)
+    return status;
+
+  return this->opt.initialize(dim, true);
+}
+
  Tensor FullyConnectedLayer::forwarding(Tensor in, int &status) {
    input = in;
    hidden = input.chain().dot(weight).add_i(bias).run();
@@ -132,11 +140,13 @@ Tensor FullyConnectedLayer::forwarding(Tensor in, int &status) {
  void FullyConnectedLayer::read(std::ifstream &file) {
    weight.read(file);
    bias.read(file);
+  opt.read(file);
  }
  
  void FullyConnectedLayer::save(std::ofstream &file) {
    weight.save(file);
    bias.save(file);
+  opt.save(file);
  }
  
  void FullyConnectedLayer::copy(std::shared_ptr<Layer> l) {
diff --git a/nntrainer/src/layer.cpp b/nntrainer/src/layer.cpp

index e34f9f3..8830f1e 100644 (file)
--- a/nntrainer/src/layer.cpp
+++ b/nntrainer/src/layer.cpp
@@ -59,7 +59,7 @@ int Layer::setOptimizer(Optimizer &opt) {
    this->opt.setType(opt.getType());
    this->opt.setOptParam(opt.getOptParam());
  
-  return this->opt.initialize(dim, true);
+  return this->opt.initialize(dim, false);
  }
  
  int Layer::checkValidation() {
diff --git a/nntrainer/src/neuralnet.cpp b/nntrainer/src/neuralnet.cpp

index ac4c76b..659c6ad 100644 (file)
--- a/nntrainer/src/neuralnet.cpp
+++ b/nntrainer/src/neuralnet.cpp
@@ -105,6 +105,7 @@ NeuralNetwork::NeuralNetwork(std::string config) {
    net_type = NET_UNKNOWN;
    data_buffer = NULL;
    iter = 0;
+  continue_train = false;
    this->setConfig(config);
  }
  
@@ -453,8 +454,8 @@ int NeuralNetwork::setProperty(std::vector<std::string> values) {
      case PropertyType::epochs: {
        int e;
        status = setInt(e, value);
-      epoch = e;
        NN_RETURN_STATUS();
+      epoch = e;
      } break;
      case PropertyType::train_data: {
        status = std::static_pointer_cast<DataBufferFromDataFile>(data_buffer)
@@ -487,6 +488,13 @@ int NeuralNetwork::setProperty(std::vector<std::string> values) {
      case PropertyType::model_file: {
        model = value;
      } break;
+    case PropertyType::continue_train: {
+      bool cont_train;
+      status = setBoolean(cont_train, value);
+      NN_RETURN_STATUS();
+      continue_train = cont_train;
+      opt.setProperty({values[i]});
+    } break;
      default:
        ml_loge("Error: Unknown Network Property Key");
        status = ML_ERROR_INVALID_PARAMETER;
@@ -526,18 +534,21 @@ int NeuralNetwork::init(std::shared_ptr<Optimizer> optimizer,
        status = layers[i]->setCost(cost);
        NN_RETURN_STATUS();
        break;
-    case LAYER_FC:
+    case LAYER_FC: {
+      std::shared_ptr<FullyConnectedLayer> fc_layer =
+        std::static_pointer_cast<FullyConnectedLayer>(layers[i]);
        layers[i]->setInputDimension(previous_dim);
+
        status = layers[i]->setCost(cost);
        NN_RETURN_STATUS();
  
        status = layers[i]->initialize(last);
        NN_RETURN_STATUS();
  
-      status = layers[i]->setOptimizer(opt);
+      status = fc_layer->setOptimizer(opt);
        NN_RETURN_STATUS();
  
-      break;
+    }  break;
      case LAYER_BN:
        layers[i]->setInputDimension(previous_dim);
        status = layers[i]->initialize(last);
@@ -657,17 +668,20 @@ NeuralNetwork &NeuralNetwork::copy(NeuralNetwork &from) {
  /**
   * @brief     save model
   *            save Weight & Bias Data into file by calling save from layer
+ *            save training parameters from the optimizer
   */
  void NeuralNetwork::saveModel() {
    std::ofstream model_file(model, std::ios::out | std::ios::binary);
    for (unsigned int i = 0; i < layers.size(); i++)
      layers[i]->save(model_file);
+  model_file.write((char *)&iter, sizeof(iter));
    model_file.close();
  }
  
  /**
   * @brief     read model
   *            read Weight & Bias Data into file by calling save from layer
+ *            read training parameters from the optimizer if continuing train
   */
  void NeuralNetwork::readModel() {
    if (!is_file_exist(model))
@@ -675,6 +689,9 @@ void NeuralNetwork::readModel() {
    std::ifstream model_file(model, std::ios::in | std::ios::binary);
    for (unsigned int i = 0; i < layers.size(); i++)
      layers[i]->read(model_file);
+  if (continue_train) {
+    model_file.read((char *)&iter, sizeof(iter));
+  }
    model_file.close();
    ml_logi("read modelfile: %s", model.c_str());
  }
diff --git a/nntrainer/src/optimizer.cpp b/nntrainer/src/optimizer.cpp

index cef9ab9..d2e69b7 100644 (file)
--- a/nntrainer/src/optimizer.cpp
+++ b/nntrainer/src/optimizer.cpp
@@ -21,6 +21,7 @@
   *
   */
  
+#include <iostream>
  #include <nntrainer_error.h>
  #include <nntrainer_log.h>
  #include <lazy_tensor.h>
@@ -30,16 +31,6 @@
  
  namespace nntrainer {
  
-Optimizer::Optimizer() {
-  type = OptType::unknown;
-  popt.learning_rate = 0.0;
-  popt.beta1 = 0.0;
-  popt.beta2 = 0.0;
-  popt.epsilon = 0.0;
-  popt.decay_rate = 0.0;
-  popt.decay_steps = 0.0;
-};
-
  int Optimizer::setType(OptType t) {
    int status = ML_ERROR_NONE;
    if (t == OptType::unknown) {
@@ -180,6 +171,10 @@ int Optimizer::setProperty(std::vector<std::string> values) {
        status = setDouble(popt.epsilon, value);
        NN_RETURN_STATUS();
        break;
+    case PropertyType::continue_train:
+      status = setBoolean(popt.continue_train, value);
+      NN_RETURN_STATUS();
+      break;
      default:
        ml_loge("Error: Unknown Optimizer Property Key");
        status = ML_ERROR_INVALID_PARAMETER;
@@ -189,4 +184,30 @@ int Optimizer::setProperty(std::vector<std::string> values) {
  
    return status;
  }
+
+void Optimizer::read(std::ifstream &file) {
+  OptType loaded_type;
+  file.read((char *)&loaded_type, sizeof(OptType));
+  if (type == OptType::adam and loaded_type == type) {
+    if (popt.continue_train) {
+      wm.read(file);
+      bm.read(file);
+      wv.read(file);
+      bv.read(file);
+    } else {
+      size_t total_size = wm.getSize() + bm.getSize() + wv.getSize() + bv.getSize();
+      file.seekg(total_size, std::ifstream::cur);
+    }
+  }
+}
+
+void Optimizer::save(std::ofstream &file) {
+  file.write((char *)&type, sizeof(OptType));
+  if (type == OptType::adam) {
+    wm.save(file);
+    bm.save(file);
+    wv.save(file);
+    bv.save(file);
+  }
+}
  } // namespace nntrainer
diff --git a/nntrainer/src/parse_util.cpp b/nntrainer/src/parse_util.cpp

index ebc8493..c2fee04 100644 (file)
--- a/nntrainer/src/parse_util.cpp
+++ b/nntrainer/src/parse_util.cpp
@@ -296,10 +296,11 @@ unsigned int parseOptProperty(std::string property) {
     * beta1 = 3,
     * beta2 = 4,
     * epsilon = 5,
+   * continue_train = 6,
     */
-  std::array<std::string, 7> property_string = {
-    "learning_rate", "decay_rate", "decay_steps", "beta1",
-    "beta2",         "epsilon",    "unknown"};
+  std::array<std::string, 8> property_string = {
+    "learning_rate", "decay_rate", "decay_steps", "beta1", "beta2", "epsilon",
+    "continue_train", "unknown"};
  
    for (i = 0; i < property_string.size(); i++) {
      unsigned int size = (property_string[i].size() > property.size())
author	Parichay Kapoor <pk.kapoor@samsung.com>
	Tue, 16 Jun 2020 10:12:04 +0000 (19:12 +0900)
committer	Jijoong Moon <jijoong.moon@samsung.com>
	Wed, 17 Jun 2020 10:19:22 +0000 (19:19 +0900)
nntrainer/include/fc_layer.h		patch \| blob \| history
nntrainer/include/neuralnet.h		patch \| blob \| history
nntrainer/include/optimizer.h		patch \| blob \| history
nntrainer/include/tensor.h		patch \| blob \| history
nntrainer/src/fc_layer.cpp		patch \| blob \| history
nntrainer/src/layer.cpp		patch \| blob \| history
nntrainer/src/neuralnet.cpp		patch \| blob \| history
nntrainer/src/optimizer.cpp		patch \| blob \| history
nntrainer/src/parse_util.cpp		patch \| blob \| history