From a89facd88a6784d941761e72723868aa0fca9b19 Mon Sep 17 00:00:00 2001
From: "jijoong.moon"
Date: Thu, 20 Jan 2022 18:06:41 +0900
Subject: [PATCH] [ SAVE/LOAD ] save / load optimizer variables

Enable saving and loading of optimizer variables, such as the M and V
tensors of the Adam optimizer.

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon
---
 nntrainer/layers/layer_context.cpp | 28 +++++++++++++++++++++++++
 nntrainer/layers/layer_context.h   | 17 +++++++++++++++
 nntrainer/layers/layer_node.cpp    | 42 +++++++++++++++++++++++++++++---------
 nntrainer/layers/layer_node.h      |  6 ++++--
 nntrainer/models/neuralnet.cpp     | 41 ++++++++++++++++++++++++++++++++++---
 nntrainer/models/neuralnet.h       |  4 ++++
 nntrainer/tensor/weight.h          |  6 ++++++
 7 files changed, 129 insertions(+), 15 deletions(-)

diff --git a/nntrainer/layers/layer_context.cpp b/nntrainer/layers/layer_context.cpp
index 92011f5..6c15112 100644
--- a/nntrainer/layers/layer_context.cpp
+++ b/nntrainer/layers/layer_context.cpp
@@ -164,6 +164,34 @@ Tensor &RunLayerContext::getWeightGrad(unsigned int idx) const {
 }
 
 /**
+ * @brief Get the Weight Optimizer Variable tensor object
+ *
+ * @param idx Identifier of the weight
+ * @param jdx Identifier of the optimizer variable
+ * @return Tensor& Reference to the weight optimizer variable tensor
+ */
+Tensor &RunLayerContext::getWeightOptVar(unsigned int idx,
+                                         unsigned int jdx) const {
+  if (!weights[idx]->hasGradient())
+    throw std::invalid_argument(
+      "Requesting optimizer variable for a non-trainable weight.");
+  return weights[idx]->getOptimizerVariableRef(jdx);
+}
+
+/**
+ * @brief Get the number of Weight Optimizer Variable tensor objects
+ *
+ * @param idx Identifier of the weight
+ * @return unsigned int Number of the weight optimizer variables
+ */
+unsigned int RunLayerContext::getNumWeightOptVar(unsigned int idx) const {
+  if (!weights[idx]->hasGradient())
+    throw std::invalid_argument(
+      "Requesting optimizer variable for a non-trainable weight.");
+  return weights[idx]->getNumOptVariable();
+}
+
+/**
  * @brief Get regularization loss for the weight
  *
  * @param idx Identifier of the weight
diff --git a/nntrainer/layers/layer_context.h b/nntrainer/layers/layer_context.h
index 3f0a0c7..ef2bf26 100644
--- a/nntrainer/layers/layer_context.h
+++ b/nntrainer/layers/layer_context.h
@@ -363,6 +363,15 @@ public:
   Tensor &getWeightGrad(unsigned int idx) const;
 
   /**
+   * @brief Get the Weight Optimizer Variable tensor object
+   *
+   * @param idx Identifier of the weight
+   * @param jdx Identifier of the weight optimizer variable
+   * @return Tensor& Reference to the weight optimizer variable tensor
+   */
+  Tensor &getWeightOptVar(unsigned int idx, unsigned int jdx) const;
+
+  /**
    * @brief Get the Weight name
    *
    * @param idx Identifier of the weight
@@ -580,6 +589,14 @@ public:
   unsigned int getNumWeights() const { return weights.size(); }
 
   /**
+   * @brief Get the number of Weight Optimizer Variable tensor objects
+   *
+   * @param idx Identifier of the weight
+   * @return unsigned int Number of the weight optimizer variables
+   */
+  unsigned int getNumWeightOptVar(unsigned int idx) const;
+
+  /**
    * @brief Get the number of requested tensors objects
    *
    * @return unsigned int number of requested tensors
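A usage sketch of the two accessors added above, for review context. `dump_opt_vars` is a hypothetical helper, not part of this patch; it relies only on API that already appears in the diff (a finalized nntrainer::RunLayerContext plus Tensor::save) and mirrors the loops that LayerNode::save()/read() gain below.

    #include <fstream>

    #include <layer_context.h> // nntrainer::RunLayerContext

    // Hypothetical helper: write out every optimizer variable (for Adam,
    // the M and V tensors of each weight) owned by a finalized context.
    void dump_opt_vars(nntrainer::RunLayerContext &ctx, std::ofstream &file) {
      for (unsigned int i = 0; i < ctx.getNumWeights(); ++i) {
        if (!ctx.isGradientLastAccess(i))
          continue; // shared weights are serialized at their last access only
        // both accessors throw std::invalid_argument for weights without a
        // gradient, per the guards added above
        for (unsigned int j = 0; j < ctx.getNumWeightOptVar(i); ++j)
          ctx.getWeightOptVar(i, j).save(file);
      }
    }
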
diff --git a/nntrainer/layers/layer_node.cpp b/nntrainer/layers/layer_node.cpp
index 065c90a..1a593b0 100644
--- a/nntrainer/layers/layer_node.cpp
+++ b/nntrainer/layers/layer_node.cpp
@@ -415,24 +415,46 @@ void LayerNode::exportTo(Exporter &exporter,
   layer->exportTo(exporter, method);
 }
 
-void LayerNode::read(std::ifstream &file) {
+void LayerNode::read(std::ifstream &file, bool opt_var) {
   NNTR_THROW_IF(!run_context, std::runtime_error)
     << __func__ << " layer needs to be finalized first!";
-  for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
-    /// @note shared weights are only be read at the first acecss
-    if (run_context->isGradientLastAccess(i)) {
-      run_context->getWeight(i).read(file);
+  if (opt_var) {
+    for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
+      if (run_context->isGradientLastAccess(i)) {
+        // @note read optimizer variables
+        for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i); ++j) {
+          run_context->getWeightOptVar(i, j).read(file);
+        }
+      }
+    }
+  } else {
+    for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
+      /// @note shared weights are only read at the first access
+      if (run_context->isGradientLastAccess(i)) {
+        run_context->getWeight(i).read(file);
+      }
     }
   }
 }
 
-void LayerNode::save(std::ofstream &file) const {
+void LayerNode::save(std::ofstream &file, bool opt_var) const {
   NNTR_THROW_IF(!run_context, std::runtime_error)
     << __func__ << " layer needs to be finalized first!";
-  /// @note shared weights are only be saved at the first access
-  for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
-    if (run_context->isGradientLastAccess(i)) {
-      run_context->getWeight(i).save(file);
+  if (opt_var) {
+    for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
+      if (run_context->isGradientLastAccess(i)) {
+        // @note save optimizer variables
+        for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i); ++j) {
+          run_context->getWeightOptVar(i, j).save(file);
+        }
+      }
+    }
+  } else {
+    // @note shared weights are only saved at the first access
+    for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
+      if (run_context->isGradientLastAccess(i)) {
+        run_context->getWeight(i).save(file);
+      }
     }
   }
 }
diff --git a/nntrainer/layers/layer_node.h b/nntrainer/layers/layer_node.h
index 5538a88..ede2006 100644
--- a/nntrainer/layers/layer_node.h
+++ b/nntrainer/layers/layer_node.h
@@ -572,14 +572,16 @@ public:
   /**
    * @brief read layer Weight & Bias data from file
    * @param file input file stream
+   * @param opt_var read optimizer variables instead of weights if true
    */
-  void read(std::ifstream &file);
+  void read(std::ifstream &file, bool opt_var = false);
 
   /**
    * @brief save layer Weight & Bias data from file
    * @param file output file stream
+   * @param opt_var save optimizer variables instead of weights if true
    */
-  void save(std::ofstream &file) const;
+  void save(std::ofstream &file, bool opt_var = false) const;
 
   /**
    * @brief get loss for the layer
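The neuralnet.cpp changes below fix the on-disk layout of a MODEL_FORMAT_BIN file: the weight tensors, then the optimizer section written by opt->save() (load() reads its type string back via readString()), then one set of optimizer variables per weight when the type is "adam", and finally epoch_idx and iter. A small standalone sketch to inspect that trailer; it assumes both counters are serialized as unsigned int, so adjust the types if the members are declared with different widths:

    #include <fstream>
    #include <iostream>

    // Print the trailer that NeuralNetwork::save() (below) writes last.
    int main(int argc, char **argv) {
      if (argc < 2)
        return 1;
      std::ifstream f(argv[1], std::ios::in | std::ios::binary);
      unsigned int epoch_idx = 0, iter = 0;
      // the two counters are the final bytes of the file
      f.seekg(-static_cast<std::streamoff>(sizeof(epoch_idx) + sizeof(iter)),
              std::ios::end);
      f.read(reinterpret_cast<char *>(&epoch_idx), sizeof(epoch_idx));
      f.read(reinterpret_cast<char *>(&iter), sizeof(iter));
      std::cout << "epoch_idx=" << epoch_idx << ", iter=" << iter << '\n';
      return 0;
    }
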
diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp
index b16b013..daa47f2 100644
--- a/nntrainer/models/neuralnet.cpp
+++ b/nntrainer/models/neuralnet.cpp
@@ -71,6 +71,8 @@ NeuralNetwork::NeuralNetwork(AppContext app_context_) :
   initialized(false),
   compiled(false),
   loadedFromConfig(false),
+  loadedWeight(false),
+  bin_file_pos(0),
   app_context(app_context_) {}
 
 int NeuralNetwork::loadFromConfig(const std::string &config) {
@@ -323,8 +325,19 @@ void NeuralNetwork::save(const std::string &file_path,
     for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
          iter++) {
       (*iter)->save(model_file);
     }
+
+    opt->save(model_file);
+
+    if (opt->getType() == "adam") {
+      for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
+           iter++) {
+        (*iter)->save(model_file, true);
+      }
+    }
+
     model_file.write((char *)&epoch_idx, sizeof(epoch_idx));
     model_file.write((char *)&iter, sizeof(iter));
+    model_file.close();
     break;
   }
@@ -361,13 +374,28 @@ void NeuralNetwork::load(const std::string &file_path,
     auto model_file = checkedOpenStream<std::ifstream>(
       file_path, std::ios::in | std::ios::binary);
 
-    for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
-      (*iter)->read(model_file);
+    if (!loadedWeight) {
+      for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
+           iter++) {
+        (*iter)->read(model_file);
+      }
+      loadedWeight = true;
+      bin_file_pos = model_file.tellg();
     }
-
     try {
       /// this is assuming that the failure is allowed at the end of the file
       /// read. so, after this line, additional read shouldn't be called
+      model_file.seekg(bin_file_pos);
+
+      std::string opt_type;
+      opt_type = readString(model_file);
+      if (istrequal(opt_type, "adam") && istrequal(opt->getType(), "adam")) {
+        for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
+             iter++) {
+          (*iter)->read(model_file, true);
+        }
+      }
+
       checkedRead(model_file, (char *)&epoch_idx, sizeof(epoch_idx),
                   "[NeuralNetwork::readModel] failed to read epoch_idx");
       checkedRead(model_file, (char *)&iter, sizeof(iter),
@@ -604,6 +632,11 @@ int NeuralNetwork::train(const std::vector<std::string> &values) {
   status = allocate(ExecutionMode::TRAIN);
   NN_RETURN_STATUS();
 
+  // @note needs to run here, after allocation, to read the optimizer variables
+  if (!load_path.empty()) {
+    load(load_path, ml::train::ModelFormat::MODEL_FORMAT_BIN);
+  }
+
   status = train_run();
   NN_RETURN_STATUS();
 
@@ -800,6 +833,8 @@ void swap(NeuralNetwork &lhs, NeuralNetwork &rhs) {
     swap(lhs.graph_representation, rhs.graph_representation);
     swap(lhs.compiled, rhs.compiled);
     swap(lhs.loadedFromConfig, rhs.loadedFromConfig);
+    swap(lhs.loadedWeight, rhs.loadedWeight);
+    swap(lhs.bin_file_pos, rhs.bin_file_pos);
   }
 }
 
diff --git a/nntrainer/models/neuralnet.h b/nntrainer/models/neuralnet.h
index 0e86017..e87c4c1 100644
--- a/nntrainer/models/neuralnet.h
+++ b/nntrainer/models/neuralnet.h
@@ -530,6 +530,10 @@ private:
   bool loadedFromConfig; /**< Check if config is loaded to prevent load twice */
 
+  bool loadedWeight; /**< Check if weight is loaded to prevent load twice */
+
+  uint64_t bin_file_pos; /**< file position to load from later */
+
   RunStats validation; /** validation statistics of the model */
   RunStats training;   /** training statistics of the model */
   RunStats testing;    /** testing statistics of the model */
diff --git a/nntrainer/tensor/weight.h b/nntrainer/tensor/weight.h
index c5c6841..d13ae1b 100644
--- a/nntrainer/tensor/weight.h
+++ b/nntrainer/tensor/weight.h
@@ -198,6 +198,12 @@ public:
   Tensor &getOptimizerVariableRef(unsigned int idx) { return *opt_vars[idx]; }
 
   /**
+   * @brief Get the number of optimizer variables
+   * @retval number of optimizer variables
+   */
+  unsigned int getNumOptVariable() { return opt_vars.size(); }
+
+  /**
    * @brief check if weight regularizer type is l2norm
    * @return bool is weight regrulatizer type is L2 Norm
    */
-- 
2.7.4
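Finally, an end-to-end sketch of the round trip this patch enables (hypothetical driver code against the internal API; model construction, compile/initialize/allocate, and dataset wiring are omitted). With an Adam optimizer, a resumed run continues from the saved first and second moment estimates instead of freshly zeroed ones:

    #include <neuralnet.h> // nntrainer::NeuralNetwork

    void checkpoint_roundtrip(nntrainer::NeuralNetwork &net,
                              const std::string &path) {
      // writes weights, the optimizer type, per-weight optimizer variables
      // (adam only), then epoch_idx and iter
      net.save(path, ml::train::ModelFormat::MODEL_FORMAT_BIN);

      // the first load() restores weights and records bin_file_pos;
      // train() later calls load() again so the optimizer variables are
      // read once they have been allocated
      net.load(path, ml::train::ModelFormat::MODEL_FORMAT_BIN);
    }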