From: Jihoon Lee
Date: Fri, 11 Mar 2022 15:42:54 +0000 (+0900)
Subject: Change loading meta information behavior
X-Git-Tag: accepted/tizen/unified/20220323.062643~5
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=78d35926337dcc9161216a8f651443f0fa1c24af;p=platform%2Fcore%2Fml%2Fnntrainer.git

Change loading meta information behavior

**Before this PR**
Optimizer variables were loaded from load_path every time, so calling
model->train(); repeatedly behaved unintuitively:
1. model->train() reloaded from the original load path, so the iteration
   number rolled back to the first one.
2. The same happened for the adam weights.
3. model->load(); after model->initialize(); was a no-op because loadedWeight
   became true.

**After this PR**
1. The model loads from load_path only at initialize time.
2. model->load() is no longer implicitly overridden (see the usage sketch
   below).

**Additional Changes**
1. Optimizer weights became part of the weights and are now available after
   initialize().
2. The save format is now coherent with the load format.
3. Some unused variables were deleted.
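To make the new behavior concrete, here is a minimal, hypothetical usage
sketch. It is illustrative only and not part of this patch: it assumes the
public ccapi (ml::train::createModel / createLayer / createOptimizer), and the
layer properties and file names below are placeholders.

```cpp
// Hypothetical usage sketch, not part of this patch. The call ordering is the
// point: load_path is consumed exactly once inside initialize(), and an
// explicit load() after initialize() is honored instead of being skipped.
#include <layer.h>
#include <model.h>
#include <optimizer.h>

int main() {
  auto model = ml::train::createModel(ml::train::ModelType::NEURAL_NET);
  model->addLayer(ml::train::createLayer(
    "fully_connected", {"name=fc0", "unit=10", "input_shape=1:1:100"}));
  model->setOptimizer(
    ml::train::createOptimizer("adam", {"learning_rate=0.001"}));
  model->setProperty({"batch_size=32", "epochs=2", "save_path=next.bin"});

  model->compile();
  model->initialize(); // weights (and now adam momentums) are read from
                       // load_path here, exactly once, if one was configured

  // no longer a silent no-op: the loadedWeight guard is gone
  model->load("pretrained.bin", ml::train::ModelFormat::MODEL_FORMAT_BIN);

  model->train(); // dataset/loss setup omitted for brevity
  model->train(); // a second call no longer rolls the iteration and the adam
                  // state back to the checkpoint on disk
  return 0;
}
```

Since optimizer variables are now requested from the weight pool, initialize()
restores them together with the weights, and train_run() resets them via
clearOptVar() when training starts fresh.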
**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Jihoon Lee
---

diff --git a/nntrainer/layers/layer_context.cpp b/nntrainer/layers/layer_context.cpp
index 6c15112..d565611 100644
--- a/nntrainer/layers/layer_context.cpp
+++ b/nntrainer/layers/layer_context.cpp
@@ -172,9 +172,6 @@ Tensor &RunLayerContext::getWeightGrad(unsigned int idx) const {
  */
 Tensor &RunLayerContext::getWeightOptVar(unsigned int idx,
                                          unsigned int jdx) const {
-  if (!weights[idx]->hasGradient())
-    throw std::invalid_argument(
-      "Requesting gradient for a non-trainable weight.");
   return weights[idx]->getOptimizerVariableRef(jdx);
 }
 
@@ -185,9 +182,6 @@ Tensor &RunLayerContext::getWeightOptVar(unsigned int idx,
  * @return int Number of the weight optimizer variable
  */
 unsigned int RunLayerContext::getNumWeightOptVar(unsigned int idx) const {
-  if (!weights[idx]->hasGradient())
-    throw std::invalid_argument(
-      "Requesting gradient for a non-trainable weight.");
   return weights[idx]->getNumOptVariable();
 }
 
diff --git a/nntrainer/layers/layer_node.cpp b/nntrainer/layers/layer_node.cpp
index f9e6d50..0820c59 100644
--- a/nntrainer/layers/layer_node.cpp
+++ b/nntrainer/layers/layer_node.cpp
@@ -421,12 +421,9 @@ void LayerNode::read(std::ifstream &file, bool opt_var) {
   if (opt_var) {
     for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
       if (run_context->isGradientLastAccess(i) && getTrainable()) {
-        // @note read optimizer variables
-        if (run_context->weightHasGradient(i)) {
-          for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i);
-               ++j) {
-            run_context->getWeightOptVar(i, j).read(file);
-          }
+        /// @note read optimizer variables
+        for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i); ++j) {
+          run_context->getWeightOptVar(i, j).read(file);
         }
       }
     }
@@ -466,6 +463,19 @@ void LayerNode::save(std::ofstream &file, bool opt_var) const {
   }
 }
 
+void LayerNode::clearOptVar() {
+  NNTR_THROW_IF(!run_context, std::runtime_error)
+    << __func__ << " layer needs to be finalized first!";
+  for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
+    if (run_context->isGradientLastAccess(i) && getTrainable()) {
+      /// @note read optimizer variables
+      for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i); ++j) {
+        run_context->getWeightOptVar(i, j).initialize();
+      }
+    }
+  }
+}
+
 /**
  * @brief Finalize creating the layer node
  */
@@ -624,7 +634,8 @@ void LayerNode::setBatch(unsigned int batch) {
  * @brief If the current layer can support in-place
  */
 bool LayerNode::supportInPlace() const {
-  ///@note below is a quick fix, we need to have a guard that this shouldn't be
+  ///@note below is a quick fix, we need to have a guard that this shouldn't
+  /// be
   /// query until realizeProps has been finalized ( which means we will need
   /// another end point to fixate this property )
   if (getDistribute()) {
diff --git a/nntrainer/layers/layer_node.h b/nntrainer/layers/layer_node.h
index ede2006..9a50f22 100644
--- a/nntrainer/layers/layer_node.h
+++ b/nntrainer/layers/layer_node.h
@@ -584,6 +584,12 @@ public:
   void save(std::ofstream &file, bool opt_var = false) const;
 
   /**
+   * @brief clear optimizer variable to initial state
+   *
+   */
+  void clearOptVar();
+
+  /**
    * @brief get loss for the layer
    * @return loss of the layer
    */
diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp
index 8eaba4d..b7a30e0 100644
--- a/nntrainer/models/neuralnet.cpp
+++ b/nntrainer/models/neuralnet.cpp
@@ -71,8 +71,6 @@ NeuralNetwork::NeuralNetwork(AppContext app_context_) :
   initialized(false),
   compiled(false),
   loadedFromConfig(false),
-  loadedWeight(false),
-  bin_file_pos(0),
   app_context(app_context_) {}
 
 int NeuralNetwork::loadFromConfig(const std::string &config) {
@@ -189,6 +187,8 @@ int NeuralNetwork::initialize() {
     std::get(model_flex_props));
 
   // initialize optimizer and related variables
+  /// @todo: initialize should take a mode and check if mode is train but
+  /// optimizer is not given, make it as a hard error
   if (opt) {
     /** TODO: update request of optimizer to be of same format as
      * Layer::requestTensor */
@@ -205,11 +205,7 @@ int NeuralNetwork::initialize() {
 
   initialized = true;
 
-  // @note we need check loadedWeight for the case of multiple call of load to
-  // load weight. Only the weight needs to be loaded here. Becuase the buffer
-  // for the optimizer is not allocated yet.
-  // loadedWeight check is just for the duplicate load of weight.
-  if (!load_path.empty() && !loadedWeight) {
+  if (!load_path.empty()) {
     load(load_path, ml::train::ModelFormat::MODEL_FORMAT_BIN);
   }
 
@@ -328,14 +324,12 @@ void NeuralNetwork::save(const std::string &file_path,
   switch (format) {
   case ml::train::ModelFormat::MODEL_FORMAT_BIN: {
     auto model_file = checkedOpenStream(
-      file_path, std::ios::out | std::ios::binary);
+      file_path, std::ios::out | std::ios::binary | std::ios::trunc);
     for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
       (*iter)->save(model_file);
     }
-
-    opt->save(model_file);
-
-    if (istrequal(opt->getType(), "adam")) {
+    if (opt && istrequal(opt->getType(), "adam")) {
+      model_file.write("adam", 4);
       for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
            iter++) {
         (*iter)->save(model_file, true);
       }
@@ -381,22 +375,13 @@ void NeuralNetwork::load(const std::string &file_path,
     auto model_file = checkedOpenStream(
       file_path, std::ios::in | std::ios::binary);
 
-    if (!loadedWeight) {
-      for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
-           iter++) {
-        (*iter)->read(model_file);
-      }
-      loadedWeight = true;
-      bin_file_pos = model_file.tellg();
-      load_path = file_path;
-      return;
+    for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
+      (*iter)->read(model_file);
     }
     try {
       /// this is assuming that the failure is allowed at the end of the file
      /// read. so, after this line, additional read shouldn't be called
-      model_file.seekg(bin_file_pos);
-
-      if (istrequal(opt->getType(), "adam")) {
+      if (opt && istrequal(opt->getType(), "adam")) {
        char opt_type[4];
        model_file.read(opt_type, 4);
        if (istrequal(opt_type, "adam")) {
@@ -412,7 +397,8 @@ void NeuralNetwork::load(const std::string &file_path,
       checkedRead(model_file, (char *)&iter, sizeof(iter),
                   "[NeuralNetwork::readModel] failed to read iteration");
     } catch (...) {
-      std::cerr << "failed to read epoch idx, proceeding with default index\n";
+      std::cerr << "failed to read additional data like optimizer variable, "
+                   "iteration, proceeding with default\n";
     }
 
     ml_logi("read modelfile: %s", file_path.c_str());
@@ -644,11 +630,6 @@ int NeuralNetwork::train(const std::vector &values) {
   status = allocate(ExecutionMode::TRAIN);
   NN_RETURN_STATUS();
 
-  // @note Need to be here to read the optimizer variables
-  if (!load_path.empty()) {
-    load(load_path, ml::train::ModelFormat::MODEL_FORMAT_BIN);
-  }
-
   status = train_run();
   NN_RETURN_STATUS();
 
@@ -668,10 +649,14 @@ int NeuralNetwork::train_run() {
   int status = ML_ERROR_NONE;
 
   if (!std::get(model_flex_props)) {
-    epoch_idx = 0;
     iter = 0;
+    for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
+      (*iter)->clearOptVar();
+    }
   }
 
+  epoch_idx = 0;
+
   auto batch_size = std::get(model_flex_props);
 
   auto const &outputs = model_graph.getOutputTensors();
@@ -845,8 +830,6 @@ void swap(NeuralNetwork &lhs, NeuralNetwork &rhs) {
     swap(lhs.graph_representation, rhs.graph_representation);
     swap(lhs.compiled, rhs.compiled);
     swap(lhs.loadedFromConfig, rhs.loadedFromConfig);
-    swap(lhs.loadedWeight, rhs.loadedWeight);
-    swap(lhs.bin_file_pos, rhs.bin_file_pos);
   }
 }
 
diff --git a/nntrainer/models/neuralnet.h b/nntrainer/models/neuralnet.h
index b12a476..8b4a19a 100644
--- a/nntrainer/models/neuralnet.h
+++ b/nntrainer/models/neuralnet.h
@@ -530,10 +530,6 @@ private:
 
   bool loadedFromConfig; /**< Check if config is loaded to prevent load twice */
 
-  bool loadedWeight; /**< Check if weight is loaded to prevent load twice */
-
-  uint64_t bin_file_pos; /**< save file position to load later*/
-
   RunStats validation; /** validation statistics of the model */
   RunStats training;   /** training statistics of the model */
   RunStats testing;    /** testing statistics of the model */
diff --git a/nntrainer/tensor/manager.cpp b/nntrainer/tensor/manager.cpp
index 7b212e0..4a88081 100644
--- a/nntrainer/tensor/manager.cpp
+++ b/nntrainer/tensor/manager.cpp
@@ -597,13 +597,15 @@ bool Manager::isSecondLastAccess(const std::string &name,
 std::vector Manager::requestWeightOptimizerVariables(
   const std::vector &dims, const std::string &name,
   const TensorLifespan &lifespan, Tensor::Initializer initializer) {
-  auto const &exec_order = weight_pool.getExecutionOrder(name);
+  auto const exec_order = weight_pool.getExecutionOrder(name);
 
   std::vector ret;
   ret.reserve(dims.size());
 
+  /// @note this is assuming weight optimizer variables is treated as weight, if
+  /// not, there is room to optimize below behavior
   for (unsigned int idx = 0; idx < dims.size(); idx++)
-    ret.push_back(tensor_pool.request(name + ":opt" + std::to_string(idx),
+    ret.push_back(weight_pool.request(name + ":opt" + std::to_string(idx),
                                       dims[idx], exec_order, lifespan,
                                       initializer));
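For reference, a sketch of the MODEL_FORMAT_BIN layout implied by the save()
and load() hunks above. This is an informal reading of the patch, not a formal
specification; field widths depend on the member types declared in
neuralnet.h.

```cpp
// Informal sketch of the MODEL_FORMAT_BIN layout implied by this patch:
//
//   for each layer node, in graph order:
//     weight tensors                    // (*iter)->save(model_file)
//   if the optimizer is adam:
//     "adam"                            // 4-byte tag, model_file.write("adam", 4)
//     for each layer node, in graph order:
//       optimizer variable tensors      // (*iter)->save(model_file, true)
//   epoch_idx                           // read back with checkedRead()
//   iter                                // read back with checkedRead()
//
// load() mirrors this: the weights are always read, while the trailing tag,
// optimizer variables, epoch index, and iteration are read inside a try
// block, so a file without them falls back to the defaults instead of
// failing.
```

This tagged trailer is what makes the save format coherent with the load
format, as noted in the "Additional Changes" list above.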