if (opt_var) {
for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
if (run_context->isGradientLastAccess(i) && getTrainable()) {
- // @note read optimizer variables
- if (run_context->weightHasGradient(i)) {
- for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i);
- ++j) {
- run_context->getWeightOptVar(i, j).read(file);
- }
+ /// @note read optimizer variables
+ for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i); ++j) {
+ run_context->getWeightOptVar(i, j).read(file);
}
}
}
}
}
+void LayerNode::clearOptVar() {
+ NNTR_THROW_IF(!run_context, std::runtime_error)
+ << __func__ << " layer needs to be finalized first!";
+ for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
+ if (run_context->isGradientLastAccess(i) && getTrainable()) {
+ /// @note re-initialize (not read) every optimizer variable of weight i by calling initialize() on it
+ for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i); ++j) {
+ run_context->getWeightOptVar(i, j).initialize();
+ }
+ }
+ }
+}
+
/**
* @brief Finalize creating the layer node
*/
* @brief If the current layer can support in-place
*/
bool LayerNode::supportInPlace() const {
- ///@note below is a quick fix, we need to have a guard that this shouldn't be
+ ///@note below is a quick fix, we need to have a guard that this shouldn't
+ /// be
/// query until realizeProps has been finalized ( which means we will need
/// another end point to fixate this property )
if (getDistribute()) {
initialized(false),
compiled(false),
loadedFromConfig(false),
- loadedWeight(false),
- bin_file_pos(0),
app_context(app_context_) {}
int NeuralNetwork::loadFromConfig(const std::string &config) {
std::get<props::TrainingBatchSize>(model_flex_props));
// initialize optimizer and related variables
+ /// @todo: initialize should take a mode and check if mode is train but
+ /// optimizer is not given, make it as a hard error
if (opt) {
/** TODO: update request of optimizer to be of same format as
* Layer::requestTensor */
initialized = true;
- // @note we need check loadedWeight for the case of multiple call of load to
- // load weight. Only the weight needs to be loaded here. Becuase the buffer
- // for the optimizer is not allocated yet.
- // loadedWeight check is just for the duplicate load of weight.
- if (!load_path.empty() && !loadedWeight) {
+ if (!load_path.empty()) {
load(load_path, ml::train::ModelFormat::MODEL_FORMAT_BIN);
}
switch (format) {
case ml::train::ModelFormat::MODEL_FORMAT_BIN: {
auto model_file = checkedOpenStream<std::ofstream>(
- file_path, std::ios::out | std::ios::binary);
+ file_path, std::ios::out | std::ios::binary | std::ios::trunc);
for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
(*iter)->save(model_file);
}
-
- opt->save(model_file);
-
- if (istrequal(opt->getType(), "adam")) {
+ if (opt && istrequal(opt->getType(), "adam")) {
+ model_file.write("adam", 4);
for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
iter++) {
(*iter)->save(model_file, true);
auto model_file = checkedOpenStream<std::ifstream>(
file_path, std::ios::in | std::ios::binary);
- if (!loadedWeight) {
- for (auto iter = model_graph.cbegin(); iter != model_graph.cend();
- iter++) {
- (*iter)->read(model_file);
- }
- loadedWeight = true;
- bin_file_pos = model_file.tellg();
- load_path = file_path;
- return;
+ for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
+ (*iter)->read(model_file);
}
try {
/// this is assuming that the failure is allowed at the end of the file
/// read. so, after this line, additional read shouldn't be called
- model_file.seekg(bin_file_pos);
-
- if (istrequal(opt->getType(), "adam")) {
+ if (opt && istrequal(opt->getType(), "adam")) {
char opt_type[4];
model_file.read(opt_type, 4);
if (istrequal(opt_type, "adam")) {
checkedRead(model_file, (char *)&iter, sizeof(iter),
"[NeuralNetwork::readModel] failed to read iteration");
} catch (...) {
- std::cerr << "failed to read epoch idx, proceeding with default index\n";
+ std::cerr << "failed to read additional data like optimizer variable, "
+ "iteration, proceeding with default\n";
}
ml_logi("read modelfile: %s", file_path.c_str());
status = allocate(ExecutionMode::TRAIN);
NN_RETURN_STATUS();
- // @note Need to be here to read the optimizer variables
- if (!load_path.empty()) {
- load(load_path, ml::train::ModelFormat::MODEL_FORMAT_BIN);
- }
-
status = train_run();
NN_RETURN_STATUS();
int status = ML_ERROR_NONE;
if (!std::get<props::ContinueTrain>(model_flex_props)) {
- epoch_idx = 0;
iter = 0;
+ for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
+ (*iter)->clearOptVar();
+ }
}
+ epoch_idx = 0;
+
auto batch_size = std::get<props::TrainingBatchSize>(model_flex_props);
auto const &outputs = model_graph.getOutputTensors();
swap(lhs.graph_representation, rhs.graph_representation);
swap(lhs.compiled, rhs.compiled);
swap(lhs.loadedFromConfig, rhs.loadedFromConfig);
- swap(lhs.loadedWeight, rhs.loadedWeight);
- swap(lhs.bin_file_pos, rhs.bin_file_pos);
}
}
std::vector<Tensor *> Manager::requestWeightOptimizerVariables(
const std::vector<TensorDim> &dims, const std::string &name,
const TensorLifespan &lifespan, Tensor::Initializer initializer) {
- auto const &exec_order = weight_pool.getExecutionOrder(name);
+ auto const exec_order = weight_pool.getExecutionOrder(name);
std::vector<Tensor *> ret;
ret.reserve(dims.size());
+ /// @note this assumes weight optimizer variables are treated as weights; if
+ /// not, there is room to optimize the behavior below
for (unsigned int idx = 0; idx < dims.size(); idx++)
- ret.push_back(tensor_pool.request(name + ":opt" + std::to_string(idx),
+ ret.push_back(weight_pool.request(name + ":opt" + std::to_string(idx),
dims[idx], exec_order, lifespan,
initializer));