int setProperty(std::vector<std::string> values);
/**
+ * @brief Optimizer Setter
+ * @param[in] opt Optimizer
+ * @retval #ML_ERROR_NONE Successful.
+ * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+ */
+ int setOptimizer(Optimizer &opt);
+
+ /**
* @brief Property Enumeration
* 1. bias zero : bool
* 4. activation : bool
int backwarding(Tensor input, Tensor expected_output, int iteration);
/**
- * @brief save W & B into file
+ * @brief save model and training parameters into file
*/
void saveModel();
/**
- * @brief read W & B from file
+ * @brief read model and training parameters from file
*/
void readModel();
batch_size = 7,
epochs = 8,
model_file = 9,
+ continue_train = 10,
};
private:
/**
* @brief Optimizer
+ * @note This gets copied into each layer, do not use this directly
*/
Optimizer opt;
std::shared_ptr<DataBuffer> data_buffer;
/**
+ * @brief Continue train from the previous state of optimizer and iterations
+ */
+ bool continue_train;
+
+ /**
* @brief Number of iterations trained
*/
- int iter;
+ uint64_t iter;
};
} /* namespace nntrainer */
/**
* @brief type for the Optimizer to save hyper-parameters
*/
-typedef struct {
+typedef struct _OptParam {
float learning_rate;
double beta1;
double beta2;
double epsilon;
float decay_rate;
float decay_steps;
+ bool continue_train; /**< Continue training with previous tensors for adam */
+
+ /** Zero-initialize every hyper-parameter; continue_train defaults to off */
+ _OptParam() : learning_rate(0.0), beta1(0.0), beta2(0.0), epsilon(0.0),
+ decay_rate(0.0), decay_steps(0.0), continue_train(false) {}
} OptParam;
class Optimizer {
/**
* @brief Constructor of Optimizer Class
*/
- Optimizer();
+ Optimizer() : type(OptType::unknown), popt() {}
/**
* @brief Destructor of Optimizer Class
*/
- ~Optimizer(){};
+ ~Optimizer() {}
/**
* @brief set Optimizer Type
/**
* @brief initialize optimizer. Initialize Weight if it is adam
* @param[in] d TensorDim
- * @param[in] setTensor true if the layer need wieght update.
- * Input Layer and Batch Noramlization layer won't need it.
+ * @param[in] setTensor true if the layer need weight update.
+ * Input Layer and Batch Normalization layer won't need it.
* Therefore, it sets false.
* @retval #ML_ERROR_NONE Successful.
* @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
beta1 = 3,
beta2 = 4,
epsilon = 5,
+ continue_train = 6,
};
+ /**
+ * @brief Read training optimizer parameters from file
+ * @param[in] file input stream file
+ */
+ void read(std::ifstream &file);
+
+ /**
+ * @brief Save training optimizer parameters into file
+ * @param[in] file output stream file
+ */
+ void save(std::ofstream &file);
+
private:
/**
* @brief Optimizer Type
int getBatch() const { return dim.batch(); };
/**
+ * @brief Get size of the data
+ * @retval size_t Size in bytes
+ */
+ size_t getSize() const {
+ return dim.getDataLen() * sizeof(decltype(data)::value_type);
+ }
+
+ /**
* @brief Set the element value
* @param[in] batch batch location
* @param[in] c channel location
return status;
}
+/**
+ * Hands the optimizer to the base Layer, then initializes the layer's own
+ * copy with this layer's dimension. setTensor is true because a
+ * fully-connected layer needs weight updates (per Optimizer::initialize's
+ * contract), so the optimizer's per-weight state tensors get allocated.
+ */
+int FullyConnectedLayer::setOptimizer(Optimizer &opt) {
+ int status = Layer::setOptimizer(opt);
+ if (status != ML_ERROR_NONE)
+ return status;
+
+ // this->opt is the layer-local copy stored by Layer::setOptimizer above
+ return this->opt.initialize(dim, true);
+}
+
Tensor FullyConnectedLayer::forwarding(Tensor in, int &status) {
input = in;
hidden = input.chain().dot(weight).add_i(bias).run();
void FullyConnectedLayer::read(std::ifstream &file) {
weight.read(file);
bias.read(file);
+ opt.read(file);
}
void FullyConnectedLayer::save(std::ofstream &file) {
weight.save(file);
bias.save(file);
+ opt.save(file);
}
void FullyConnectedLayer::copy(std::shared_ptr<Layer> l) {
this->opt.setType(opt.getType());
this->opt.setOptParam(opt.getOptParam());
- return this->opt.initialize(dim, true);
+ return this->opt.initialize(dim, false);
}
int Layer::checkValidation() {
net_type = NET_UNKNOWN;
data_buffer = NULL;
iter = 0;
+ continue_train = false;
this->setConfig(config);
}
case PropertyType::epochs: {
int e;
status = setInt(e, value);
- epoch = e;
NN_RETURN_STATUS();
+ epoch = e;
} break;
case PropertyType::train_data: {
status = std::static_pointer_cast<DataBufferFromDataFile>(data_buffer)
case PropertyType::model_file: {
model = value;
} break;
+ case PropertyType::continue_train: {
+ bool cont_train;
+ status = setBoolean(cont_train, value);
+ NN_RETURN_STATUS();
+ continue_train = cont_train;
+ opt.setProperty({values[i]});
+ } break;
default:
ml_loge("Error: Unknown Network Property Key");
status = ML_ERROR_INVALID_PARAMETER;
status = layers[i]->setCost(cost);
NN_RETURN_STATUS();
break;
- case LAYER_FC:
+ case LAYER_FC: {
+ std::shared_ptr<FullyConnectedLayer> fc_layer =
+ std::static_pointer_cast<FullyConnectedLayer>(layers[i]);
layers[i]->setInputDimension(previous_dim);
+
status = layers[i]->setCost(cost);
NN_RETURN_STATUS();
status = layers[i]->initialize(last);
NN_RETURN_STATUS();
- status = layers[i]->setOptimizer(opt);
+ status = fc_layer->setOptimizer(opt);
NN_RETURN_STATUS();
- break;
+ } break;
case LAYER_BN:
layers[i]->setInputDimension(previous_dim);
status = layers[i]->initialize(last);
/**
* @brief save model
* save Weight & Bias Data into file by calling save from layer
+ * save training parameters from the optimizer
*/
void NeuralNetwork::saveModel() {
std::ofstream model_file(model, std::ios::out | std::ios::binary);
for (unsigned int i = 0; i < layers.size(); i++)
layers[i]->save(model_file);
+ // Trained iteration count is written after all layer data so that
+ // readModel() can restore it when continue_train is enabled.
+ model_file.write((char *)&iter, sizeof(iter));
model_file.close();
}
/**
* @brief read model
* read Weight &amp; Bias Data from file by calling read from layer
+ * restore training parameters such as the trained iteration count
+ * when continuing training
*/
void NeuralNetwork::readModel() {
if (!is_file_exist(model))
std::ifstream model_file(model, std::ios::in | std::ios::binary);
for (unsigned int i = 0; i < layers.size(); i++)
layers[i]->read(model_file);
+ if (continue_train) {
+ model_file.read((char *)&iter, sizeof(iter));
+ }
model_file.close();
ml_logi("read modelfile: %s", model.c_str());
}
*
*/
+#include <iostream>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <lazy_tensor.h>
namespace nntrainer {
-Optimizer::Optimizer() {
- type = OptType::unknown;
- popt.learning_rate = 0.0;
- popt.beta1 = 0.0;
- popt.beta2 = 0.0;
- popt.epsilon = 0.0;
- popt.decay_rate = 0.0;
- popt.decay_steps = 0.0;
-};
-
int Optimizer::setType(OptType t) {
int status = ML_ERROR_NONE;
if (t == OptType::unknown) {
status = setDouble(popt.epsilon, value);
NN_RETURN_STATUS();
break;
+ case PropertyType::continue_train:
+ status = setBoolean(popt.continue_train, value);
+ NN_RETURN_STATUS();
+ break;
default:
ml_loge("Error: Unknown Optimizer Property Key");
status = ML_ERROR_INVALID_PARAMETER;
return status;
}
+
+void Optimizer::read(std::ifstream &file) {
+  // Default to unknown so a short or legacy model file (no optimizer
+  // state appended) cannot leave the tag uninitialized and compared.
+  OptType loaded_type = OptType::unknown;
+  file.read((char *)&loaded_type, sizeof(OptType));
+  if (!file.good())
+    return;
+
+  if (type == OptType::adam && loaded_type == type) {
+    if (popt.continue_train) {
+      // Restore the four adam state tensors in the order save() wrote them.
+      wm.read(file);
+      bm.read(file);
+      wv.read(file);
+      bv.read(file);
+    } else {
+      // Not continuing training: skip past the stored tensors so data
+      // written after the optimizer state stays aligned for later reads.
+      size_t total_size =
+        wm.getSize() + bm.getSize() + wv.getSize() + bv.getSize();
+      file.seekg(total_size, std::ifstream::cur);
+    }
+  }
+}
+
+/**
+ * Writes the optimizer type tag and, for adam, the four state tensors
+ * (wm, bm, wv, bv) in that order. Counterpart of Optimizer::read(),
+ * which consumes (or skips) exactly this layout.
+ */
+void Optimizer::save(std::ofstream &file) {
+ file.write((char *)&type, sizeof(OptType));
+ if (type == OptType::adam) {
+ wm.save(file);
+ bm.save(file);
+ wv.save(file);
+ bv.save(file);
+ }
+}
} // namespace nntrainer
* beta1 = 3,
* beta2 = 4,
* epsilon = 5,
+ * continue_train = 6,
*/
- std::array<std::string, 7> property_string = {
- "learning_rate", "decay_rate", "decay_steps", "beta1",
- "beta2", "epsilon", "unknown"};
+ std::array<std::string, 8> property_string = {
+ "learning_rate", "decay_rate", "decay_steps", "beta1", "beta2", "epsilon",
+ "continue_train", "unknown"};
for (i = 0; i < property_string.size(); i++) {
unsigned int size = (property_string[i].size() > property.size())