void *user_data) {
static int count = 0;
unsigned int i;
- unsigned int data_size = BATCH_SIZE * FEATURE_SIZE;
+ unsigned int data_size = FEATURE_SIZE;
for (i = 0; i < data_size; ++i) {
outVec[0][i] = 2.0f;
}
outLabel[0][0] = 1.0f;
+ count++;
if (count == 10) {
*last = true;
count = 0;
} else {
*last = false;
- count++;
}
return ML_ERROR_NONE;
* (test.txt)
*/
+#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
+#include <random>
#include <sstream>
-#include <stdlib.h>
-#include <time.h>
#include <databuffer.h>
#include <neuralnet.h>
const unsigned int total_val_data_size = 10;
+constexpr unsigned int SEED = 0;
+
bool training = false;
/**
/**
* @brief get idth Data
* @param[in] F file stream
- * @param[out] outVec feature data
- * @param[out] outLabel label data
+ * @param[out] input feature data
+ * @param[out] label label data
* @param[in] id id th
* @retval boolean true if there is no error
*/
-bool getData(std::ifstream &F, std::vector<float> &outVec,
- std::vector<float> &outLabel, unsigned int id) {
+bool getData(std::ifstream &F, float *input, float *label, unsigned int id) {
std::string temp;
F.clear();
F.seekg(0, std::ios_base::beg);
float x;
for (unsigned int j = 0; j < feature_size; ++j) {
buffer >> x;
- outVec[j] = x;
+ input[j] = x;
}
buffer >> x;
- outLabel[0] = x;
+ label[0] = x;
return true;
}
+std::mt19937 rng;
+std::vector<unsigned int> train_idxes;
+
/**
- * @brief get Data as much as batch size
+ * @brief get a single data
* @param[out] outVec feature data
* @param[out] outLabel label data
* @param[out] last end of data
* @param[in] user_data user data
* @retval int 0 if there is no error
*/
-int getBatch_train(float **outVec, float **outLabel, bool *last,
- void *user_data) {
+int getSample_train(float **outVec, float **outLabel, bool *last,
+ void *user_data) {
std::ifstream dataFile(data_file);
- unsigned int data_size = total_train_data_size;
- unsigned int count = 0;
- if (data_size - train_count < batch_size) {
+ if (!getData(dataFile, *outVec, *outLabel, train_idxes.at(train_count))) {
+ return -1;
+ }
+ train_count++;
+ if (train_count < total_train_data_size) {
+ *last = false;
+ } else {
*last = true;
train_count = 0;
- return 0;
- }
-
- for (unsigned int i = train_count; i < train_count + batch_size; ++i) {
-
- std::vector<float> o;
- std::vector<float> l;
- o.resize(feature_size);
- l.resize(1);
-
- if (!getData(dataFile, o, l, i)) {
- return -1;
- };
-
- for (unsigned int j = 0; j < feature_size; ++j)
- outVec[0][count * feature_size + j] = o[j];
- outLabel[0][count] = l[0];
-
- count++;
+ std::shuffle(train_idxes.begin(), train_idxes.end(), rng);
}
- dataFile.close();
- *last = false;
- train_count += batch_size;
return 0;
}
}
const std::string weight_path = "logistic_model.bin";
+ train_idxes.resize(total_train_data_size);
+ std::iota(train_idxes.begin(), train_idxes.end(), 0);
+ rng.seed(SEED);
const std::vector<std::string> args(argv + 1, argv + argc);
std::string config = args[1];
srand(time(NULL));
- auto data_train =
- ml::train::createDataset(ml::train::DatasetType::GENERATOR, getBatch_train);
+ auto data_train = ml::train::createDataset(ml::train::DatasetType::GENERATOR,
+ getSample_train);
/**
* @brief Create NN
o.resize(feature_size);
l.resize(1);
- getData(dataFile, o, l, j);
+ getData(dataFile, o.data(), l.data(), j);
try {
float answer =
#define APP_VALIDATE
#endif
+#include <algorithm>
#include <climits>
#include <cmath>
#include <fstream>
#include <iomanip>
#include <iostream>
-#include <queue>
+#include <memory>
+#include <random>
#include <sstream>
-#include <stdlib.h>
-#include <time.h>
+#include <vector>
#if defined(APP_VALIDATE)
#include <gtest/gtest.h>
#define VALIDATION false
+constexpr unsigned int SEED = 0;
+
#if VALIDATION
/**
* @brief Data size for each category
const unsigned int total_test_data_size = 32;
-const unsigned int buffer_size = 32;
-
const unsigned int batch_size = 32;
#else
const unsigned int total_test_data_size = 100;
-const unsigned int buffer_size = 100;
-
const unsigned int batch_size = 32;
#endif
/**
* @brief load data at specific position of file
* @param[in] F ifstream (input file)
- * @param[out] outVec
- * @param[out] outLabel
+ * @param[out] input input
+ * @param[out] label label
* @param[in] id th data to get
* @retval true/false false : end of data
*/
-bool getData(std::ifstream &F, std::vector<float> &outVec,
- std::vector<float> &outLabel, unsigned int id) {
+bool getData(std::ifstream &F, float *input, float *label, unsigned int id) {
F.clear();
F.seekg(0, std::ios_base::end);
uint64_t file_length = F.tellg();
return false;
}
F.seekg(position, std::ios::beg);
- for (unsigned int i = 0; i < feature_size; i++)
- F.read((char *)&outVec[i], sizeof(float));
- for (unsigned int i = 0; i < total_label_size; i++)
- F.read((char *)&outLabel[i], sizeof(float));
+ F.read((char *)input, sizeof(float) * feature_size);
+ F.read((char *)label, sizeof(float) * total_label_size);
return true;
}
/**
- * @brief get data which size is batch for train
- * @param[out] outVec
- * @param[out] outLabel
- * @param[out] last if the data is finished
- * @param[in] user_data private data for the callback
- * @retval status for handling error
+ * @brief UserData which stores information used to feed data from data callback
+ *
*/
-int getBatch_train(float **outVec, float **outLabel, bool *last,
- void *user_data) {
- std::vector<int> memI;
- std::vector<int> memJ;
- unsigned int count = 0;
- int data_size = total_train_data_size;
-
- std::ifstream F(filename, std::ios::in | std::ios::binary);
-
-#if VALIDATION
- if (data_size - train_count < batch_size) {
-#else
- if (data_size * total_label_size - train_count < batch_size) {
-#endif
- *last = true;
- train_count = 0;
- return ML_ERROR_NONE;
- }
-
- count = 0;
- for (unsigned int i = train_count; i < train_count + batch_size; i++) {
- std::vector<float> o;
- std::vector<float> l;
-
- o.resize(feature_size);
- l.resize(total_label_size);
-
- getData(F, o, l, i);
-
- for (unsigned int j = 0; j < feature_size; ++j)
- outVec[0][count * feature_size + j] = o[j];
- for (unsigned int j = 0; j < total_label_size; ++j)
- outLabel[0][count * total_label_size + j] = l[j];
- count++;
+class DataInformation {
+public:
+ /**
+ * @brief Construct a new Data Information object
+ *
+ * @param num_samples number of data
+ * @param filename file name to read from
+ */
+ DataInformation(unsigned int num_samples, const std::string &filename);
+ unsigned int count;
+ unsigned int num_samples;
+ std::ifstream file;
+ std::vector<unsigned int> idxes;
+ std::mt19937 rng;
+};
+
+DataInformation::DataInformation(unsigned int num_samples,
+ const std::string &filename) :
+ count(0),
+ num_samples(num_samples),
+ file(filename, std::ios::in | std::ios::binary),
+ idxes(num_samples) {
+ std::iota(idxes.begin(), idxes.end(), 0);
+ rng.seed(SEED);
+ std::shuffle(idxes.begin(), idxes.end(), rng);
+ if (!file.good()) {
+ throw std::invalid_argument("given file is not good, filename: " +
+ filename);
}
-
- F.close();
- *last = false;
- train_count += batch_size;
- return ML_ERROR_NONE;
}
/**
- * @brief get data which size is batch for validation
- * @param[out] outVec
- * @param[out] outLabel
+ * @brief get data which size is batch for train
+ * @param[out] outInput input vectors
+ * @param[out] outLabel label vectors
* @param[out] last if the data is finished
* @param[in] user_data private data for the callback
* @retval status for handling error
*/
-int getBatch_val(float **outVec, float **outLabel, bool *last,
- void *user_data) {
-
- std::vector<int> memI;
- std::vector<int> memJ;
- unsigned int count = 0;
- int data_size = total_val_data_size;
-
- std::ifstream F(filename, std::ios::in | std::ios::binary);
-
-#if VALIDATION
- if (data_size - val_count < batch_size) {
-#else
- if (data_size * total_label_size - val_count < batch_size) {
-#endif
+int getSample(float **outVec, float **outLabel, bool *last, void *user_data) {
+ auto data = reinterpret_cast<DataInformation *>(user_data);
+
+ getData(data->file, *outVec, *outLabel, data->idxes.at(data->count));
+ data->count++;
+ if (data->count < data->num_samples) {
+ *last = false;
+ } else {
*last = true;
- val_count = 0;
- return ML_ERROR_NONE;
+ data->count = 0;
+ std::shuffle(data->idxes.begin(), data->idxes.end(), data->rng);
}
- count = 0;
- for (unsigned int i = val_count; i < val_count + batch_size; i++) {
- std::vector<float> o;
- std::vector<float> l;
-
- o.resize(feature_size);
- l.resize(total_label_size);
-
- getData(F, o, l, i);
-
- for (unsigned int j = 0; j < feature_size; ++j)
- outVec[0][count * feature_size + j] = o[j];
- for (unsigned int j = 0; j < total_label_size; ++j)
- outLabel[0][count * total_label_size + j] = l[j];
- count++;
- }
-
- F.close();
- *last = false;
- val_count += batch_size;
return ML_ERROR_NONE;
}
#if defined(APP_VALIDATE)
TEST(MNIST_training, verify_accuracy) {
- EXPECT_FLOAT_EQ(training_loss, 2.3031187);
- EXPECT_FLOAT_EQ(validation_loss, 2.2951343);
+ EXPECT_FLOAT_EQ(training_loss, 2.5698349);
+ EXPECT_FLOAT_EQ(validation_loss, 2.5551746);
}
#endif
int status = 0;
#ifdef APP_VALIDATE
status = remove("mnist_model.bin");
- if (status != 0)
+ if (status != 0) {
std::cout << "Pre-existing model file doesn't exist.\n";
+ }
#endif
if (argc < 3) {
std::cout << "./nntrainer_mnist mnist.ini dataset.dat\n";
std::string config = args[0];
filename = args[1];
- std::ifstream f(filename);
- if (!f.good()) {
- std::cout << "dataset is not good, filename: " << filename << '\n';
- exit(1);
+ std::unique_ptr<DataInformation> train_user_data;
+ std::unique_ptr<DataInformation> valid_user_data;
+ try {
+ train_user_data =
+ std::make_unique<DataInformation>(total_train_data_size, filename);
+ valid_user_data =
+ std::make_unique<DataInformation>(total_val_data_size, filename);
+ } catch (std::invalid_argument &e) {
+ std::cerr << "Error creating userdata for the data callback " << e.what()
+ << std::endl;
+ return 1;
}
- srand(time(NULL));
- std::vector<std::vector<float>> inputVector, outputVector;
- std::vector<std::vector<float>> inputValVector, outputValVector;
- std::vector<std::vector<float>> inputTestVector, outputTestVector;
-
/**
* @brief Data buffer Create & Initialization
*/
std::shared_ptr<ml::train::Dataset> dataset_train, dataset_val;
try {
- dataset_train =
- createDataset(ml::train::DatasetType::GENERATOR, getBatch_train);
- dataset_val =
- createDataset(ml::train::DatasetType::GENERATOR, getBatch_val);
+ dataset_train = createDataset(ml::train::DatasetType::GENERATOR, getSample,
+ train_user_data.get());
+ dataset_val = createDataset(ml::train::DatasetType::GENERATOR, getSample,
+ valid_user_data.get());
} catch (std::exception &e) {
std::cerr << "Error creating dataset" << e.what() << std::endl;
return 1;
#include <cmath>
#include <fstream>
#include <iostream>
+#include <random>
#include <sstream>
-#include <stdlib.h>
-#include <time.h>
#include <dataset.h>
#include <ml-api-common.h>
std::string data_file;
+constexpr unsigned int SEED = 0;
+
const unsigned int total_train_data_size = 25;
unsigned int train_count = 0;
/**
* @brief get idth Data
* @param[in] F file stream
- * @param[out] outVec feature data
- * @param[out] outLabel label data
+ * @param[out] input feature data
+ * @param[out] label label data
* @param[in] id id th
* @retval boolean true if there is no error
*/
-bool getData(std::ifstream &F, std::vector<float> &outVec,
- std::vector<float> &outLabel, unsigned int id) {
+bool getData(std::ifstream &F, float *input, float *label, unsigned int id) {
std::string temp;
F.clear();
F.seekg(0, std::ios_base::beg);
F.putback(c);
- if (!std::getline(F, temp)) {
+ if (!std::getline(F, temp))
return false;
- }
std::istringstream buffer(temp);
float x;
for (unsigned int j = 0; j < feature_size; ++j) {
buffer >> x;
- outVec[j] = x;
+ input[j] = x;
}
buffer >> x;
- outLabel[0] = x;
+ label[0] = x;
return true;
}
file.close();
}
+std::mt19937 rng;
+std::vector<unsigned int> train_idxes;
+
/**
- * @brief get Data as much as batch size
+ * @brief get a single data
* @param[out] outVec feature data
* @param[out] outLabel label data
* @param[out] last end of data
* @param[in] user_data user data
* @retval int 0 if there is no error
*/
-int getBatch_train(float **outVec, float **outLabel, bool *last,
- void *user_data) {
+int getSample_train(float **outVec, float **outLabel, bool *last,
+ void *user_data) {
std::ifstream dataFile(data_file);
- unsigned int data_size = total_train_data_size;
- unsigned int count = 0;
-
- if (data_size - train_count < batch_size) {
+ if (!getData(dataFile, *outVec, *outLabel, train_idxes.at(train_count))) {
+ return -1;
+ }
+ train_count++;
+ if (train_count < total_train_data_size) {
+ *last = false;
+ } else {
*last = true;
train_count = 0;
- return 0;
+ std::shuffle(train_idxes.begin(), train_idxes.end(), rng);
}
- std::vector<float> o;
- std::vector<float> l;
- o.resize(feature_size);
- l.resize(1);
-
- for (unsigned int i = train_count; i < train_count + batch_size; ++i) {
- if (!getData(dataFile, o, l, i)) {
- return -1;
- }
-
- for (unsigned int j = 0; j < feature_size; ++j) {
- outVec[0][count * feature_size + j] = o[j];
- }
- outLabel[0][count] = l[0];
-
- count++;
- }
-
- dataFile.close();
- *last = false;
- train_count += batch_size;
return 0;
}
-
/**
* @brief create NN
* back propagation of NN
if (!args[0].compare("train"))
training = true;
- srand(time(NULL));
+ train_idxes.resize(total_train_data_size);
+ std::iota(train_idxes.begin(), train_idxes.end(), 0);
+ rng.seed(SEED);
std::shared_ptr<ml::train::Dataset> dataset_train, dataset_val;
try {
dataset_train =
- createDataset(ml::train::DatasetType::GENERATOR, getBatch_train);
+ createDataset(ml::train::DatasetType::GENERATOR, getSample_train);
dataset_val =
- createDataset(ml::train::DatasetType::GENERATOR, getBatch_train);
+ createDataset(ml::train::DatasetType::GENERATOR, getSample_train);
} catch (std::exception &e) {
std::cerr << "Error creating dataset " << e.what() << std::endl;
return 1;
/**
* @brief Create NN
*/
- std::vector<std::vector<float>> inputVector, outputVector;
nntrainer::NeuralNetwork NN;
/**
* @brief Initialize NN with configuration file path
o.resize(feature_size);
l.resize(1);
- getData(dataFile, o, l, j);
+ getData(dataFile, o.data(), l.data(), j);
try {
float answer =
* Classifier : One Fully Connected Layer
*
*/
-///@todo update below
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#if defined(NNSTREAMER_AVAILABLE) && defined(ENABLE_TEST)
#define APP_VALIDATE
*/
int getBatch_train(float **input, float **label, bool *last, void *user_data) {
static unsigned int iteration = 0;
- if (iteration >= EPOCH_SIZE) {
- *last = true;
- iteration = 0;
- return ML_ERROR_NONE;
- }
-
for (int idx = 0; idx < INPUT_SIZE; idx++) {
input[0][idx] = inputVector[iteration][idx];
}
label[0][idx] = labelVector[iteration][idx];
}
- *last = false;
iteration += 1;
+ if (iteration < EPOCH_SIZE) {
+ *last = false;
+ } else {
+ *last = true;
+ iteration = 0;
+ }
return ML_ERROR_NONE;
}
}
/** Set the dataset from generator */
- status = ml_train_dataset_create_with_generator(&dataset, getBatch_train,
- NULL, NULL);
+ status = ml_train_dataset_create(&dataset);
if (status != ML_ERROR_NONE) {
ml_train_model_destroy(handle);
return status;
}
- status = ml_train_dataset_set_property(dataset, "buffer_size=100", NULL);
+ status = ml_train_dataset_add_generator(dataset, ML_TRAIN_DATASET_MODE_TRAIN,
+ getBatch_train, nullptr);
+ if (status != ML_ERROR_NONE) {
+ ml_train_dataset_destroy(dataset);
+ ml_train_model_destroy(handle);
+ return status;
+ }
+
+ status = ml_train_dataset_set_property_for_mode(
+ dataset, ML_TRAIN_DATASET_MODE_TRAIN, "buffer_size=100", NULL);
if (status != ML_ERROR_NONE) {
ml_train_dataset_destroy(dataset);
ml_train_model_destroy(handle);
for (epoch_idx = epoch_idx + 1; epoch_idx <= epochs; ++epoch_idx) {
training.loss = 0.0f;
- std::future<std::shared_ptr<BatchQueue>> future_bq;
- std::future<std::shared_ptr<IterationQueue>> future_iq;
- if (train_buffer->getType() == "callback") {
- future_bq = train_buffer->startFetchWorker(in_dims, label_dims);
- } else {
- future_iq =
- train_buffer->startFetchWorker_sample(in_dims, label_dims, true);
- }
+ std::future<std::shared_ptr<IterationQueue>> future_iq =
+ train_buffer->startFetchWorker_sample(in_dims, label_dims, true);
// /// @todo make this working, test buffer is running but doing nothing
// if (test_buffer != nullptr && test_buffer->isValid()) {
int count = 0;
while (true) {
- ScopedView<Iteration> iter_view(nullptr);
- if (train_buffer->getType() == "callback") {
-
- auto [last, ins, labels] = *train_buffer->fetch();
- /// @todo multiple input support
- if (last) {
- break;
- }
- in = ins[0];
- label = labels[0];
- } else {
- iter_view = train_buffer->fetch_sample();
- if (iter_view.isEmpty()) {
- break;
- }
- auto &iter = iter_view.get();
- if (iter.batch() != batch_size) {
- /// this is partial batch scenario
- continue;
- }
- /// @todo multiple input support
- in = iter.getInputsRef().front();
- label = iter.getLabelsRef().front();
+ ScopedView<Iteration> iter_view = train_buffer->fetch_sample();
+ if (iter_view.isEmpty()) {
+ break;
+ }
+ auto &iteration = iter_view.get();
+ if (iteration.batch() != batch_size) {
+ /// this is partial batch scenario
+ continue;
}
+ /// @todo multiple input support
+ in = iteration.getInputsRef().front();
+ label = iteration.getLabelsRef().front();
+
forwarding(true);
backwarding(iter++);
training.loss += loss;
}
- if (train_buffer->getType() == "callback") {
- future_bq.get();
- } else {
- future_iq.get();
- }
+ future_iq.get();
if (count == 0)
throw std::runtime_error("No training data");
int right = 0;
validation.loss = 0.0f;
unsigned int tcases = 0;
- std::future<std::shared_ptr<BatchQueue>> future_bq;
- std::future<std::shared_ptr<IterationQueue>> future_iq;
-
- if (valid_buffer->getType() == "callback") {
- future_bq = valid_buffer->startFetchWorker(in_dims, label_dims);
- } else {
- future_iq =
- valid_buffer->startFetchWorker_sample(in_dims, label_dims, false);
- }
+
+ std::future<std::shared_ptr<IterationQueue>> future_iq =
+ valid_buffer->startFetchWorker_sample(in_dims, label_dims, false);
while (true) {
- ScopedView<Iteration> iter_view(nullptr);
- if (valid_buffer->getType() == "callback") {
- auto [last, ins, labels] = *valid_buffer->fetch();
- if (last) {
- break;
- }
- /// @todo multiple input support
- in = ins[0];
- label = labels[0];
- } else {
- iter_view = valid_buffer->fetch_sample();
- if (iter_view.isEmpty()) {
- break;
- }
- auto &iter = iter_view.get();
- if (iter.batch() != batch_size) {
- /// this is partial batch scenario
- continue;
- }
- /// @todo multiple input support
- in = iter.getInputsRef().front();
- label = iter.getLabelsRef().front();
+ ScopedView<Iteration> iter_view = valid_buffer->fetch_sample();
+ if (iter_view.isEmpty()) {
+ break;
}
+ auto &iter = iter_view.get();
+ if (iter.batch() != batch_size) {
+ /// this is partial batch scenario
+ continue;
+ }
+ /// @todo multiple input support
+ in = iter.getInputsRef().front();
+ label = iter.getLabelsRef().front();
forwarding(false);
auto model_out = output.argmax();
tcases++;
}
- if (valid_buffer->getType() == "callback") {
- future_bq.get();
- } else {
- future_iq.get();
- }
+ future_iq.get();
if (tcases == 0) {
ml_loge("Error : 0 test cases");
static DataType from_string(const std::string &value) {
std::stringstream ss(value);
uintptr_t addr = static_cast<uintptr_t>(std::stoull(value, 0, 16));
- std::cerr << "value: " << value << " addr: " << addr;
return reinterpret_cast<DataType>(addr);
}
};
"beta1=0.002", "beta2=0.001", "epsilon=1e-7"}));
EXPECT_NO_THROW(model->setOptimizer(optimizer));
+ auto train_data = createTrainData();
+ auto valid_data = createValidData();
EXPECT_NO_THROW(dataset = ml::train::createDataset(
- ml::train::DatasetType::GENERATOR, getBatch_train));
+ ml::train::DatasetType::GENERATOR, getSample, &train_data));
EXPECT_NO_THROW(dataset->setProperty({"buffer_size=100"}));
EXPECT_EQ(model->setDataset(ml::train::DatasetModeType::MODE_TRAIN, dataset),
ML_ERROR_NONE);
EXPECT_NO_THROW(dataset = ml::train::createDataset(
- ml::train::DatasetType::GENERATOR, getBatch_val));
+ ml::train::DatasetType::GENERATOR, getSample, &valid_data));
EXPECT_NO_THROW(dataset->setProperty({"buffer_size=100"}));
EXPECT_EQ(model->setDataset(ml::train::DatasetModeType::MODE_VALID, dataset),
ML_ERROR_NONE);
EXPECT_EQ(model->initialize(), ML_ERROR_NONE);
EXPECT_NO_THROW(model->train());
- EXPECT_NEAR(model->getTrainingLoss(), 2.2109976, tolerance);
- EXPECT_NEAR(model->getValidationLoss(), 1.995334, tolerance);
+ EXPECT_NEAR(model->getTrainingLoss(), 2.238682, tolerance);
+ EXPECT_NEAR(model->getValidationLoss(), 2.0042247, tolerance);
}
/**
const std::string n, std::string str);
/**
- * @brief get data which size is batch for train
- * @param[out] outVec
- * @param[out] outLabel
- * @param[out] last if the data is finished
- * @param[in] user_data private data for the callback
- * @retval status for handling error
+ * @brief UserData which stores information used to feed data from data callback
+ *
+ */
+class DataInformation {
+public:
+ /**
+ * @brief Construct a new Data Information object
+ *
+ * @param num_samples number of data
+ * @param filename file name to read from
+ */
+ DataInformation(unsigned int num_samples, const std::string &filename);
+ unsigned int count;
+ unsigned int num_samples;
+ std::ifstream file;
+ std::vector<unsigned int> idxes;
+ std::mt19937 rng;
+};
+
+/**
+ * @brief Create a user data for training
+ *
+ * @return DataInformation
+ */
+DataInformation createTrainData();
+
+/**
+ * @brief Create a user data for validataion
+ *
+ * @return DataInformation
*/
-int getBatch_train(float **outVec, float **outLabel, bool *last,
- void *user_data);
+DataInformation createValidData();
/**
- * @brief get data which size is batch for val
+ * @brief get data which size is batch
* @param[out] outVec
* @param[out] outLabel
* @param[out] last if the data is finished
* @param[in] user_data private data for the callback
* @retval status for handling error
*/
-int getBatch_val(float **outVec, float **outLabel, bool *last, void *user_data);
+int getSample(float **outVec, float **outLabel, bool *last, void *user_data);
/**
* @brief Get the Res Path object
#define batch_size 16
#define feature_size 62720
-static bool *duplicate;
-static bool *valduplicate;
-static bool alloc_train = false;
-static bool alloc_val = false;
static std::mt19937 rng(0);
/**
}
/**
- * @brief Generate Random integer value between min to max
- * @param[in] min : minimum value
- * @param[in] max : maximum value
- * @retval min < random value < max
- */
-static int rangeRandom(int min, int max) {
- std::uniform_int_distribution<int> dist(min, max);
- return dist(rng);
-}
-
-/**
* @brief load data at specific position of file
* @param[in] F ifstream (input file)
* @param[out] outVec
* @param[in] id th data to get
* @retval true/false false : end of data
*/
-static bool getData(std::ifstream &F, std::vector<float> &outVec,
- std::vector<float> &outLabel, unsigned int id) {
+static bool getData(std::ifstream &F, float *outVec, float *outLabel,
+ unsigned int id) {
F.clear();
F.seekg(0, std::ios_base::end);
uint64_t file_length = F.tellg();
return false;
}
F.seekg(position, std::ios::beg);
- for (unsigned int i = 0; i < feature_size; i++)
- F.read((char *)&outVec[i], sizeof(float));
- for (unsigned int i = 0; i < num_class; i++)
- F.read((char *)&outLabel[i], sizeof(float));
+ F.read((char *)outVec, sizeof(float) * feature_size);
+ F.read((char *)outLabel, sizeof(float) * num_class);
return true;
}
-/**
- * @brief get data which size is batch for train
- * @param[out] outVec
- * @param[out] outLabel
- * @param[out] last if the data is finished
- * @param[in] user_data private data for the callback
- * @retval status for handling error
- */
-int getBatch_train(float **outVec, float **outLabel, bool *last,
- void *user_data) {
- std::vector<int> memI;
- std::vector<int> memJ;
- unsigned int count = 0;
- unsigned int data_size = 0;
- *last = true;
-
- std::string filename = getResPath("trainingSet.dat", {"test"});
- std::ifstream F(filename, std::ios::in | std::ios::binary);
-
- if (F.good()) {
- F.seekg(0, std::ios::end);
- long file_size = F.tellg();
- data_size = static_cast<unsigned int>(
- file_size / ((num_class + feature_size) * sizeof(float)));
- }
-
- if (!alloc_train) {
- duplicate = (bool *)malloc(sizeof(bool) * data_size);
- if (duplicate == nullptr) {
- ml_loge("[test_util] allocationg memory failed");
- alloc_train = false;
- *last = false;
- F.close();
- return ML_ERROR_BAD_ADDRESS;
- }
-
- for (unsigned int i = 0; i < data_size; ++i) {
- duplicate[i] = false;
- }
- alloc_train = true;
- }
-
- for (unsigned int i = 0; i < data_size; i++) {
- if (!duplicate[i])
- count++;
- }
-
- if (count < batch_size) {
- free(duplicate);
- alloc_train = false;
- *last = true;
- return ML_ERROR_NONE;
- }
-
- count = 0;
- while (count < batch_size) {
- int nomI = rangeRandom(0, data_size - 1);
- if (!duplicate[nomI]) {
- memI.push_back(nomI);
- duplicate[nomI] = true;
- count++;
- }
+DataInformation::DataInformation(unsigned int num_samples,
+ const std::string &filename) :
+ count(0),
+ num_samples(num_samples),
+ file(filename, std::ios::in | std::ios::binary),
+ idxes(num_samples) {
+ std::iota(idxes.begin(), idxes.end(), 0);
+ std::shuffle(idxes.begin(), idxes.end(), rng);
+ rng.seed(0);
+ if (!file.good()) {
+ throw std::invalid_argument("given file is not good, filename: " +
+ filename);
}
+}
- for (unsigned int i = 0; i < count; i++) {
- std::vector<float> o;
- std::vector<float> l;
-
- o.resize(feature_size);
- l.resize(num_class);
-
- getData(F, o, l, memI[i]);
-
- for (unsigned int j = 0; j < feature_size; ++j)
- outVec[0][i * feature_size + j] = o[j];
- for (unsigned int j = 0; j < num_class; ++j)
- outLabel[0][i * num_class + j] = l[j];
- }
+static auto getDataSize = [](const std::string &file_name) {
+ std::ifstream f(file_name, std::ios::in | std::ios::binary);
+ NNTR_THROW_IF(!f.good(), std::invalid_argument)
+ << "cannot find " << file_name;
+ f.seekg(0, std::ios::end);
+ long file_size = f.tellg();
+ return static_cast<unsigned int>(
+ file_size / ((num_class + feature_size) * sizeof(float)));
+};
+
+std::string train_filename = getResPath("trainingSet.dat", {"test"});
+std::string valid_filename = getResPath("trainingSet.dat", {"test"});
+
+DataInformation createTrainData() {
+ return DataInformation(getDataSize(train_filename), train_filename);
+}
- F.close();
- *last = false;
- return ML_ERROR_NONE;
+DataInformation createValidData() {
+ return DataInformation(getDataSize(valid_filename), valid_filename);
}
/**
- * @brief get data which size is batch for validation
+ * @brief get data which size is batch for train
* @param[out] outVec
* @param[out] outLabel
* @param[out] last if the data is finished
* @param[in] user_data private data for the callback
* @retval status for handling error
*/
-int getBatch_val(float **outVec, float **outLabel, bool *last,
- void *user_data) {
-
- std::vector<int> memI;
- std::vector<int> memJ;
- unsigned int count = 0;
- unsigned int data_size = 0;
- *last = true;
-
- std::string filename = getResPath("trainingSet.dat", {"test"});
- std::ifstream F(filename, std::ios::in | std::ios::binary);
-
- if (F.good()) {
- F.seekg(0, std::ios::end);
- long file_size = F.tellg();
- data_size = static_cast<unsigned int>(
- file_size / ((num_class + feature_size) * sizeof(float)));
- }
+int getSample(float **outVec, float **outLabel, bool *last, void *user_data) {
+ auto data = reinterpret_cast<DataInformation *>(user_data);
- if (!alloc_val) {
- valduplicate = (bool *)malloc(sizeof(bool) * data_size);
- if (valduplicate == nullptr) {
- ml_loge("[test_util] allocationg memory failed");
- alloc_val = false;
- *last = false;
- F.close();
- return ML_ERROR_BAD_ADDRESS;
- }
- for (unsigned int i = 0; i < data_size; ++i) {
- valduplicate[i] = false;
- }
- alloc_val = true;
- }
-
- for (unsigned int i = 0; i < data_size; i++) {
- if (!valduplicate[i])
- count++;
- }
-
- if (count < batch_size) {
- free(valduplicate);
- alloc_val = false;
+ getData(data->file, *outVec, *outLabel, data->idxes.at(data->count));
+ data->count++;
+ if (data->count < data->num_samples) {
+ *last = false;
+ } else {
*last = true;
- return ML_ERROR_NONE;
- }
-
- count = 0;
- while (count < batch_size) {
- int nomI = rangeRandom(0, data_size - 1);
- if (!valduplicate[nomI]) {
- memI.push_back(nomI);
- valduplicate[nomI] = true;
- count++;
- }
- }
-
- for (unsigned int i = 0; i < count; i++) {
- std::vector<float> o;
- std::vector<float> l;
-
- o.resize(feature_size);
- l.resize(num_class);
-
- getData(F, o, l, memI[i]);
-
- for (unsigned int j = 0; j < feature_size; ++j)
- outVec[0][i * feature_size + j] = o[j];
- for (unsigned int j = 0; j < num_class; ++j)
- outLabel[0][i * num_class + j] = l[j];
+ data->count = 0;
+ std::shuffle(data->idxes.begin(), data->idxes.end(), data->rng);
}
- F.close();
- *last = false;
return ML_ERROR_NONE;
}
status = ml_train_model_set_optimizer(model, optimizer);
EXPECT_EQ(status, ML_ERROR_NONE);
- status = ml_train_dataset_create_with_generator(&dataset, getBatch_train,
- getBatch_val, NULL);
+ auto train_data = createTrainData();
+ auto valid_data = createValidData();
+
+ status = ml_train_dataset_create_with_generator(&dataset, getSample,
+ getSample, NULL);
EXPECT_EQ(status, ML_ERROR_NONE);
status = ml_train_dataset_set_property(dataset, "buffer_size=100", NULL);
EXPECT_EQ(status, ML_ERROR_NONE);
+ status = ml_train_dataset_set_property_for_mode(
+ dataset, ML_TRAIN_DATASET_MODE_TRAIN, "user_data", &train_data, NULL);
+ EXPECT_EQ(status, ML_ERROR_NONE);
+
+ status = ml_train_dataset_set_property_for_mode(
+ dataset, ML_TRAIN_DATASET_MODE_VALID, "user_data", &valid_data, NULL);
+ EXPECT_EQ(status, ML_ERROR_NONE);
+
status = ml_train_model_set_dataset(model, dataset);
EXPECT_EQ(status, ML_ERROR_NONE);
EXPECT_EQ(status, ML_ERROR_NONE);
/** Compare training statistics */
- nntrainer_capi_model_comp_metrics(model, 2.17921, 1.96506, 60.4167);
+ nntrainer_capi_model_comp_metrics(model, 2.2063899, 1.983489, 64.583297);
status = ml_train_model_destroy(model);
EXPECT_EQ(status, ML_ERROR_NONE);
static int constant_generator_cb(float **outVec, float **outLabel, bool *last,
void *user_data) {
static int count = 0;
-
- unsigned int batch_size = 9;
unsigned int feature_size = 100;
unsigned int num_class = 10;
- unsigned int data_size = batch_size * feature_size;
- for (unsigned int i = 0; i < data_size; ++i) {
+ for (unsigned int i = 0; i < feature_size; ++i) {
outVec[0][i] = 0.0f;
}
- for (unsigned int i = 0; i < batch_size; ++i) {
- outLabel[0][i * num_class] = 1.0f;
- for (unsigned int j = 1; j < num_class; ++j) {
- outLabel[0][i * num_class + j] = 0.0f;
- }
+ outLabel[0][0] = 1.0f;
+ for (unsigned int j = 1; j < num_class; ++j) {
+ outLabel[0][j] = 0.0f;
}
- if (count == 10) {
+ count++;
+ if (count == 9) {
*last = true;
count = 0;
} else {
*last = false;
- count++;
}
return ML_ERROR_NONE;
TEST(nntrainer_capi_dataset, create_destroy_07_p) {
ml_train_dataset_h dataset;
int status;
- status = ml_train_dataset_create_with_generator(&dataset, getBatch_train,
- NULL, NULL);
+ status =
+ ml_train_dataset_create_with_generator(&dataset, getSample, NULL, NULL);
EXPECT_EQ(status, ML_ERROR_NONE);
status = ml_train_dataset_destroy(dataset);
EXPECT_EQ(status, ML_ERROR_NONE);
- status = ml_train_dataset_create_with_generator(&dataset, getBatch_train,
- getBatch_val, getBatch_val);
+ status = ml_train_dataset_create_with_generator(&dataset, getSample,
+ getSample, getSample);
EXPECT_EQ(status, ML_ERROR_NONE);
status = ml_train_dataset_destroy(dataset);
EXPECT_EQ(status, ML_ERROR_NONE);
EXPECT_EQ(status, ML_ERROR_NONE);
status = ml_train_dataset_add_generator(dataset, ML_TRAIN_DATASET_MODE_TRAIN,
- getBatch_train, NULL);
+ getSample, NULL);
EXPECT_EQ(status, ML_ERROR_NONE);
status = ml_train_dataset_add_generator(dataset, ML_TRAIN_DATASET_MODE_TRAIN,
- getBatch_val, NULL);
+ getSample, NULL);
EXPECT_EQ(status, ML_ERROR_NONE);
status = ml_train_dataset_add_generator(dataset, ML_TRAIN_DATASET_MODE_VALID,
- getBatch_train, NULL);
+ getSample, NULL);
EXPECT_EQ(status, ML_ERROR_NONE);
status = ml_train_dataset_add_generator(dataset, ML_TRAIN_DATASET_MODE_TEST,
- getBatch_train, NULL);
+ getSample, NULL);
EXPECT_EQ(status, ML_ERROR_NONE);
status = ml_train_dataset_destroy(dataset);
ml_train_dataset_h dataset;
int status;
- status = ml_train_dataset_create_with_generator(&dataset, getBatch_train,
- NULL, NULL);
+ status =
+ ml_train_dataset_create_with_generator(&dataset, getSample, NULL, NULL);
EXPECT_EQ(status, ML_ERROR_NONE);
status = ml_train_dataset_set_property(dataset, "buffer_size=10", NULL);
ml_train_dataset_h dataset;
int status;
- status = ml_train_dataset_create_with_generator(&dataset, getBatch_train,
- NULL, NULL);
+ status =
+ ml_train_dataset_create_with_generator(&dataset, getSample, NULL, NULL);
EXPECT_EQ(status, ML_ERROR_NONE);
status = ml_train_dataset_set_property(dataset, "user_data=10", NULL);
ml_train_dataset_h dataset;
int status = ML_ERROR_NONE;
- status = ml_train_dataset_create_with_generator(&dataset, getBatch_train,
- NULL, NULL);
+ status =
+ ml_train_dataset_create_with_generator(&dataset, getSample, NULL, NULL);
EXPECT_EQ(status, ML_ERROR_NONE);
status =
dataset, ML_TRAIN_DATASET_MODE_TRAIN, "buffer_size=1", NULL);
EXPECT_EQ(status, ML_ERROR_INVALID_PARAMETER);
+ auto train_data = createTrainData();
+ auto valid_data = createValidData();
status = ml_train_dataset_add_generator(dataset, ML_TRAIN_DATASET_MODE_TRAIN,
- getBatch_val, nullptr);
+ getSample, &train_data);
status = ml_train_dataset_set_property_for_mode(
dataset, ML_TRAIN_DATASET_MODE_TRAIN, "buffer_size=1", NULL);
EXPECT_EQ(status, ML_ERROR_NONE);
status = ml_train_dataset_add_generator(dataset, ML_TRAIN_DATASET_MODE_VALID,
- getBatch_val, nullptr);
+ getSample, &valid_data);
status = ml_train_dataset_set_property_for_mode(
dataset, ML_TRAIN_DATASET_MODE_VALID, "buffer_size=1", NULL);
EXPECT_EQ(status, ML_ERROR_NONE);
status = ml_train_dataset_add_generator(dataset, ML_TRAIN_DATASET_MODE_TEST,
- getBatch_val, nullptr);
+ getSample, &train_data);
status = ml_train_dataset_set_property_for_mode(
dataset, ML_TRAIN_DATASET_MODE_TEST, "buffer_size=1", NULL);
EXPECT_EQ(status, ML_ERROR_NONE);
set_dataset_property_for_mode_does_not_exist_valid_n) {
ml_train_dataset_h dataset;
int status = ML_ERROR_NONE;
-
- status = ml_train_dataset_create_with_generator(&dataset, getBatch_train,
- nullptr, getBatch_train);
+ status = ml_train_dataset_create_with_generator(&dataset, getSample, nullptr,
+ getSample);
EXPECT_EQ(status, ML_ERROR_NONE);
status = ml_train_dataset_set_property_for_mode(
ml_train_dataset_h dataset;
int status = ML_ERROR_NONE;
- status = ml_train_dataset_create_with_generator(&dataset, getBatch_train,
- nullptr, nullptr);
+ status = ml_train_dataset_create_with_generator(&dataset, getSample, nullptr,
+ nullptr);
EXPECT_EQ(status, ML_ERROR_NONE);
status = ml_train_dataset_set_property_for_mode(