From 1051724ec8c923ba15adfc744654f121139fd84e Mon Sep 17 00:00:00 2001 From: Jihoon Lee Date: Thu, 8 Jul 2021 18:58:43 +0900 Subject: [PATCH] [dataset] split train / val / test databuffer This patch splits the single databuffer into separate train / val / test datasets. Each dataset can now be set on the model independently. **Major Changes** 1. `auto dataset = createDataset(train_cb, val_cb, test_cb)` -> `auto dataset_train = createDataset(train_cb)` 2. `NN.setDataset(dataset);` -> `NN.setDataset(DATA_TRAIN, dataset_train)` **Self evaluation:** 1. Build test: [X]Passed [ ]Failed [ ]Skipped 2. Run test: [X]Passed [ ]Failed [ ]Skipped Signed-off-by: Jihoon Lee --- Applications/Custom/LayerClient/jni/main.cpp | 10 +- Applications/LogisticRegression/jni/main.cpp | 9 +- Applications/MNIST/jni/main.cpp | 12 ++- Applications/ProductRatings/jni/main.cpp | 11 +- Applications/Resnet/jni/main.cpp | 76 ++++++------- Applications/SimpleShot/task_runner.cpp | 21 ++-- .../CIFAR_Classification/jni/main_func.cpp | 11 +- .../Draw_Classification/jni/main.cpp | 10 +- Applications/VGG/jni/main.cpp | 15 +-- api/capi/include/nntrainer_internal.h | 3 +- api/capi/src/nntrainer.cpp | 58 ++++++++-- api/ccapi/include/dataset.h | 9 +- api/ccapi/include/model.h | 4 +- api/ccapi/src/factory.cpp | 12 +-- nntrainer/dataset/databuffer.cpp | 3 +- nntrainer/dataset/databuffer.h | 4 +- nntrainer/dataset/databuffer_factory.cpp | 41 ++----- nntrainer/dataset/databuffer_factory.h | 9 +- nntrainer/dataset/databuffer_func.cpp | 3 +- nntrainer/dataset/databuffer_func.h | 3 +- nntrainer/models/model_loader.cpp | 33 +++--- nntrainer/models/neuralnet.cpp | 119 ++++++++++++++------- nntrainer/models/neuralnet.h | 28 +++-- nntrainer/utils/parse_util.cpp | 1 + test/ccapi/unittest_ccapi.cpp | 64 +++++++---- test/tizen_capi/unittest_tizen_capi.cpp | 4 +- 26 files changed, 351 insertions(+), 222 deletions(-) diff --git a/Applications/Custom/LayerClient/jni/main.cpp b/Applications/Custom/LayerClient/jni/main.cpp index 57c703d..c288069 100644 --- a/Applications/Custom/LayerClient/jni/main.cpp +++ b/Applications/Custom/LayerClient/jni/main.cpp @@ -79,13 +79,14 @@ static int ini_model_run(const std::string &ini_path) { std::shared_ptr dataset; try { dataset = ml::train::createDataset(ml::train::DatasetType::GENERATOR, - constant_generator_cb, nullptr, nullptr); + constant_generator_cb); } catch (...) { std::cerr << "creating dataset failed"; return 1; } - if (model->setDataset(dataset) != 0) { + if (model->setDataset(ml::train::DatasetDataUsageType::DATA_TRAIN, dataset) != + 0) { std::cerr << "failed to set datatset"; return 1; } @@ -124,13 +125,14 @@ int api_model_run() { try { dataset = ml::train::createDataset(ml::train::DatasetType::GENERATOR, - constant_generator_cb, nullptr, nullptr); + constant_generator_cb); } catch (...)
{ std::cerr << "creating dataset failed"; return 1; } - if (model->setDataset(dataset) != 0) { + if (model->setDataset(ml::train::DatasetDataUsageType::DATA_TRAIN, dataset) != + 0) { std::cerr << "failed to set datatset"; return 1; } diff --git a/Applications/LogisticRegression/jni/main.cpp b/Applications/LogisticRegression/jni/main.cpp index 7d35857..b10525d 100644 --- a/Applications/LogisticRegression/jni/main.cpp +++ b/Applications/LogisticRegression/jni/main.cpp @@ -170,10 +170,9 @@ int main(int argc, char *argv[]) { srand(time(NULL)); - std::shared_ptr DB = - std::make_shared(); - DB->setGeneratorFunc(nntrainer::DatasetDataUsageType::DATA_TRAIN, - getBatch_train); + auto data_train = std::make_shared(); + data_train->setGeneratorFunc(ml::train::DatasetDataUsageType::DATA_TRAIN, + getBatch_train); /** * @brief Create NN @@ -195,7 +194,7 @@ int main(int argc, char *argv[]) { } if (training) { - NN.setDataBuffer((DB)); + NN.setDataBuffer(ml::train::DatasetDataUsageType::DATA_TRAIN, data_train); try { NN.train(); diff --git a/Applications/MNIST/jni/main.cpp b/Applications/MNIST/jni/main.cpp index 047bb78..f6e843e 100644 --- a/Applications/MNIST/jni/main.cpp +++ b/Applications/MNIST/jni/main.cpp @@ -285,10 +285,12 @@ int main(int argc, char *argv[]) { /** * @brief Data buffer Create & Initialization */ - std::shared_ptr dataset; + std::shared_ptr dataset_train, dataset_val; try { - dataset = createDataset(ml::train::DatasetType::GENERATOR, getBatch_train, - getBatch_val); + dataset_train = + createDataset(ml::train::DatasetType::GENERATOR, getBatch_train); + dataset_val = + createDataset(ml::train::DatasetType::GENERATOR, getBatch_val); } catch (std::exception &e) { std::cerr << "Error creating dataset" << e.what() << std::endl; return 1; @@ -310,7 +312,9 @@ int main(int argc, char *argv[]) { model->compile(); model->initialize(); model->readModel(); - model->setDataset(dataset); + model->setDataset(ml::train::DatasetDataUsageType::DATA_TRAIN, + dataset_train); + model->setDataset(ml::train::DatasetDataUsageType::DATA_VAL, dataset_val); } catch (std::exception &e) { std::cerr << "Error during init " << e.what() << std::endl; return 1; diff --git a/Applications/ProductRatings/jni/main.cpp b/Applications/ProductRatings/jni/main.cpp index 5cc71bc..1b0ba6a 100644 --- a/Applications/ProductRatings/jni/main.cpp +++ b/Applications/ProductRatings/jni/main.cpp @@ -173,10 +173,12 @@ int main(int argc, char *argv[]) { srand(time(NULL)); - std::shared_ptr dataset; + std::shared_ptr dataset_train, dataset_val; try { - dataset = createDataset(ml::train::DatasetType::GENERATOR, getBatch_train, - getBatch_train); + dataset_train = + createDataset(ml::train::DatasetType::GENERATOR, getBatch_train); + dataset_val = + createDataset(ml::train::DatasetType::GENERATOR, getBatch_train); } catch (std::exception &e) { std::cerr << "Error creating dataset" << e.what() << std::endl; return 1; @@ -218,7 +220,8 @@ int main(int argc, char *argv[]) { } if (training) { - NN.setDataset(dataset); + NN.setDataset(ml::train::DatasetDataUsageType::DATA_TRAIN, dataset_train); + NN.setDataset(ml::train::DatasetDataUsageType::DATA_VAL, dataset_val); try { NN.train({"batch_size=" + std::to_string(batch_size)}); } catch (std::exception &e) { diff --git a/Applications/Resnet/jni/main.cpp b/Applications/Resnet/jni/main.cpp index 773576f..390276e 100644 --- a/Applications/Resnet/jni/main.cpp +++ b/Applications/Resnet/jni/main.cpp @@ -10,6 +10,7 @@ * @author Jihoon Lee * @bug No known bugs except for NYI items */ +#include #include 
#include #include @@ -26,8 +27,7 @@ using LayerHandle = std::shared_ptr; using ModelHandle = std::unique_ptr; -using UserDataType = - std::vector>; +using UserDataType = std::unique_ptr; /** * @brief make "key=value" from key and value @@ -194,24 +194,23 @@ ModelHandle createResnet18() { } int trainData_cb(float **input, float **label, bool *last, void *user_data) { - auto data = reinterpret_cast< - std::vector> *>(user_data); + auto data = reinterpret_cast(user_data); - data->at(0)->next(input, label, last); + data->next(input, label, last); return 0; } int validData_cb(float **input, float **label, bool *last, void *user_data) { - auto data = reinterpret_cast< - std::vector> *>(user_data); + auto data = reinterpret_cast(user_data); - data->at(1)->next(input, label, last); + data->next(input, label, last); return 0; } /// @todo maybe make num_class also a parameter void createAndRun(unsigned int epochs, unsigned int batch_size, - UserDataType *user_data) { + UserDataType &train_user_data, + UserDataType &valid_user_data) { ModelHandle model = createResnet18(); model->setProperty({withKey("batch_size", batch_size), withKey("epochs", epochs), @@ -230,46 +229,45 @@ void createAndRun(unsigned int epochs, unsigned int batch_size, throw std::invalid_argument("model initialization failed!"); } - auto dataset = ml::train::createDataset(ml::train::DatasetType::GENERATOR, - trainData_cb, validData_cb); + auto dataset_train = ml::train::createDataset( + ml::train::DatasetType::GENERATOR, trainData_cb, train_user_data.get()); + auto dataset_valid = ml::train::createDataset( + ml::train::DatasetType::GENERATOR, validData_cb, valid_user_data.get()); - std::vector dataset_props; - dataset_props.push_back((void *)"user_data"); - dataset_props.push_back((void *)user_data); - dataset->setProperty(dataset_props); - - model->setDataset(std::move(dataset)); + model->setDataset(ml::train::DatasetDataUsageType::DATA_TRAIN, + std::move(dataset_train)); + model->setDataset(ml::train::DatasetDataUsageType::DATA_VAL, + std::move(dataset_valid)); model->train(); } -UserDataType createFakeDataGenerator(unsigned int batch_size, - unsigned int simulted_data_size, - unsigned int data_split) { - UserDataType user_data; - unsigned int simulated_data_size = 512; - /// this is for train - user_data.emplace_back(new nntrainer::resnet::RandomDataLoader( +std::array +createFakeDataGenerator(unsigned int batch_size, + unsigned int simulted_data_size, + unsigned int data_split) { + constexpr unsigned int simulated_data_size = 512; + + UserDataType train_data(new nntrainer::resnet::RandomDataLoader( {{batch_size, 3, 32, 32}}, {{batch_size, 1, 1, 100}}, simulated_data_size / data_split)); - /// this is for validation - user_data.emplace_back(new nntrainer::resnet::RandomDataLoader( + UserDataType valid_data(new nntrainer::resnet::RandomDataLoader( {{batch_size, 3, 32, 32}}, {{batch_size, 1, 1, 100}}, simulated_data_size / data_split)); - return user_data; + return {std::move(train_data), std::move(valid_data)}; } -UserDataType createRealDataGenerator(const std::string &directory, - unsigned int batch_size, - unsigned int data_split) { - UserDataType user_data; - user_data.emplace_back(new nntrainer::resnet::Cifar100DataLoader( +std::array +createRealDataGenerator(const std::string &directory, unsigned int batch_size, + unsigned int data_split) { + + UserDataType train_data(new nntrainer::resnet::Cifar100DataLoader( directory + "/train.bin", batch_size, data_split)); - user_data.emplace_back(new 
nntrainer::resnet::Cifar100DataLoader( + UserDataType valid_data(new nntrainer::resnet::Cifar100DataLoader( directory + "/test.bin", batch_size, data_split)); - return user_data; + return {std::move(train_data), std::move(valid_data)}; } int main(int argc, char *argv[]) { @@ -296,13 +294,13 @@ int main(int argc, char *argv[]) { /// warning: the data loader will be destroyed at the end of this function, /// and passed as a pointer to the databuffer - UserDataType user_data; + std::array user_datas; try { if (data_dir == "fake") { - user_data = createFakeDataGenerator(batch_size, 512, data_split); + user_datas = createFakeDataGenerator(batch_size, 512, data_split); } else { - user_data = createRealDataGenerator(data_dir, batch_size, data_split); + user_datas = createRealDataGenerator(data_dir, batch_size, data_split); } } catch (std::exception &e) { std::cerr << "uncaught error while creating data generator! details: " @@ -310,8 +308,10 @@ int main(int argc, char *argv[]) { return 1; } + auto &[train_user_data, valid_user_data] = user_datas; + try { - createAndRun(epoch, batch_size, &user_data); + createAndRun(epoch, batch_size, train_user_data, valid_user_data); } catch (std::exception &e) { std::cerr << "uncaught error while running! details: " << e.what() << '\n'; return 1; diff --git a/Applications/SimpleShot/task_runner.cpp b/Applications/SimpleShot/task_runner.cpp index 6296539..0930082 100644 --- a/Applications/SimpleShot/task_runner.cpp +++ b/Applications/SimpleShot/task_runner.cpp @@ -241,18 +241,27 @@ int main(int argc, char **argv) { return 1; } - std::shared_ptr train_dataset; + std::shared_ptr train_dataset, valid_dataset; try { - train_dataset = ml::train::createDataset( - ml::train::DatasetType::FILE, - {"train_data=" + train_path, "val_data=" + val_path}); + train_dataset = ml::train::createDataset(ml::train::DatasetType::FILE, + train_path.c_str()); + valid_dataset = + ml::train::createDataset(ml::train::DatasetType::FILE, val_path.c_str()); + } catch (...) { std::cerr << "creating dataset failed"; return 1; } - if (model->setDataset(train_dataset)) { - std::cerr << "failed to set dataset" << std::endl; + if (model->setDataset(ml::train::DatasetDataUsageType::DATA_TRAIN, + train_dataset)) { + std::cerr << "failed to set train dataset" << std::endl; + return 1; + }; + + if (model->setDataset(ml::train::DatasetDataUsageType::DATA_VAL, + valid_dataset)) { + std::cerr << "failed to set valid dataset" << std::endl; return 1; }; diff --git a/Applications/TransferLearning/CIFAR_Classification/jni/main_func.cpp b/Applications/TransferLearning/CIFAR_Classification/jni/main_func.cpp index efa3a07..8030694 100644 --- a/Applications/TransferLearning/CIFAR_Classification/jni/main_func.cpp +++ b/Applications/TransferLearning/CIFAR_Classification/jni/main_func.cpp @@ -273,10 +273,12 @@ int main(int argc, char *argv[]) { /** * @brief Data buffer Create & Initialization */ - std::shared_ptr dataset; + std::shared_ptr dataset_train, dataset_val; try { - dataset = createDataset(ml::train::DatasetType::GENERATOR, getBatch_train, - getBatch_val); + dataset_train = + createDataset(ml::train::DatasetType::GENERATOR, getBatch_train); + dataset_val = + createDataset(ml::train::DatasetType::GENERATOR, getBatch_val); } catch (...) 
{ std::cerr << "Error creating dataset" << std::endl; return 1; @@ -306,7 +308,8 @@ int main(int argc, char *argv[]) { std::cerr << "Error during readModel, reason: " << e.what() << std::endl; return 1; } - model->setDataset(dataset); + model->setDataset(ml::train::DatasetDataUsageType::DATA_TRAIN, dataset_train); + model->setDataset(ml::train::DatasetDataUsageType::DATA_VAL, dataset_val); /** * @brief Neural Network Train & validation diff --git a/Applications/TransferLearning/Draw_Classification/jni/main.cpp b/Applications/TransferLearning/Draw_Classification/jni/main.cpp index 139ac6a..469936c 100644 --- a/Applications/TransferLearning/Draw_Classification/jni/main.cpp +++ b/Applications/TransferLearning/Draw_Classification/jni/main.cpp @@ -454,7 +454,7 @@ int main(int argc, char *argv[]) { try { loadAllData(data_path, inputVector, labelVector); } catch (...) { - std::cout << "Failed loading input images." << std::endl; + std::cerr << "Failed loading input images." << std::endl; #if defined(__TIZEN__) set_feature_state(NOT_CHECKED_YET); #endif @@ -471,8 +471,10 @@ int main(int argc, char *argv[]) { #endif return 1; } - if (status != ML_ERROR_NONE) + if (status != ML_ERROR_NONE) { + std::cerr << "Failed to train model\n"; return 1; + } /** Test the trained model */ try { @@ -484,8 +486,10 @@ int main(int argc, char *argv[]) { #endif return 1; } - if (status != ML_ERROR_NONE) + if (status != ML_ERROR_NONE) { + std::cerr << "Failed to test model\n"; return 1; + } #if defined(__TIZEN__) set_feature_state(NOT_CHECKED_YET); diff --git a/Applications/VGG/jni/main.cpp b/Applications/VGG/jni/main.cpp index f0aa196..70bad96 100644 --- a/Applications/VGG/jni/main.cpp +++ b/Applications/VGG/jni/main.cpp @@ -399,12 +399,12 @@ int main(int argc, char *argv[]) { for (unsigned int i = 0; i < count_val.remain; ++i) count_val.duplication[i] = i; - std::shared_ptr DB = - std::make_shared(); - DB->setGeneratorFunc(nntrainer::DatasetDataUsageType::DATA_TRAIN, - getBatch_train_file); - DB->setGeneratorFunc(nntrainer::DatasetDataUsageType::DATA_VAL, - getBatch_val_file); + auto db_train = std::make_shared(); + db_train->setGeneratorFunc(ml::train::DatasetDataUsageType::DATA_TRAIN, + getBatch_train_file); + auto db_valid = std::make_shared(); + db_valid->setGeneratorFunc(ml::train::DatasetDataUsageType::DATA_VAL, + getBatch_val_file); /** * @brief Neural Network Create & Initialization */ @@ -427,7 +427,8 @@ int main(int argc, char *argv[]) { try { NN.readModel(); - NN.setDataBuffer((DB)); + NN.setDataBuffer(ml::train::DatasetDataUsageType::DATA_TRAIN, db_train); + NN.setDataBuffer(ml::train::DatasetDataUsageType::DATA_VAL, db_valid); NN.train(); training_loss = NN.getTrainingLoss(); validation_loss = NN.getValidationLoss(); diff --git a/api/capi/include/nntrainer_internal.h b/api/capi/include/nntrainer_internal.h index 0d02644..15ac43b 100644 --- a/api/capi/include/nntrainer_internal.h +++ b/api/capi/include/nntrainer_internal.h @@ -25,6 +25,7 @@ #ifndef __NNTRAINER_INTERNAL_H__ #define __NNTRAINER_INTERNAL_H__ +#include #include #include #include @@ -101,7 +102,7 @@ typedef struct { */ typedef struct { uint magic; - std::shared_ptr dataset; + std::array, 3> dataset; bool in_use; std::mutex m; } ml_train_dataset; diff --git a/api/capi/src/nntrainer.cpp b/api/capi/src/nntrainer.cpp index eef890a..d784868 100644 --- a/api/capi/src/nntrainer.cpp +++ b/api/capi/src/nntrainer.cpp @@ -22,6 +22,7 @@ * @bug No known bugs except for NYI items */ +#include #include #include #include @@ -143,7 +144,16 @@ static int
ml_train_dataset_create(ml_train_dataset_h *dataset, nndataset->in_use = false; returnable f = [&]() { - nndataset->dataset = ml::train::createDataset(type, train, valid, test); + nndataset->dataset[ML_TRAIN_DATASET_DATA_USAGE_TRAIN] = + ml::train::createDataset(type, train); + if (valid != nullptr) { + nndataset->dataset[ML_TRAIN_DATASET_DATA_USAGE_VALID] = + ml::train::createDataset(type, valid); + } + if (test != nullptr) { + nndataset->dataset[ML_TRAIN_DATASET_DATA_USAGE_TEST] = + ml::train::createDataset(type, test); + } return ML_ERROR_NONE; }; @@ -492,12 +502,35 @@ int ml_train_model_set_dataset(ml_train_model_h model, } std::shared_ptr m; - std::shared_ptr d; m = nnmodel->model; - d = nndataset->dataset; - returnable f = [&]() { return m->setDataset(d); }; + returnable f = [&]() { + auto &[train_set, valid_set, test_set] = nndataset->dataset; + int status = ML_ERROR_NONE; + status = + m->setDataset(ml::train::DatasetDataUsageType::DATA_TRAIN, train_set); + if (status != ML_ERROR_NONE) { + return status; + } + + if (valid_set != nullptr) { + status = + m->setDataset(ml::train::DatasetDataUsageType::DATA_VAL, valid_set); + if (status != ML_ERROR_NONE) { + return status; + } + } + + if (test_set != nullptr) { + status = + m->setDataset(ml::train::DatasetDataUsageType::DATA_TEST, test_set); + if (status != ML_ERROR_NONE) { + return status; + } + } + return status; + }; status = nntrainer_exception_boundary(f); if (status == ML_ERROR_NONE) { @@ -735,7 +768,7 @@ int ml_train_dataset_set_property(ml_train_dataset_h dataset, ...) { int status = ML_ERROR_NONE; ml_train_dataset *nndataset; void *data; - std::shared_ptr d; + std::array, 3> db; check_feature_state(); @@ -755,10 +788,21 @@ int ml_train_dataset_set_property(ml_train_dataset_h dataset, ...) { ML_TRAIN_GET_VALID_DATASET_LOCKED(nndataset, dataset); ML_TRAIN_ADOPT_LOCK(nndataset, dataset_lock); - d = nndataset->dataset; + db = nndataset->dataset; } - returnable f = [&]() { return d->setProperty(arg_list); }; + returnable f = [&]() { + int status = ML_ERROR_NONE; + for (auto &d : db) { + if (d != nullptr) { + status = d->setProperty(arg_list); + if (status != ML_ERROR_NONE) { + return status; + } + } + } + return status; + }; status = nntrainer_exception_boundary(f); return status; diff --git a/api/ccapi/include/dataset.h b/api/ccapi/include/dataset.h index 0c285e8..ac1851e 100644 --- a/api/ccapi/include/dataset.h +++ b/api/ccapi/include/dataset.h @@ -98,16 +98,13 @@ createDataset(DatasetType type, /** * @brief Factory creator with constructor for dataset */ -std::unique_ptr createDataset(DatasetType type, const char *train_file, - const char *valid_file = nullptr, - const char *test_file = nullptr); +std::unique_ptr createDataset(DatasetType type, const char *file); /** * @brief Factory creator with constructor for dataset */ -std::unique_ptr createDataset(DatasetType type, datagen_cb train, - datagen_cb valid = nullptr, - datagen_cb test = nullptr); +std::unique_ptr createDataset(DatasetType type, datagen_cb cb, + void *user_data = nullptr); } // namespace train } // namespace ml diff --git a/api/ccapi/include/model.h b/api/ccapi/include/model.h index 0bc05ee..d652521 100644 --- a/api/ccapi/include/model.h +++ b/api/ccapi/include/model.h @@ -153,11 +153,13 @@ public: /** * @brief Run Model train with callback function by user + * @param[in] usage usage of the dataset * @param[in] dataset set the dataset * @retval #ML_ERROR_NONE Successful. * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter. 
*/ - virtual int setDataset(std::shared_ptr dataset) = 0; + virtual int setDataset(const ml::train::DatasetDataUsageType &usage, + std::shared_ptr dataset) = 0; /** * @brief add layer into neural network model diff --git a/api/ccapi/src/factory.cpp b/api/ccapi/src/factory.cpp index 58b4ac6..7644cd5 100644 --- a/api/ccapi/src/factory.cpp +++ b/api/ccapi/src/factory.cpp @@ -139,18 +139,16 @@ createDataset(DatasetType type, const std::vector &properties) { /** * @brief Factory creator with constructor for dataset */ -std::unique_ptr createDataset(DatasetType type, const char *train_file, - const char *valid_file, - const char *test_file) { - return nntrainer::createDataBuffer(type, train_file, valid_file, test_file); +std::unique_ptr createDataset(DatasetType type, const char *file) { + return nntrainer::createDataBuffer(type, file); } /** * @brief Factory creator with constructor for dataset */ -std::unique_ptr createDataset(DatasetType type, datagen_cb train, - datagen_cb valid, datagen_cb test) { - return nntrainer::createDataBuffer(type, train, valid, test); +std::unique_ptr createDataset(DatasetType type, datagen_cb cb, + void *user_data) { + return nntrainer::createDataBuffer(type, cb, user_data); } } // namespace train diff --git a/nntrainer/dataset/databuffer.cpp b/nntrainer/dataset/databuffer.cpp index f3eed19..eb3ad0d 100644 --- a/nntrainer/dataset/databuffer.cpp +++ b/nntrainer/dataset/databuffer.cpp @@ -510,7 +510,8 @@ int DataBuffer::setProperty(const PropertyType type, std::string &value) { return status; } -int DataBuffer::setGeneratorFunc(DatasetDataUsageType type, datagen_cb func) { +int DataBuffer::setGeneratorFunc(DatasetDataUsageType type, datagen_cb func, + void *user_data) { return ML_ERROR_NOT_SUPPORTED; } diff --git a/nntrainer/dataset/databuffer.h b/nntrainer/dataset/databuffer.h index f7a245c..7c2224a 100644 --- a/nntrainer/dataset/databuffer.h +++ b/nntrainer/dataset/databuffer.h @@ -186,10 +186,12 @@ public: * @brief set function pointer for each type * @param[in] type Buffer Type * @param[in] call back function pointer + * @param[in] user_data user_data of the callback * @retval #ML_ERROR_NONE Successful. * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter. */ - virtual int setGeneratorFunc(DatasetDataUsageType type, datagen_cb func); + virtual int setGeneratorFunc(DatasetDataUsageType type, datagen_cb func, + void *user_data = nullptr); /** * @brief set train data file name diff --git a/nntrainer/dataset/databuffer_factory.cpp b/nntrainer/dataset/databuffer_factory.cpp index 63972eb..32cfa04 100644 --- a/nntrainer/dataset/databuffer_factory.cpp +++ b/nntrainer/dataset/databuffer_factory.cpp @@ -38,34 +38,18 @@ std::unique_ptr createDataBuffer(DatasetType type) { * @brief Factory creator with constructor for dataset */ std::unique_ptr createDataBuffer(DatasetType type, - const char *train_file, - const char *valid_file, - const char *test_file) { + const char *file) { if (type != DatasetType::FILE) throw std::invalid_argument( "Cannot create dataset with files with the given dataset type"); std::unique_ptr dataset = createDataBuffer(type); - NNTR_THROW_IF(train_file == nullptr || + NNTR_THROW_IF(file == nullptr || dataset->setDataFile(DatasetDataUsageType::DATA_TRAIN, - train_file) != ML_ERROR_NONE, + file) != ML_ERROR_NONE, std::invalid_argument) - << "invalid train file, path: " << (train_file ? 
train_file : "null"); - - if (valid_file) { - NNTR_THROW_IF(dataset->setDataFile(DatasetDataUsageType::DATA_VAL, - valid_file) != ML_ERROR_NONE, - std::invalid_argument) - << "invalid valid file, path: " << (valid_file ? valid_file : "null"); - } - - if (test_file) { - NNTR_THROW_IF(dataset->setDataFile(DatasetDataUsageType::DATA_TEST, - test_file) != ML_ERROR_NONE, - std::invalid_argument) - << "invalid test file, path: " << (test_file ? test_file : "null"); - } + << "invalid train file, path: " << (file ? file : "null"); return dataset; } @@ -73,27 +57,18 @@ std::unique_ptr createDataBuffer(DatasetType type, /** * @brief Factory creator with constructor for dataset */ -std::unique_ptr createDataBuffer(DatasetType type, datagen_cb train, - datagen_cb valid, - datagen_cb test) { +std::unique_ptr createDataBuffer(DatasetType type, datagen_cb cb, + void *user_data) { if (type != DatasetType::GENERATOR) throw std::invalid_argument("Cannot create dataset with generator " "callbacks with the given dataset type"); std::unique_ptr dataset = createDataBuffer(type); - if (dataset->setGeneratorFunc(DatasetDataUsageType::DATA_TRAIN, train) != - ML_ERROR_NONE) + if (dataset->setGeneratorFunc(DatasetDataUsageType::DATA_TRAIN, cb, + user_data) != ML_ERROR_NONE) throw std::invalid_argument("Invalid train data generator"); - if (valid && dataset->setGeneratorFunc(DatasetDataUsageType::DATA_VAL, - valid) != ML_ERROR_NONE) - throw std::invalid_argument("Invalid valid data generator"); - - if (test && dataset->setGeneratorFunc(DatasetDataUsageType::DATA_TEST, - test) != ML_ERROR_NONE) - throw std::invalid_argument("Invalid test data generator"); - return dataset; } diff --git a/nntrainer/dataset/databuffer_factory.h b/nntrainer/dataset/databuffer_factory.h index 306a574..ff13dbc 100644 --- a/nntrainer/dataset/databuffer_factory.h +++ b/nntrainer/dataset/databuffer_factory.h @@ -27,16 +27,13 @@ std::unique_ptr createDataBuffer(DatasetType type); * @brief Factory creator with constructor for databuffer with files */ std::unique_ptr createDataBuffer(DatasetType type, - const char *train_file, - const char *valid_file = nullptr, - const char *test_file = nullptr); + const char *file); /** * @brief Factory creator with constructor for databuffer with callbacks */ -std::unique_ptr createDataBuffer(DatasetType type, datagen_cb train, - datagen_cb valid = nullptr, - datagen_cb test = nullptr); +std::unique_ptr createDataBuffer(DatasetType type, datagen_cb cb, + void *user_data = nullptr); } /* namespace nntrainer */ diff --git a/nntrainer/dataset/databuffer_func.cpp b/nntrainer/dataset/databuffer_func.cpp index e87e9e4..53c8bc3 100644 --- a/nntrainer/dataset/databuffer_func.cpp +++ b/nntrainer/dataset/databuffer_func.cpp @@ -86,7 +86,7 @@ int DataBufferFromCallback::init() { } int DataBufferFromCallback::setGeneratorFunc(DatasetDataUsageType type, - datagen_cb func) { + datagen_cb func, void *user_data) { int status = ML_ERROR_NONE; switch (type) { @@ -94,6 +94,7 @@ int DataBufferFromCallback::setGeneratorFunc(DatasetDataUsageType type, if (!func) return ML_ERROR_INVALID_PARAMETER; callback_train = func; + this->user_data = user_data; if (func) validation[0] = true; break; diff --git a/nntrainer/dataset/databuffer_func.h b/nntrainer/dataset/databuffer_func.h index 184c459..87dfc76 100644 --- a/nntrainer/dataset/databuffer_func.h +++ b/nntrainer/dataset/databuffer_func.h @@ -65,7 +65,8 @@ public: * @retval #ML_ERROR_NONE Successful. * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter. 
*/ - int setGeneratorFunc(DatasetDataUsageType type, datagen_cb func); + int setGeneratorFunc(DatasetDataUsageType type, datagen_cb func, + void *user_data = nullptr) override; /** * @brief Update Data Buffer ( it is for child thread ) diff --git a/nntrainer/models/model_loader.cpp b/nntrainer/models/model_loader.cpp index 3c8fa64..f8e8bda 100644 --- a/nntrainer/models/model_loader.cpp +++ b/nntrainer/models/model_loader.cpp @@ -193,9 +193,7 @@ int ModelLoader::loadDatasetConfigIni(dictionary *ini, NeuralNetwork &model) { int status = ML_ERROR_NONE; if (iniparser_find_entry(ini, "Dataset") == 0) { - model.data_buffer = nntrainer::createDataBuffer(DatasetType::GENERATOR); - status = model.data_buffer->setBatchSize(model.batch_size); - return status; + return ML_ERROR_NONE; } if (iniparser_find_entry(ini, "DataSet:Tflite")) { @@ -203,10 +201,14 @@ int ModelLoader::loadDatasetConfigIni(dictionary *ini, NeuralNetwork &model) { return ML_ERROR_INVALID_PARAMETER; } - model.data_buffer = nntrainer::createDataBuffer(DatasetType::FILE); - std::shared_ptr dbuffer = - std::static_pointer_cast(model.data_buffer); + model.data_buffers[static_cast(DatasetDataUsageType::DATA_TRAIN)] = + nntrainer::createDataBuffer(DatasetType::FILE); + model.data_buffers[static_cast(DatasetDataUsageType::DATA_VAL)] = + nntrainer::createDataBuffer(DatasetType::FILE); + model.data_buffers[static_cast(DatasetDataUsageType::DATA_TEST)] = + nntrainer::createDataBuffer(DatasetType::FILE); + unsigned int bufsize = iniparser_getint(ini, "DataSet:BufferSize", 1); std::function parse_and_set = [&](const char *key, DatasetDataUsageType dt, bool required) -> int { const char *path = iniparser_getstring(ini, key, NULL); @@ -215,7 +217,17 @@ int ModelLoader::loadDatasetConfigIni(dictionary *ini, NeuralNetwork &model) { return required ? ML_ERROR_INVALID_PARAMETER : ML_ERROR_NONE; } - return dbuffer->setDataFile(dt, resolvePath(path)); + auto dbuffer = std::static_pointer_cast( + model.data_buffers[static_cast(dt)]); + + if (int status = dbuffer->setBufSize(bufsize)) { + return status; + } + + /// setting data to data_train is intended for now. 
later the function + /// should be called without this enum + return dbuffer->setDataFile(DatasetDataUsageType::DATA_TRAIN, + resolvePath(path)); }; status = @@ -232,13 +244,6 @@ int ModelLoader::loadDatasetConfigIni(dictionary *ini, NeuralNetwork &model) { ml_logi("setting labelData is deprecated!, it is essentially noop now!"); } - status = model.data_buffer->setBatchSize(model.batch_size); - NN_RETURN_STATUS(); - - unsigned int bufsize = iniparser_getint(ini, "DataSet:BufferSize", 1); - status = model.data_buffer->setBufSize(bufsize); - NN_RETURN_STATUS(); - ml_logd("parsing dataset done"); return status; } diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp index 544f1ab..effa683 100644 --- a/nntrainer/models/neuralnet.cpp +++ b/nntrainer/models/neuralnet.cpp @@ -26,8 +26,7 @@ #include #include -#include -#include +#include #include #include #include @@ -201,9 +200,11 @@ NeuralNetwork::~NeuralNetwork() { manager.reset(); model_graph.reset(); - if (data_buffer) { - data_buffer->clear(); - } + std::for_each(data_buffers.begin(), data_buffers.end(), [](auto &buffers) { + if (buffers) { + buffers->clear(); + } + }); } /** @@ -454,8 +455,15 @@ void NeuralNetwork::setBatchSize(unsigned int batch) { model_graph.setBatchSize(batch); manager->setBatchSize(batch); - if (data_buffer && data_buffer->setBatchSize(batch_size) != ML_ERROR_NONE) - throw std::invalid_argument("Error setting batchsize for the dataset"); + for (auto &db : data_buffers) { + if (db != nullptr) { + int status = db->setBatchSize(batch_size); + if (status != ML_ERROR_NONE) { + ml_loge("[model] setting batchsize from data buffer failed"); + throw_status(status); + } + } + } } bool NeuralNetwork::validateInput(sharedConstTensors X) { @@ -522,6 +530,11 @@ sharedConstTensors NeuralNetwork::inference(sharedConstTensors X, return out; } +int NeuralNetwork::setDataset(const DatasetDataUsageType &usage, + std::shared_ptr dataset) { + return setDataBuffer(usage, std::static_pointer_cast(dataset)); +} + int NeuralNetwork::allocate(bool trainable) { // TODO: directly replace this manager->initializeTensors(trainable); @@ -539,8 +552,9 @@ int NeuralNetwork::deallocate() { int NeuralNetwork::train(std::vector values) { int status = ML_ERROR_NONE; - if (data_buffer == nullptr) { - ml_loge("Cannot initialize the model without the data buffer."); + if (data_buffers[static_cast(DatasetDataUsageType::DATA_TRAIN)] == + nullptr) { + ml_loge("Cannot initialize the model without the train data buffer."); return ML_ERROR_INVALID_PARAMETER; } @@ -558,15 +572,26 @@ int NeuralNetwork::train(std::vector values) { status = allocate(true); NN_RETURN_STATUS(); - /** Setup data buffer properties */ - status = data_buffer->setClassNum(getOutputDimension()[0].width()); - NN_RETURN_STATUS(); + auto initiate_data_buffer = [this](std::shared_ptr &db) { + /** @todo pass dedicated dimensions for inputs and labels */ + int status = db->setClassNum(getOutputDimension()[0].width()); + NN_RETURN_STATUS(); - status = data_buffer->setFeatureSize(getInputDimension()[0]); - NN_RETURN_STATUS(); + status = db->setFeatureSize(getInputDimension()[0]); + NN_RETURN_STATUS(); - status = data_buffer->init(); - NN_RETURN_STATUS(); + status = db->init(); + NN_RETURN_STATUS(); + + return status; + }; + + for (auto &db : data_buffers) { + if (db != nullptr) { + status = initiate_data_buffer(db); + } + NN_RETURN_STATUS(); + } status = train_run(); @@ -598,19 +623,33 @@ int NeuralNetwork::train_run() { auto &label = last_layer_node->getOutputGrad(0); auto &in 
= first_layer_node->getInput(0); + /// The constant below is needed after the change from one databuffer + /// holding train, valid, test data to separate train / valid / test + /// buffers. After the change, only DATA_TRAIN is used inside a databuffer, + /// so RUN_CONSTANT is a stub value to deal with the situation. + auto RUN_CONSTANT = DatasetDataUsageType::DATA_TRAIN; + + auto &[train_buffer, valid_buffer, test_buffer] = data_buffers; + + if (train_buffer == nullptr) { + ml_loge("[NeuralNetworks] there is no train dataset!"); + return ML_ERROR_INVALID_PARAMETER; + } + for (epoch_idx = epoch_idx + 1; epoch_idx <= epochs; ++epoch_idx) { training.loss = 0.0f; - status = data_buffer->run(nntrainer::DatasetDataUsageType::DATA_TRAIN); + status = train_buffer->run(RUN_CONSTANT); if (status != ML_ERROR_NONE) { - data_buffer->clear(DatasetDataUsageType::DATA_TRAIN); + train_buffer->clear(RUN_CONSTANT); return status; } - if (data_buffer - ->getValidation()[(int)nntrainer::DatasetDataUsageType::DATA_TEST]) { - status = data_buffer->run(nntrainer::DatasetDataUsageType::DATA_TEST); + /// @todo make this work; the test buffer currently runs but does nothing + if (test_buffer != nullptr && + test_buffer->getValidation()[static_cast(RUN_CONSTANT)]) { + status = test_buffer->run(nntrainer::DatasetDataUsageType::DATA_TEST); if (status != ML_ERROR_NONE) { - data_buffer->clear(DatasetDataUsageType::DATA_TEST); + test_buffer->clear(DatasetDataUsageType::DATA_TEST); return status; } } @@ -618,25 +657,23 @@ int NeuralNetwork::train_run() { int count = 0; while (true) { - if (data_buffer->getDataFromBuffer( - nntrainer::DatasetDataUsageType::DATA_TRAIN, in.getData(), - label.getData())) { + if (train_buffer->getDataFromBuffer(RUN_CONSTANT, in.getData(), + label.getData())) { try { forwarding(true); backwarding(iter++); } catch (std::exception &e) { - data_buffer->clear(nntrainer::DatasetDataUsageType::DATA_TRAIN); + train_buffer->clear(RUN_CONSTANT); ml_loge("Error: training error in #%d/%d.
%s", epoch_idx, epochs, e.what()); throw; } std::cout << "#" << epoch_idx << "/" << epochs; float loss = getLoss(); - data_buffer->displayProgress( - count++, nntrainer::DatasetDataUsageType::DATA_TRAIN, loss); + train_buffer->displayProgress(count++, RUN_CONSTANT, loss); training.loss += loss; } else { - data_buffer->clear(nntrainer::DatasetDataUsageType::DATA_TRAIN); + train_buffer->clear(RUN_CONSTANT); break; } } @@ -650,22 +687,21 @@ std::cout << "#" << epoch_idx << "/" << epochs << " - Training Loss: " << training.loss; - if (data_buffer - ->getValidation()[(int)nntrainer::DatasetDataUsageType::DATA_VAL]) { + if (valid_buffer != nullptr && + valid_buffer->getValidation()[static_cast(RUN_CONSTANT)]) { int right = 0; validation.loss = 0.0f; unsigned int tcases = 0; - status = data_buffer->run(nntrainer::DatasetDataUsageType::DATA_VAL); + status = valid_buffer->run(RUN_CONSTANT); if (status != ML_ERROR_NONE) { - data_buffer->clear(DatasetDataUsageType::DATA_VAL); + valid_buffer->clear(RUN_CONSTANT); return status; } while (true) { - if (data_buffer->getDataFromBuffer( - nntrainer::DatasetDataUsageType::DATA_VAL, in.getData(), - label.getData())) { + if (valid_buffer->getDataFromBuffer(RUN_CONSTANT, in.getData(), + label.getData())) { forwarding(false); auto model_out = output.argmax(); auto label_out = label.argmax(); @@ -676,7 +712,7 @@ validation.loss += getLoss(); tcases++; } else { - data_buffer->clear(nntrainer::DatasetDataUsageType::DATA_VAL); + valid_buffer->clear(RUN_CONSTANT); break; } } @@ -746,8 +782,13 @@ int NeuralNetwork::setOptimizer( return ML_ERROR_NONE; } -int NeuralNetwork::setDataBuffer(std::shared_ptr data_buffer) { - this->data_buffer = data_buffer; +int NeuralNetwork::setDataBuffer(const DatasetDataUsageType &usage, + std::shared_ptr data_buffer) { + if (data_buffer == nullptr) { + return ML_ERROR_INVALID_PARAMETER; + } + + this->data_buffers[static_cast(usage)] = data_buffer; return ML_ERROR_NONE; } diff --git a/nntrainer/models/neuralnet.h b/nntrainer/models/neuralnet.h index 169da58..9398cde 100644 --- a/nntrainer/models/neuralnet.h +++ b/nntrainer/models/neuralnet.h @@ -24,6 +24,7 @@ #define __NEURALNET_H__ #ifdef __cplusplus +#include #include #include #include @@ -32,7 +33,6 @@ #endif #include -#include #include #include #include @@ -44,6 +44,12 @@ #include #include +namespace ml::train { +class DataSet; +enum class DatasetType; +enum class DatasetDataUsageType; +} // namespace ml::train + namespace nntrainer { /** @@ -51,6 +57,9 @@ namespace nntrainer { */ using NetType = ml::train::ModelType; +class DataBuffer; +using DatasetType = ml::train::DatasetType; +using DatasetDataUsageType = ml::train::DatasetDataUsageType; /** * @brief Statistics from running or training a model */ @@ -89,7 +98,7 @@ public: weight_initializer(WeightInitializer::WEIGHT_UNKNOWN), net_type(NetType::UNKNOWN), manager(std::make_shared()), - data_buffer(nullptr), + data_buffers({nullptr, nullptr, nullptr}), continue_train(false), initialized(false), compiled(false), @@ -269,21 +278,23 @@ public: /** * @brief Run NeuralNetwork train with callback function by user + * @param[in] dt dataset usage type (train / valid / test) to set * @param[in] dataset set the dataset * @retval #ML_ERROR_NONE Successful. * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
*/ - int setDataset(std::shared_ptr dataset) { - return setDataBuffer(std::static_pointer_cast(dataset)); - } + int setDataset(const DatasetDataUsageType &dt, + std::shared_ptr dataset); /** * @brief Run NeuralNetwork train with callback function by user + * @param[in] dt dataset usage type (train / valid / test) to set * @param[in] databuffer set the databuffer * @retval #ML_ERROR_NONE Successful. * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter. */ - int setDataBuffer(std::shared_ptr data_buffer); + int setDataBuffer(const DatasetDataUsageType &dt, + std::shared_ptr data_buffer); /** * @brief add layer into neural network model @@ -511,7 +522,8 @@ private: std::shared_ptr manager; /**< nntrainer manager */ - std::shared_ptr data_buffer; /**< Data Buffer to get Input */ + std::array, 3> + data_buffers; /**< Data Buffers to get Input */ bool continue_train; /**< Continue train from the previous state of optimizer and iterations */ @@ -575,7 +587,7 @@ private: swap(lhs.save_path, rhs.save_path); swap(lhs.opt, rhs.opt); swap(lhs.net_type, rhs.net_type); - swap(lhs.data_buffer, rhs.data_buffer); + swap(lhs.data_buffers, rhs.data_buffers); swap(lhs.continue_train, rhs.continue_train); swap(lhs.initialized, rhs.initialized); swap(lhs.model_graph, rhs.model_graph); diff --git a/nntrainer/utils/parse_util.cpp b/nntrainer/utils/parse_util.cpp index 5c9f5ea..4d954da 100644 --- a/nntrainer/utils/parse_util.cpp +++ b/nntrainer/utils/parse_util.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/test/ccapi/unittest_ccapi.cpp b/test/ccapi/unittest_ccapi.cpp index 6a0e5ca..3af3b64 100644 --- a/test/ccapi/unittest_ccapi.cpp +++ b/test/ccapi/unittest_ccapi.cpp @@ -204,7 +204,7 @@ TEST(nntrainer_ccapi, train_with_config_01_p) { EXPECT_NO_THROW(model->train()); EXPECT_NEAR(model->getTrainingLoss(), 4.434051, tolerance); - EXPECT_NEAR(model->getValidationLoss(), 2.9646113, tolerance); + EXPECT_NEAR(model->getValidationLoss(), 2.910938, tolerance); } /** @@ -237,12 +237,21 @@ TEST(nntrainer_ccapi, train_dataset_with_file_01_p) { "beta1=0.002", "beta2=0.001", "epsilon=1e-7"})); EXPECT_NO_THROW(model->setOptimizer(optimizer)); - EXPECT_NO_THROW(dataset = ml::train::createDataset( - ml::train::DatasetType::FILE, - getTestResPath("trainingSet.dat").c_str(), - getTestResPath("valSet.dat").c_str(), nullptr)); + EXPECT_NO_THROW( + dataset = ml::train::createDataset( + ml::train::DatasetType::FILE, getTestResPath("trainingSet.dat").c_str())); + EXPECT_EQ(dataset->setProperty({"buffer_size=100"}), ML_ERROR_NONE); + EXPECT_EQ( + model->setDataset(ml::train::DatasetDataUsageType::DATA_TRAIN, dataset), + ML_ERROR_NONE); + + EXPECT_NO_THROW( + dataset = ml::train::createDataset(ml::train::DatasetType::FILE, + getTestResPath("valSet.dat").c_str())); EXPECT_EQ(dataset->setProperty({"buffer_size=100"}), ML_ERROR_NONE); - EXPECT_EQ(model->setDataset(dataset), ML_ERROR_NONE); + EXPECT_EQ( + model->setDataset(ml::train::DatasetDataUsageType::DATA_VAL, dataset), + ML_ERROR_NONE); EXPECT_EQ(model->setProperty({"loss=cross", "batch_size=16", "epochs=2", "save_path=model.bin"}), @@ -251,8 +260,8 @@ EXPECT_EQ(model->initialize(), ML_ERROR_NONE); EXPECT_NO_THROW(model->train()); - EXPECT_NEAR(model->getTrainingLoss(), 2.1934659, tolerance); - EXPECT_NEAR(model->getValidationLoss(), 2.2051108, tolerance); + EXPECT_NEAR(model->getTrainingLoss(), 2.1866805, tolerance); + EXPECT_NEAR(model->getValidationLoss(), 2.18779993, tolerance); } /** @@
-285,11 +294,19 @@ TEST(nntrainer_ccapi, train_dataset_with_generator_01_p) { "beta1=0.002", "beta2=0.001", "epsilon=1e-7"})); EXPECT_NO_THROW(model->setOptimizer(optimizer)); - EXPECT_NO_THROW( - dataset = ml::train::createDataset(ml::train::DatasetType::GENERATOR, - getBatch_train, getBatch_val, nullptr)); + EXPECT_NO_THROW(dataset = ml::train::createDataset( + ml::train::DatasetType::GENERATOR, getBatch_train)); EXPECT_EQ(dataset->setProperty({"buffer_size=100"}), ML_ERROR_NONE); - EXPECT_EQ(model->setDataset(dataset), ML_ERROR_NONE); + EXPECT_EQ( + model->setDataset(ml::train::DatasetDataUsageType::DATA_TRAIN, dataset), + ML_ERROR_NONE); + + EXPECT_NO_THROW(dataset = ml::train::createDataset( + ml::train::DatasetType::GENERATOR, getBatch_val)); + EXPECT_EQ(dataset->setProperty({"buffer_size=100"}), ML_ERROR_NONE); + EXPECT_EQ( + model->setDataset(ml::train::DatasetDataUsageType::DATA_VAL, dataset), + ML_ERROR_NONE); EXPECT_EQ(model->setProperty({"loss=cross", "batch_size=16", "epochs=2", "save_path=model.bin"}), @@ -332,12 +349,21 @@ TEST(nntrainer_ccapi, train_batch_size_update_after) { "beta1=0.002", "beta2=0.001", "epsilon=1e-7"})); EXPECT_NO_THROW(model->setOptimizer(optimizer)); - EXPECT_NO_THROW(dataset = ml::train::createDataset( - ml::train::DatasetType::FILE, - getTestResPath("trainingSet.dat").c_str(), - getTestResPath("valSet.dat").c_str(), nullptr)); + EXPECT_NO_THROW( + dataset = ml::train::createDataset( + ml::train::DatasetType::FILE, getTestResPath("trainingSet.dat").c_str())); + EXPECT_EQ(dataset->setProperty({"buffer_size=100"}), ML_ERROR_NONE); + EXPECT_EQ( + model->setDataset(ml::train::DatasetDataUsageType::DATA_TRAIN, dataset), + ML_ERROR_NONE); + + EXPECT_NO_THROW( + dataset = ml::train::createDataset(ml::train::DatasetType::FILE, + getTestResPath("valSet.dat").c_str())); EXPECT_EQ(dataset->setProperty({"buffer_size=100"}), ML_ERROR_NONE); - EXPECT_EQ(model->setDataset(dataset), ML_ERROR_NONE); + EXPECT_EQ( + model->setDataset(ml::train::DatasetDataUsageType::DATA_VAL, dataset), + ML_ERROR_NONE); EXPECT_EQ(model->setProperty({"loss=cross", "batch_size=16", "epochs=1"}), ML_ERROR_NONE); @@ -364,8 +390,8 @@ TEST(nntrainer_ccapi, train_batch_size_update_after) { EXPECT_EQ(model->setProperty({"batch_size=4"}), ML_ERROR_NONE); EXPECT_NO_THROW(model->train()); - EXPECT_NEAR(model->getTrainingLoss(), 1.9613363, tolerance); - EXPECT_NEAR(model->getValidationLoss(), 2.1835098, tolerance); + EXPECT_NEAR(model->getTrainingLoss(), 1.928810, tolerance); + EXPECT_NEAR(model->getValidationLoss(), 2.17899, tolerance); } /** diff --git a/test/tizen_capi/unittest_tizen_capi.cpp b/test/tizen_capi/unittest_tizen_capi.cpp index 59ed61b..e191a3a 100644 --- a/test/tizen_capi/unittest_tizen_capi.cpp +++ b/test/tizen_capi/unittest_tizen_capi.cpp @@ -349,7 +349,7 @@ TEST(nntrainer_capi_nnmodel, train_01_p) { EXPECT_EQ(status, ML_ERROR_NONE); /** Compare training statistics */ - nntrainer_capi_model_comp_metrics(handle, 4.01373, 3.55134, 10.4167); + nntrainer_capi_model_comp_metrics(handle, 4.01373, 3.50392, 10.4167); status = ml_train_model_destroy(handle); EXPECT_EQ(status, ML_ERROR_NONE); @@ -745,7 +745,7 @@ TEST(nntrainer_capi_nnmodel, train_with_file_01_p) { EXPECT_EQ(status, ML_ERROR_NONE); /** Compare training statistics */ - nntrainer_capi_model_comp_metrics(model, 2.13067, 2.19975, 20.8333); + nntrainer_capi_model_comp_metrics(model, 2.12599992, 2.200589, 20.8333); status = ml_train_model_destroy(model); EXPECT_EQ(status, ML_ERROR_NONE); -- 2.7.4
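A minimal sketch of the migration this patch asks of applications, distilled from the MNIST and Resnet changes above. Only `createDataset`, `setDataset`, the `DatasetDataUsageType` values, and the `datagen_cb` signature come from the patch; the include names, the `setupDatasets` helper, and the callback declarations are illustrative assumptions.

```cpp
// Assumed ccapi headers, mirroring what the applications above include.
#include <dataset.h>
#include <model.h>

#include <memory>
#include <utility>

// Hypothetical generator callbacks with the datagen_cb signature used in
// this patch; each fills one batch and sets *last at the end of an epoch.
int getBatch_train(float **input, float **label, bool *last, void *user_data);
int getBatch_val(float **input, float **label, bool *last, void *user_data);

int setupDatasets(ml::train::Model &model) {
  // Before this patch, a single dataset carried every callback:
  //   auto dataset = ml::train::createDataset(
  //       ml::train::DatasetType::GENERATOR, getBatch_train, getBatch_val);
  //   model.setDataset(dataset);

  // After this patch, each usage gets its own dataset object:
  auto dataset_train = ml::train::createDataset(
      ml::train::DatasetType::GENERATOR, getBatch_train);
  auto dataset_val = ml::train::createDataset(
      ml::train::DatasetType::GENERATOR, getBatch_val);

  int status = model.setDataset(ml::train::DatasetDataUsageType::DATA_TRAIN,
                                std::move(dataset_train));
  if (status != 0)
    return status;

  return model.setDataset(ml::train::DatasetDataUsageType::DATA_VAL,
                          std::move(dataset_val));
}
```

Leaving DATA_VAL or DATA_TEST unset is valid: `train_run()` skips the corresponding phase when that buffer is null, while a missing DATA_TRAIN buffer fails with ML_ERROR_INVALID_PARAMETER.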
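The `void *user_data` parameter added to `createDataset` and `setGeneratorFunc` lets each dataset own its callback state, which is how the Resnet application above passes its per-dataset data loaders. Below is a sketch under assumed names: `BatchCounter`, `train_cb`, and the epoch-length arithmetic are hypothetical; only the callback signature and the `createDataset(type, cb, user_data)` overload come from this patch.

```cpp
#include <dataset.h>

#include <memory>

// Hypothetical per-dataset callback state. Before this patch, state for
// train and valid callbacks had to be packed into one shared object.
struct BatchCounter {
  unsigned int seen = 0;
  unsigned int batches_per_epoch = 32; // assumed epoch length in batches
};

int train_cb(float **input, float **label, bool *last, void *user_data) {
  auto *counter = static_cast<BatchCounter *>(user_data);

  // ... fill input[0] / label[0] with one batch here ...

  // Signal the end of the epoch so the databuffer stops fetching.
  *last = (++counter->seen % counter->batches_per_epoch) == 0;
  return 0;
}

int main() {
  auto counter = std::make_unique<BatchCounter>();
  // The dataset stores only the raw pointer, so `counter` must outlive
  // training, matching the lifetime warning in the Resnet main.cpp above.
  auto dataset_train = ml::train::createDataset(
      ml::train::DatasetType::GENERATOR, train_cb, counter.get());
  return dataset_train == nullptr;
}
```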