This patch implements the dataset saver/loader.
It proposes the sections "train_set", "valid_set", and "test_set", just
like the optimizer section.
**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped
Signed-off-by: Jihoon Lee <jhoon.it.lee@samsung.com>
#include <tensor_dim.h>
namespace nntrainer {
+class Exporter;
+enum class ExportMethods;
+
/**
* @brief DataProducer interface used to abstract data provider
*
return SIZE_UNDEFINED;
}
+  /**
+   * @brief this function helps exporting the dataproducer in a predefined
+   * format, while working around an issue caused by templated-function type
+   * erasure
+   *
+   * @param exporter exporter that contains exporting logic
+   * @param method enum value to identify how it should be exported to
+   */
+  virtual void exportTo(Exporter &exporter, const ExportMethods &method) const {
+  }
+
/**
* @brief denote if given producer is thread safe and can be parallelized.
* @note if size() == SIZE_UNDEFIEND, thread safe shall be false
#include <base_properties.h>
#include <cassert>
#include <climits>
-#include <condition_variable>
#include <cstring>
#include <databuffer.h>
+#include <func_data_producer.h>
#include <functional>
#include <iomanip>
-#include <mutex>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <node_exporter.h>
-#include <parse_util.h>
#include <sstream>
#include <stdexcept>
#include <stdio.h>
NNTR_THROW_IF(!producer, std::invalid_argument) << "producer is empty";
return producer->getType();
}
+
+void DataBuffer::exportTo(Exporter &exporter,
+                          const ExportMethods &method) const {
+  // Let the attached producer export its own properties first (if any),
+  // then append the buffer-level properties.
+  if (producer) {
+    producer->exportTo(exporter, method);
+  }
+  exporter.saveResult(*db_props, method, this);
+}
+
+bool DataBuffer::isSerializable(const ExportMethods &method) const {
+  // Only the string-vector export method is supported for serialization.
+  if (method != ExportMethods::METHOD_STRINGVECTOR) {
+    return false;
+  }
+  // No producer attached: nothing to serialize.
+  if (!producer) {
+    return false;
+  }
+
+  /// @todo this should be queried from producer->isSerializable
+  // Callback-based producers hold user functions that cannot be saved.
+  if (producer->getType() == FuncDataProducer::type) {
+    return false;
+  }
+  return true;
+}
} /* namespace nntrainer */
namespace nntrainer {
+class Exporter;
+enum class ExportMethods;
+
/**
* @brief Aliasing from ccapi ml::train
*/
*/
const std::string getType() const;
+  /**
+   * @brief this function helps exporting the dataset in a predefined format,
+   * while working around an issue caused by templated-function type erasure
+   *
+   * @param exporter exporter that contains exporting logic
+   * @param method enum value to identify how it should be exported to
+   */
+  void exportTo(Exporter &exporter, const ExportMethods &method) const;
+
+  /**
+   * @brief check if given databuffer is exportable; this is needed because
+   * some data producers, mainly FuncDataProducer, cannot be serialized
+   *
+   * @param method proposed method
+   * @return bool true if serializable
+   */
+  bool isSerializable(const ExportMethods &method) const;
+
protected:
std::shared_ptr<DataProducer> producer;
std::weak_ptr<IterationQueue> iq_view;
return last;
};
}
+
+/**
+ * @brief no-op by design: this producer wraps a user-supplied callback,
+ * which cannot be serialized, so there is nothing to export.
+ */
+void FuncDataProducer::exportTo(Exporter &exporter,
+                                const ExportMethods &method) const {}
+
} // namespace nntrainer
namespace nntrainer {
class PropsUserData;
+class Exporter;
+enum class ExportMethods;
using datagen_cb = ml::train::datagen_cb;
const std::vector<TensorDim> &label_dims,
void *user_data = nullptr) override;
+ /**
+ * @copydoc DataProducer::exportTo(Exporter &exporter, ExportMethods method)
+ */
+ void exportTo(Exporter &exporter, const ExportMethods &method) const override;
+
private:
datagen_cb cb;
std::unique_ptr<PropsUserData> user_data_prop;
return file_size / (sample_size * RawFileDataProducer::pixel_size);
}
+
+/**
+ * @brief export this producer's properties (the raw file path) through the
+ * given exporter.
+ */
+void RawFileDataProducer::exportTo(Exporter &exporter,
+                                   const ExportMethods &method) const {
+  exporter.saveResult(*raw_file_props, method, this);
+}
} // namespace nntrainer
unsigned int size(const std::vector<TensorDim> &input_dims,
const std::vector<TensorDim> &label_dims) const override;
+ /**
+ * @copydoc DataProducer::exportTo(Exporter &exporter, ExportMethods method)
+ */
+ void exportTo(Exporter &exporter, const ExportMethods &method) const override;
+
private:
std::ifstream file;
using PropTypes = std::tuple<props::FilePath>;
namespace nntrainer {
-namespace props {
-class ActivationType;
-
-} // namespace props
-
/**
* @class Activation Layer
* @brief Activation Layer
* @brief load dataset config from ini
*/
int ModelLoader::loadDatasetConfigIni(dictionary *ini, NeuralNetwork &model) {
-  int status = ML_ERROR_NONE;
-
-  if (iniparser_find_entry(ini, "Dataset") == 0) {
-    return ML_ERROR_NONE;
-  }
-
-  if (iniparser_find_entry(ini, "DataSet:Tflite")) {
-    ml_loge("Error: Tflite dataset is not yet implemented!");
-    return ML_ERROR_INVALID_PARAMETER;
-  }
+  /************ helper functors **************/
+  /// Parse the deprecated [Dataset] section (TrainData / ValidData /
+  /// TestData keys) so that old configuration files keep working.
+  auto try_parse_datasetsection_for_backward_compatibility = [&]() -> int {
+    int status = ML_ERROR_NONE;
+    if (iniparser_find_entry(ini, "Dataset") == 0) {
+      return ML_ERROR_NONE;
+    }
-  /// @todo ini bufferSize -> buffer_size to unify
-  std::string bufsizepros("buffer_size=");
-  bufsizepros += iniparser_getstring(ini, "DataSet:BufferSize", "1");
+    ml_logw("Using dataset section is deprecated, please consider using "
+            "train_set, valid_set, test_set sections");
+
+    /// @note DataSet:BufferSize is parsed for backward compatibility
+    std::string bufsizepros("buffer_size=");
+    bufsizepros +=
+      iniparser_getstring(ini, "DataSet:BufferSize",
+                          iniparser_getstring(ini, "DataSet:buffer_size", "1"));
+
+    /// Create a FILE databuffer for mode @a dt from the path stored under
+    /// @a key; a missing key is an error only when @a required is set.
+    auto parse_and_set = [&](const char *key, DatasetModeType dt,
+                             bool required) -> int {
+      const char *path = iniparser_getstring(ini, key, NULL);
+
+      if (path == NULL) {
+        return required ? ML_ERROR_INVALID_PARAMETER : ML_ERROR_NONE;
+      }
+
+      try {
+        model.data_buffers[static_cast<int>(dt)] =
+          createDataBuffer(DatasetType::FILE, resolvePath(path).c_str());
+        model.data_buffers[static_cast<int>(dt)]->setProperty({bufsizepros});
+      } catch (...) {
+        ml_loge("path is not valid, path: %s", resolvePath(path).c_str());
+        return ML_ERROR_INVALID_PARAMETER;
+      }
+
+      return ML_ERROR_NONE;
+    };
+
+    status =
+      parse_and_set("DataSet:TrainData", DatasetModeType::MODE_TRAIN, true);
+    NN_RETURN_STATUS();
+    status =
+      parse_and_set("DataSet:ValidData", DatasetModeType::MODE_VALID, false);
+    NN_RETURN_STATUS();
+    status =
+      parse_and_set("DataSet:TestData", DatasetModeType::MODE_TEST, false);
+    NN_RETURN_STATUS();
+    const char *path = iniparser_getstring(ini, "Dataset:LabelData", NULL);
+    if (path != NULL) {
+      ml_logi("setting labelData is deprecated!, it is essentially noop now!");
+    }
-  std::function<int(const char *, DatasetModeType, bool)> parse_and_set =
-    [&](const char *key, DatasetModeType dt, bool required) -> int {
-    const char *path = iniparser_getstring(ini, key, NULL);
+    ml_logd("parsing dataset done");
+    return status;
+  };
-    if (path == NULL) {
-      return required ? ML_ERROR_INVALID_PARAMETER : ML_ERROR_NONE;
+  /// Parse one of the new-style [train_set]/[valid_set]/[test_set] sections
+  /// into the databuffer slot for @a type. A missing section is not an error.
+  auto parse_buffer_section = [ini, this,
+                               &model](const std::string &section_name,
+                                       DatasetModeType type) -> int {
+    if (iniparser_find_entry(ini, section_name.c_str()) == 0) {
+      return ML_ERROR_NONE;
+    }
+    const char *db_type =
+      iniparser_getstring(ini, (section_name + ":type").c_str(), unknown);
+    auto &db = model.data_buffers[static_cast<int>(type)];
+
+    /// @todo delegate this to app context (currently there is only file
+    /// databuffer so file is directly used)
+    if (!istrequal(db_type, "file")) {
+      ml_loge("databuffer type is unknown, type: %s", db_type);
+      return ML_ERROR_INVALID_PARAMETER;
    }
    try {
-      model.data_buffers[static_cast<int>(dt)] =
-        createDataBuffer(DatasetType::FILE, resolvePath(path).c_str());
-      model.data_buffers[static_cast<int>(dt)]->setProperty({bufsizepros});
-    } catch (...) {
-      ml_loge("path is not valid, path: %s", resolvePath(path).c_str());
+      db = createDataBuffer(DatasetType::FILE);
+      const std::vector<std::string> properties =
+        parseProperties(ini, section_name, {"type"});
+
+      db->setProperty(properties);
+    } catch (std::exception &e) {
+      ml_loge("error while creating and setting dataset, %s", e.what());
      return ML_ERROR_INVALID_PARAMETER;
    }
    return ML_ERROR_NONE;
  };
-  status =
-    parse_and_set("DataSet:TrainData", DatasetModeType::MODE_TRAIN, true);
+  /************ start of the procedure **************/
+  int status = ML_ERROR_NONE;
+  status = try_parse_datasetsection_for_backward_compatibility();
+  NN_RETURN_STATUS();
+
+  status = parse_buffer_section("train_set", DatasetModeType::MODE_TRAIN);
  NN_RETURN_STATUS();
-  status =
-    parse_and_set("DataSet:ValidData", DatasetModeType::MODE_VALID, false);
+  status = parse_buffer_section("valid_set", DatasetModeType::MODE_VALID);
  NN_RETURN_STATUS();
-  status = parse_and_set("DataSet:TestData", DatasetModeType::MODE_TEST, false);
+  /// section name must match what the model saver writes ("test_set");
+  /// looking up "test" would silently skip the section.
+  status = parse_buffer_section("test_set", DatasetModeType::MODE_TEST);
  NN_RETURN_STATUS();
-  const char *path = iniparser_getstring(ini, "Dataset:LabelData", NULL);
-  if (path != NULL) {
-    ml_logi("setting labelData is deprecated!, it is essentially noop now!");
-  }
-  ml_logd("parsing dataset done");
  return status;
}
"permitted, path: "
<< file_path;
+ std::vector<IniSection> sections;
+
IniSection model_section = IniSection::FromExportable("model", *this);
model_section.setEntry("type", "NeuralNetwork");
+ sections.push_back(model_section);
+
+ auto add_section_if_any = [§ions](const std::string §ion_name,
+ auto obj_ptr, auto pred) {
+ if (pred(obj_ptr)) {
+ IniSection s = IniSection::FromExportable(section_name, *obj_ptr);
+ s.setEntry("type", obj_ptr->getType());
+ sections.push_back(s);
+ }
+ };
+
+ add_section_if_any("optimizer", opt,
+ [](const auto &obj) { return static_cast<bool>(obj); });
+
+ auto &[train_buffer, valid_buffer, test_buffer] = data_buffers;
+ auto data_buffer_valid = [](const auto &buffer) {
+ return buffer && buffer->isSerializable(ExportMethods::METHOD_STRINGVECTOR);
+ };
- IniSection optimizer_section = IniSection::FromExportable("optimizer", *opt);
- optimizer_section.setEntry("type", opt->getType());
+ add_section_if_any("train_set", train_buffer, data_buffer_valid);
+ add_section_if_any("valid_set", valid_buffer, data_buffer_valid);
+ add_section_if_any("test_set", test_buffer, data_buffer_valid);
- IniWrapper wrapper("model_saver", {model_section, optimizer_section});
+ IniWrapper wrapper("model_saver", sections);
wrapper.save_ini(file_path);
IniGraphInterpreter interpreter;
- /// @todo serialize dataset props
- /// @todo serialize optimizer props
interpreter.serialize(model_graph, file_path);
}
model = ml::train::createModel(ml::train::ModelType::NEURAL_NET);
ScopedIni s("simple_ini", {model_base + "batch_size = 16", optimizer,
dataset + "-BufferSize", inputlayer, outputlayer});
+
+ std::shared_ptr<ml::train::Dataset> dataset = ml::train::createDataset(
+ ml::train::DatasetType::FILE, getTestResPath("trainingSet.dat").c_str());
+ EXPECT_NO_THROW(dataset->setProperty({"buffer_size=100"}));
+ EXPECT_EQ(model->setDataset(ml::train::DatasetModeType::MODE_TRAIN, dataset),
+ ML_ERROR_NONE);
+
EXPECT_EQ(model->loadFromConfig(s.getIniName()), ML_ERROR_NONE);
EXPECT_EQ(model->compile(), ML_ERROR_NONE);
EXPECT_EQ(model->initialize(), ML_ERROR_NONE);
"TestData = testSet.dat |"
"ValidData = valSet.dat");
+static nntrainer::IniSection train_set("train_set", "BufferSize = 100 |"
+ "type = file | "
+ "path = trainingSet.dat");
+static nntrainer::IniSection valid_set("valid_set", "BufferSize = 100 |"
+ "type = file | "
+ "path = valSet.dat");
+static nntrainer::IniSection test_set("test_set", "BufferSize = 100 |"
+ "type = file | "
+ "path = testSet.dat");
+
static nntrainer::IniSection loss_cross("loss", "Type = cross");
static nntrainer::IniSection loss_cross_softmax("loss", "Type = cross_softmax");
/// negative tests
// clang-format off
INSTANTIATE_TEST_CASE_P(
-  nntrainerIniAutoTests, nntrainerIniTest, ::testing::Values(
+  nntrainerIniAutoTests_p, nntrainerIniTest, ::testing::Values(
  /**< positive: basic valid scenarios (2 positive and 3 negative cases) */
  mkIniTc("basic_p", {nw_base_mse, adam, input + "-Activation", out+"input_layers=inputlayer" + "-Activation"}, SUCCESS),
  mkIniTc("basic2_p", {nw_base_mse, sgd, input + "-Activation", out+"input_layers=inputlayer" + "-Activation"}, SUCCESS),
  mkIniTc("basic_dataset_p", {nw_base_cross, adam, dataset, input, out+"input_layers=inputlayer"}, SUCCESS),
  mkIniTc("basic_dataset2_p", {nw_base_cross, sgd, input, out+"input_layers=inputlayer", dataset}, SUCCESS),
  mkIniTc("basic_dataset3_p", {dataset, nw_base_cross, sgd, input, out+"input_layers=inputlayer"}, SUCCESS),
+  mkIniTc("basic_trainset_p", {nw_base_cross, adam, train_set, input, out+"input_layers=inputlayer"}, SUCCESS),
+  /// uses test_set so the test actually covers the section its name claims
+  mkIniTc("basic_testset_p", {nw_base_cross, sgd, input, out+"input_layers=inputlayer", test_set}, SUCCESS),
+  mkIniTc("basic_train_valid_p", {dataset, nw_base_cross, sgd, input, out+"input_layers=inputlayer", train_set, valid_set}, SUCCESS),
+  mkIniTc("basic_all_p", {dataset, nw_base_cross, test_set, sgd, input, out+"input_layers=inputlayer", train_set, valid_set}, SUCCESS),
+  mkIniTc("basic_test_train_valid_p", {dataset, nw_base_cross, test_set, sgd, input, out+"input_layers=inputlayer", train_set, valid_set}, SUCCESS),
  mkIniTc("basic_conv2d_p", {nw_base_cross, adam, conv2d + "input_shape = 1:10:10"}, SUCCESS),
  mkIniTc("no_testSet_p", {nw_base_cross, adam, dataset + "-TestData", input, out+"input_layers=inputlayer"}, SUCCESS),
  mkIniTc("no_validSet_p", {nw_base_cross, adam, dataset + "-ValidData", input, out+"input_layers=inputlayer"}, SUCCESS),
  mkIniTc("loss_layer6_p", {nw_base, adam, input + "-Activation", out, loss_cross_sigmoid}, SUCCESS),
  mkIniTc("loss_layer7_p", {nw_base, adam, input + "-Activation", out + "-Activation", loss_cross_softmax}, SUCCESS),
  mkIniTc("loss_layer8_p", {nw_base, adam, input + "-Activation", out, loss_cross_softmax}, SUCCESS),
+  mkIniTc("unknown_loss_p", {nw_base_cross + "loss=", adam, input, out+"input_layers=inputlayer"}, SUCCESS),
+  mkIniTc("mse_with_relu_p", {nw_base_mse, sgd, input, out+"input_layers=inputlayer", act_relu}, SUCCESS),
+  mkIniTc("no_loss_with_relu_p", {nw_base, sgd, input, out+"input_layers=inputlayer", act_relu}, SUCCESS)
+), [](const testing::TestParamInfo<nntrainerIniTest::ParamType>& info){
+  return std::get<0>(info.param);
+});
+INSTANTIATE_TEST_CASE_P(
+  nntrainerIniAutoTests_n, nntrainerIniTest, ::testing::Values(
  /**< half negative: init fail cases (1 positive and 4 negative cases) */
-  mkIniTc("unknown_loss_p", {nw_base_cross + "loss=", adam, input, out+"input_layers=inputlayer"}, SUCCESS),
  mkIniTc("cross_with_relu_n", {nw_base_cross, sgd, input, out+"input_layers=inputlayer", act_relu+"input_layers=fclayer" }, COMPFAIL | INITFAIL),
  mkIniTc("cross_with_relu2_n", {nw_base_cross, sgd, input, out+"input_layers=inputlayer" + "-Activation", act_relu+"input_layers=fclayer" }, COMPFAIL | INITFAIL),
-  mkIniTc("mse_with_relu_p", {nw_base_mse, sgd, input, out+"input_layers=inputlayer", act_relu}, SUCCESS),
-  mkIniTc("no_loss_with_relu_p", {nw_base, sgd, input, out+"input_layers=inputlayer", act_relu}, SUCCESS),
  mkIniTc("basic_conv2d_n", {nw_base_cross, adam, conv2d + "input_shape = 1:1:62720"}, INITFAIL),
  /**< negative: basic invalid scenarios (5 negative cases) */
  mkIniTc("empty_n", {}, ALLFAIL),
  mkIniTc("no_layers_n", {nw_base_cross, adam}, ALLFAIL),
  mkIniTc("no_layers_2_n", {nw_base_cross, adam, dataset}, ALLFAIL),
-  /// #391
-  // mkIniTc("ini_has_empty_value_n", {nw_base_cross, adam + "epsilon = _", input, out}, ALLFAIL),
+  mkIniTc("ini_has_empty_value_n", {nw_base_cross, adam + "epsilon = _", input, out}, ALLFAIL),
  /**< negative: property(hyperparam) validation (5 negative cases) */
  mkIniTc("wrong_opt_type_n", {nw_base_cross, adam + "Type = wrong_opt", input, out+"input_layers=inputlayer"}, ALLFAIL),
  /**< negative: little bit of tweeks to check determinancy (5 negative cases) */
  mkIniTc("wrong_nw_dataset_n", {nw_base_cross, adam, input, out+"input_layers=inputlayer", dataset + "-TrainData"}, ALLFAIL),
  mkIniTc("wrong_nw_dataset2_n", {nw_base_cross, adam, dataset + "-TrainData", input, out+"input_layers=inputlayer"}, ALLFAIL),
+  mkIniTc("wrong_nw_train_set_no_type_n", {nw_base_cross, adam, train_set + "-type", input, out+"input_layers=inputlayer"}, ALLFAIL),
+  mkIniTc("wrong_nw_train_set_wrong_type_n", {nw_base_cross, adam, train_set + "type = asdf", input, out+"input_layers=inputlayer"}, ALLFAIL),
+  mkIniTc("wrong_nw_valid_set_no_type_n", {nw_base_cross, adam, valid_set + "-type", input, out+"input_layers=inputlayer"}, ALLFAIL),
+  mkIniTc("wrong_nw_valid_set_wrong_type_n", {nw_base_cross, adam, valid_set + "type = asdf", input, out+"input_layers=inputlayer"}, ALLFAIL),
+  mkIniTc("wrong_nw_test_set_no_type_n", {nw_base_cross, adam, test_set + "-type", input, out+"input_layers=inputlayer"}, ALLFAIL),
+  mkIniTc("wrong_nw_test_set_wrong_type_n", {nw_base_cross, adam, test_set + "type = asdf", input, out+"input_layers=inputlayer"}, ALLFAIL),
  /**< negative: dataset is not complete (5 negative cases) */
  mkIniTc("no_trainingSet_n", {nw_base_cross, adam, dataset + "-TrainData", input, out+"input_layers=inputlayer"}, ALLFAIL),
), [](const testing::TestParamInfo<nntrainerIniTest::ParamType>& info){
  return std::get<0>(info.param);
});
+
// clang-format on
/**