--- /dev/null
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2020 Jihoon Lee <jhoon.it.lee@samsung.com>
+ *
+ * @file cifar_dataloader.cpp
+ * @date 24 Jun 2021
+ * @brief dataloader for cifar
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author Jihoon Lee <jhoon.it.lee@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+
+#include "cifar_dataloader.h"
+
+#include <nntrainer_error.h>
+#include <random>
+
+namespace nntrainer::resnet {
+
+RandomDataLoader::RandomDataLoader(const std::vector<TensorDim> &input_shapes,
+                                   const std::vector<TensorDim> &output_shapes,
+                                   int data_size) :
+  iteration(0),
+  iteration_for_one_epoch(data_size),
+  input_shapes(input_shapes),
+  output_shapes(output_shapes),
+  input_dist(0, 255) {
+  NNTR_THROW_IF(output_shapes.empty(), std::invalid_argument)
+    << "output_shape size empty not supported";
+  NNTR_THROW_IF(output_shapes.size() > 1, std::invalid_argument)
+    << "output_shape size > 1 is not supported";
+
+  /// label_dist is initialized here rather than in the member initializer
+  /// list so that output_shapes.front() is only dereferenced after the
+  /// emptiness check above (front() on an empty vector is undefined behavior)
+  label_dist =
+    std::uniform_int_distribution<int>(0, output_shapes.front().width() - 1);
+
+  /// data_size counts samples; convert to the number of iterations per epoch
+  iteration_for_one_epoch /= output_shapes.front().batch();
+}
+
+void RandomDataLoader::next(float **input, float **label, bool *last) {
+  /// returns true (and resets the counter) when the epoch is over; in that
+  /// case the buffers must not be filled for this call
+  auto fill_last = [&last, this] {
+    if (iteration++ == iteration_for_one_epoch) {
+      iteration = 0;
+      *last = true;
+    } else {
+      *last = false;
+    }
+    return *last;
+  };
+
+  /// fill @a length floats with random values drawn from input_dist
+  auto fill_input = [this](float *input, unsigned int length) {
+    for (unsigned int i = 0; i < length; ++i) {
+      *input = input_dist(rng);
+      input++;
+    }
+  };
+
+  /// fill one random label per batch item, either as a single integer value
+  /// (length == 1) or as a one-hot vector of @a length
+  auto fill_label = [this](float *label, unsigned int batch,
+                           unsigned int length) {
+    for (unsigned int i = 0; i < batch; ++i) {
+      unsigned int generated_label = label_dist(rng);
+
+      switch (length) {
+      case 1: { /// case of single integer value
+        *label = generated_label;
+        label++;
+        break;
+      }
+      default: { /// case of one hot
+        for (unsigned int j = 0; j < length; ++j) {
+          *label = (generated_label == j);
+          label++;
+        }
+        break;
+      }
+      }
+    }
+  };
+
+  if (fill_last() == true) {
+    return;
+  }
+
+  float **cur_input_tensor = input;
+  for (unsigned int i = 0; i < input_shapes.size(); ++i) {
+    fill_input(*cur_input_tensor, input_shapes.at(i).getDataLen());
+    cur_input_tensor++;
+  }
+
+  float **cur_label_tensor = label;
+  for (unsigned int i = 0; i < output_shapes.size(); ++i) {
+    /// bugfix: pass *cur_label_tensor; previously *label was passed, so every
+    /// output tensor overwrote the first label buffer
+    fill_label(*cur_label_tensor, output_shapes.at(i).batch(),
+               output_shapes.at(i).getFeatureLen());
+    cur_label_tensor++;
+  }
+}
+
+Cifar100DataLoader::Cifar100DataLoader(const std::string &path, int batch_size,
+                                       int splits) {
+  /// NYI! reading the cifar100 binary from @a path is not implemented yet;
+  /// all parameters are currently ignored
+}
+
+void Cifar100DataLoader::next(float **input, float **label, bool *last) {
+  /// NYI! buffers are left untouched; the epoch is marked finished
+  /// immediately so callers terminate instead of reading garbage data
+  *last = true;
+}
+
+} // namespace nntrainer::resnet
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2020 Jihoon Lee <jhoon.it.lee@samsung.com>
+ *
+ * @file cifar_dataloader.h
+ * @date 24 Jun 2021
+ * @brief dataloader for cifar 100
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author Jihoon Lee <jhoon.it.lee@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+#include <tensor_dim.h>
+
+#include <random>
+#include <string>
+#include <vector>
+
+namespace nntrainer::resnet {
+/**
+ * @brief DataLoader interface used to load cifar data
+ *
+ */
+class DataLoader {
+public:
+  /**
+   * @brief Destroy the Data Loader object
+   *
+   */
+  virtual ~DataLoader() = default;
+
+  /**
+   * @brief produce one iteration of data for the generator callback
+   *
+   * @param[out] input list of inputs that is already allocated by nntrainer,
+   * and this function is obliged to fill
+   * @param[out] label list of label that is already allocated by nntrainer, and
+   * this function is obliged to fill
+   * @param[out] last optional property to set when the epoch has finished
+   */
+  virtual void next(float **input, float **label, bool *last) = 0;
+};
+
+/**
+ * @brief RandomData Generator
+ *
+ */
+class RandomDataLoader final : public DataLoader {
+public:
+  /**
+   * @brief Construct a new Random Data Loader object
+   *
+   * @param input_shapes input_shapes with appropriate batch
+   * @param output_shapes label_shapes with appropriate batch
+   * @param data_size simulated total number of samples; iterations per epoch
+   * are derived from this by dividing by the batch size
+   */
+  RandomDataLoader(const std::vector<TensorDim> &input_shapes,
+                   const std::vector<TensorDim> &output_shapes, int data_size);
+
+  /**
+   * @brief Destroy the Random Data Loader object
+   *
+   */
+  ~RandomDataLoader() {}
+
+  /**
+   * @copydoc void DataLoader::next(float **input, float**label, bool *last)
+   */
+  void next(float **input, float **label, bool *last) override;
+
+private:
+  unsigned int iteration;               /**< iteration count within an epoch */
+  unsigned int iteration_for_one_epoch; /**< iterations per one epoch */
+
+  std::vector<TensorDim> input_shapes;
+  std::vector<TensorDim> output_shapes;
+
+  std::mt19937 rng;
+  std::uniform_int_distribution<int> input_dist;
+  std::uniform_int_distribution<int> label_dist;
+};
+
+/**
+ * @brief Cifar100DataLoader class
+ *
+ */
+class Cifar100DataLoader final : public DataLoader {
+public:
+  /**
+   * @brief Construct a new Cifar100 Data Loader object
+   *
+   * @param path path to read from
+   * @param batch_size batch_size of current model
+   * @param splits split divisor of the file 1 means using whole data, 2 means
+   * half of the data, 10 means 10% of the data
+   */
+  Cifar100DataLoader(const std::string &path, int batch_size, int splits);
+
+  /**
+   * @brief Destroy the Cifar100 Data Loader object
+   *
+   */
+  ~Cifar100DataLoader() {}
+
+  /**
+   * @copydoc void DataLoader::next(float **input, float**label, bool *last)
+   */
+  void next(float **input, float **label, bool *last) override;
+
+private:
+  std::vector<unsigned int> idxes; /**< index information for one epoch */
+};
+
+} // namespace nntrainer::resnet
#include <model.h>
#include <optimizer.h>
+#include <cifar_dataloader.h>
+
using LayerHandle = std::shared_ptr<ml::train::Layer>;
using ModelHandle = std::unique_ptr<ml::train::Model>;
+using UserDataType =
+ std::vector<std::unique_ptr<nntrainer::resnet::DataLoader>>;
+
/**
* @brief make "key=value" from key and value
*
return createLayer("conv2d", props);
};
- auto create_batch_relu = [&with_name](const std::string &name) {
- return createLayer("batch_normalization",
- {with_name(name), "activation=relu"});
- };
-
/** residual path */
LayerHandle a1 = create_conv("a1", 3, downsample ? 2 : 1, 1, input_name);
- LayerHandle a2 = create_batch_relu("a2");
+ LayerHandle a2 = createLayer(
+ "batch_normalization", {with_name("a2"), withKey("activation", "relu")});
LayerHandle a3 = create_conv("a3", 3, 1, 1, scoped_name("a2"));
/** skip path */
"Addition",
{with_name("c1"), withKey("input_layers", {scoped_name("a3"), skip_name})});
- LayerHandle c2 = create_batch_relu(""); /// use block_name itself.
+ LayerHandle c2 =
+ createLayer("batch_normalization",
+ {withKey("name", block_name), withKey("activation", "relu")});
if (downsample) {
return {a1, a2, a3, b1, c1, c2};
layers.push_back(
createLayer("conv2d", {
withKey("name", "conv0"),
+ withKey("input_shape", "3:32:32"),
+ withKey("filters", 64),
withKey("kernel_size", {3, 3}),
withKey("stride", {1, 1}),
withKey("padding", {1, 1}),
/// @todo update createResnet18 to be more generic
ModelHandle createResnet18() {
- ModelHandle model =
- ml::train::createModel(ml::train::ModelType::NEURAL_NET,
- {withKey("loss", "cross"),
- withKey("batch_size", 128), withKey("epochs", 60)});
+ ModelHandle model = ml::train::createModel(ml::train::ModelType::NEURAL_NET,
+ {withKey("loss", "cross")});
for (auto layers : createResnet18Graph()) {
model->addLayer(layers);
return model;
}
-ml_train_datagen_cb train_cb, valid_cb;
+/**
+ * @brief train data generator callback handed to nntrainer; delegates to the
+ * first DataLoader inside @a user_data
+ */
+int trainData_cb(float **input, float **label, bool *last, void *user_data) {
+  auto data = reinterpret_cast<UserDataType *>(user_data);
-void create_and_run() {
+  data->at(0)->next(input, label, last);
+  return 0;
+}
+
+/**
+ * @brief validation data generator callback handed to nntrainer; delegates to
+ * the second DataLoader inside @a user_data
+ */
+int validData_cb(float **input, float **label, bool *last, void *user_data) {
+  auto data = reinterpret_cast<UserDataType *>(user_data);
+
+  data->at(1)->next(input, label, last);
+  return 0;
+}
+
+/// @todo maybe make num_class also a parameter
+/// @todo maybe make num_class also a parameter
+void createAndRun(unsigned int epochs, unsigned int batch_size,
+                  UserDataType *user_data) {
ModelHandle model = createResnet18();
+  model->setProperty(
+    {withKey("batch_size", batch_size), withKey("epochs", epochs)});
auto optimizer = ml::train::createOptimizer("adam");
model->setOptimizer(std::move(optimizer));
+
+  int status = model->compile();
+  if (status != ML_ERROR_NONE) {
+    throw std::invalid_argument("model compilation failed!");
+  }
+
+  status = model->initialize();
+  if (status != ML_ERROR_NONE) {
+    throw std::invalid_argument("model initialization failed!");
+  }
+
+  auto dataset = ml::train::createDataset(ml::train::DatasetType::GENERATOR,
+                                          trainData_cb, validData_cb);
+
+  /// c-api style property list: the literal key "user_data" followed by the
+  /// pointer value that is later handed back to the generator callbacks
+  std::vector<void *> dataset_props;
+  dataset_props.push_back((void *)"user_data");
+  dataset_props.push_back((void *)user_data);
+  dataset->setProperty(dataset_props);
+
+  model->setDataset(std::move(dataset));
+
+  /// NOTE(review): user_data must outlive train(); the caller owns the loaders
+  model->train();
}
-int main() {
+/**
+ * @brief create a pair of RandomDataLoaders (train, validation) producing
+ * random cifar100-shaped tensors (input 3:32:32, one-hot label of 100)
+ *
+ * @param batch_size batch size of the current model
+ * @param simulated_data_size simulated total number of samples
+ * @param data_split split divisor, 1 means using the whole data
+ * @return UserDataType {train loader, validation loader}
+ */
+UserDataType createFakeDataGenerator(unsigned int batch_size,
+                                     unsigned int simulated_data_size,
+                                     unsigned int data_split) {
+  UserDataType user_data;
+  /// this is for train; bugfix: honor the parameter instead of shadowing it
+  /// with a hard-coded local 512 (the old parameter was silently ignored)
+  user_data.emplace_back(new nntrainer::resnet::RandomDataLoader(
+    {{batch_size, 3, 32, 32}}, {{batch_size, 1, 1, 100}},
+    simulated_data_size / data_split));
+  /// this is for validation
+  user_data.emplace_back(new nntrainer::resnet::RandomDataLoader(
+    {{batch_size, 3, 32, 32}}, {{batch_size, 1, 1, 100}},
+    simulated_data_size / data_split));
+
+  return user_data;
+}
+
+/**
+ * @brief create data loaders backed by the real cifar100 dataset
+ * @note NYI: throws unconditionally until Cifar100DataLoader is implemented
+ */
+UserDataType createRealDataGenerator() {
+  throw std::invalid_argument("reached here!");
+}
+
+int main(int argc, char *argv[]) {
+ if (argc < 4) {
+ std::cerr
+ << "usage: ./main [{data_directory}|\"fake\"] [batchsize] [data_split] \n"
+ << "when \"fake\" is given, original data size is assumed 512 for both "
+ "train and validation\n";
+ return 1;
+ }
+
+ std::string data_dir = argv[1];
+ unsigned int batch_size = std::stoul(argv[2]);
+ unsigned int data_split = std::stoul(argv[3]);
+
+ std::cout << "data_dir: " << data_dir << ' ' << "batch_size: " << batch_size
+ << " data_split: " << data_split << '\n';
+
+ /// warning: the data loader will be destroyed at the end of this function,
+ /// and passed as a pointer to the databuffer
+ UserDataType user_data;
+
+ try {
+ if (data_dir == "fake") {
+ user_data = createFakeDataGenerator(batch_size, 512, data_split);
+ } else {
+ user_data = createRealDataGenerator();
+ }
+ } catch (std::exception &e) {
+ std::cerr << "uncaught error while creating data generator! details: "
+ << e.what() << '\n';
+ return 1;
+ }
+
try {
- create_and_run();
+ createAndRun(1, 128, &user_data);
} catch (std::exception &e) {
- std::cerr << "uncaught error! error: " << e.what();
+ std::cerr << "uncaught error while running! details: " << e.what() << '\n';
return 1;
}