[Dataset] Add callback generator
authorJihoon Lee <jhoon.it.lee@samsung.com>
Mon, 12 Jul 2021 09:36:49 +0000 (18:36 +0900)
committerJijoong Moon <jijoong.moon@samsung.com>
Wed, 28 Jul 2021 03:30:13 +0000 (12:30 +0900)
This patch adds a callback-based data producer that implements the abstract generator interface.

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Jihoon Lee <jhoon.it.lee@samsung.com>
jni/Android.mk
nntrainer/dataset/func_data_producer.cpp [new file with mode: 0644]
nntrainer/dataset/func_data_producer.h [new file with mode: 0644]
nntrainer/dataset/meson.build
nntrainer/dataset/random_data_producers.cpp
test/unittest/datasets/data_producer_common_tests.cpp
test/unittest/datasets/data_producer_common_tests.h
test/unittest/datasets/meson.build
test/unittest/datasets/unittest_func_data_producers.cpp [new file with mode: 0644]
test/unittest/datasets/unittest_random_data_producers.cpp

index 601efb5..306a402 100644 (file)
@@ -129,6 +129,8 @@ NNTRAINER_SRCS := $(NNTRAINER_ROOT)/nntrainer/models/neuralnet.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/dataset/databuffer_factory.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/dataset/databuffer_func.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/dataset/databuffer_file.cpp \
+                  $(NNTRAINER_ROOT)/nntrainer/dataset/func_data_producer.cpp \
+                  $(NNTRAINER_ROOT)/nntrainer/dataset/random_data_producers.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/tensor/tensor.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/tensor/lazy_tensor.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/tensor/manager.cpp \
diff --git a/nntrainer/dataset/func_data_producer.cpp b/nntrainer/dataset/func_data_producer.cpp
new file mode 100644 (file)
index 0000000..0b723e7
--- /dev/null
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2021 Jihoon Lee <jhoon.it.lee@samsung.com>
+ *
+ * @file   func_data_producer.cpp
+ * @date   12 July 2021
+ * @brief  This file contains a data producer that is fed by a user callback
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Jihoon Lee <jhoon.it.lee@samsung.com>
+ * @bug    No known bugs except for NYI items
+ *
+ */
+
+#include <func_data_producer.h>
+
+#include <nntrainer_error.h>
+
+namespace nntrainer {
+
+FuncDataProducer::FuncDataProducer(datagen_cb datagen_cb, void *user_data_) :
+  cb(datagen_cb),
+  user_data(user_data_) {}
+
+FuncDataProducer::~FuncDataProducer() {}
+
+const std::string FuncDataProducer::getType() const {
+  return FuncDataProducer::type;
+}
+
+void FuncDataProducer::setProperty(const std::vector<std::string> &properties) {
+  NNTR_THROW_IF(!properties.empty(), std::invalid_argument)
+    << "properties is not empty, size: " << properties.size();
+}
+
+DataProducer::Gernerator
+FuncDataProducer::finalize(const std::vector<TensorDim> &input_dims,
+                           const std::vector<TensorDim> &label_dims) {
+  NNTR_THROW_IF(!this->cb, std::invalid_argument)
+    << "given callback is nullptr!";
+
+  auto input_data = std::shared_ptr<float *>(new float *[input_dims.size()],
+                                             std::default_delete<float *[]>());
+  auto label_data = std::shared_ptr<float *>(new float *[label_dims.size()],
+                                             std::default_delete<float *[]>());
+
+  return [cb = this->cb, ud = this->user_data, input_dims, label_dims,
+          input_data, label_data]() -> DataProducer::Iteration {
+    std::vector<Tensor> inputs;
+    inputs.reserve(input_dims.size());
+
+    float **input_data_raw = input_data.get();
+    float **label_data_raw = label_data.get();
+
+    for (unsigned int i = 0; i < input_dims.size(); ++i) {
+      inputs.emplace_back(input_dims[i]);
+      *(input_data_raw + i) = inputs.back().getData();
+    }
+
+    std::vector<Tensor> labels;
+    labels.reserve(label_dims.size());
+
+    for (unsigned int i = 0; i < label_dims.size(); ++i) {
+      labels.emplace_back(label_dims[i]);
+      *(label_data_raw + i) = labels.back().getData();
+    }
+
+    bool last = false;
+    int status = cb(input_data_raw, label_data_raw, &last, ud);
+    NNTR_THROW_IF(status != ML_ERROR_NONE, std::invalid_argument)
+      << "[DataProducer] Callback returned error: " << status << '\n';
+
+    if (last) {
+      return {true, {}, {}};
+    } else {
+      return {false, inputs, labels};
+    }
+  };
+}
+} // namespace nntrainer
diff --git a/nntrainer/dataset/func_data_producer.h b/nntrainer/dataset/func_data_producer.h
new file mode 100644 (file)
index 0000000..b2b22c8
--- /dev/null
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2021 Jihoon Lee <jhoon.it.lee@samsung.com>
+ *
+ * @file   func_data_producer.h
+ * @date   12 July 2021
+ * @brief  This file contains a data producer that is fed by a user callback
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Jihoon Lee <jhoon.it.lee@samsung.com>
+ * @bug    No known bugs except for NYI items
+ *
+ */
+#ifndef __FUNC_DATA_PRODUCER_H__
+#define __FUNC_DATA_PRODUCER_H__
+
+#include <data_producers.h>
+
+#include <dataset.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace nntrainer {
+
+using datagen_cb = ml::train::datagen_cb;
+
+/**
+ * @brief FuncDataProducer which wraps a user callback into a generator
+ *
+ */
+class FuncDataProducer final : public DataProducer {
+public:
+  /**
+   * @brief Construct a new Func Data Producer object
+   *
+   * @param datagen_cb data callback
+   * @param user_data_ user data
+   */
+  FuncDataProducer(datagen_cb datagen_cb, void *user_data_);
+
+  /**
+   * @brief Destroy the Func Data Producer object
+   *
+   */
+  ~FuncDataProducer();
+
+  inline static const std::string type = "batch_callback";
+
+  /**
+   * @copydoc DataProducer::getType()
+   */
+  const std::string getType() const override;
+
+  /**
+   * @copydoc DataProducer::setProperty(const std::vector<std::string>
+   * &properties)
+   */
+  virtual void setProperty(const std::vector<std::string> &properties) override;
+
+  /**
+   * @copydoc DataProducer::finalize(const std::vector<TensorDim>, const
+   * std::vector<TensorDim>)
+   */
+  virtual DataProducer::Gernerator
+  finalize(const std::vector<TensorDim> &input_dims,
+           const std::vector<TensorDim> &label_dims) override;
+
+private:
+  datagen_cb cb;
+  void *user_data;
+};
+
+} // namespace nntrainer
+
+#endif // __FUNC_DATA_PRODUCER_H__
index 2883a52..9d58867 100644 (file)
@@ -3,7 +3,8 @@ dataset_sources = [
   'databuffer_factory.cpp',
   'databuffer_file.cpp',
   'databuffer_func.cpp',
-  'random_data_producers.cpp'
+  'random_data_producers.cpp',
+  'func_data_producer.cpp',
 ]
 
 dataset_headers = [
index 883bb28..f4a0837 100644 (file)
@@ -93,7 +93,7 @@ DataProducer::Gernerator
 RandomDataOneHotProducer::finalize(const std::vector<TensorDim> &input_dims,
                                    const std::vector<TensorDim> &label_dims) {
   /** check if the given producer is ready to finalize */
-  auto &[min_, max_, sz] = *rd_one_hot_props;
+  auto &[min_, max_, _] = *rd_one_hot_props;
 
   /// @todo expand this to non onehot case
   NNTR_THROW_IF(std::any_of(label_dims.begin(), label_dims.end(),
@@ -122,7 +122,7 @@ RandomDataOneHotProducer::finalize(const std::vector<TensorDim> &input_dims,
 
   std::mt19937 rng;
   rng.seed(getSeed());
-  sz = size(input_dims, input_dims);
+  auto sz = size(input_dims, input_dims);
   /** DataProducer::Generator */
   return [rng, sz, input_dims, label_dims, min_ = min_.get(), max_ = max_.get(),
           current_iteration = 0ULL,
index fae2756..f612cba 100644 (file)
@@ -39,6 +39,19 @@ TEST_P(DataProducerSemantics, finalize_pn) {
   }
 }
 
+TEST_P(DataProducerSemantics, error_once_or_not_pn) {
+  if (result == DataProducerSemanticsExpectedResult::FAIL_AT_FINALIZE) {
+    return; // skip this test
+  }
+
+  auto generator = producer->finalize(input_dims, label_dims);
+  if (result == DataProducerSemanticsExpectedResult::FAIL_AT_GENERATOR_CALL) {
+    EXPECT_ANY_THROW(generator());
+  } else {
+    EXPECT_NO_THROW(generator());
+  }
+}
+
 TEST_P(DataProducerSemantics, fetch_one_epoch_or_10_iteration_pn) {
   if (result != DataProducerSemanticsExpectedResult::SUCCESS) {
     return; // skip this test
@@ -49,7 +62,7 @@ TEST_P(DataProducerSemantics, fetch_one_epoch_or_10_iteration_pn) {
   bool has_fixed_size = sz != nntrainer::DataProducer::SIZE_UNDEFINED;
 
   if (!has_fixed_size) {
-    sz = 5;
+    sz = 10;
   }
 
   for (unsigned i = 0; i < sz; ++i) {
index b4565dc..33936f9 100644 (file)
@@ -33,8 +33,9 @@ using DataProducerValidatorType =
  *
  */
 enum class DataProducerSemanticsExpectedResult {
-  SUCCESS = 0,          /**< SUCCESS */
-  FAIL_AT_FINALIZE = 1, /**< FAIL AT FINALIZE */
+  SUCCESS = 0,                /**< SUCCESS */
+  FAIL_AT_FINALIZE = 1,       /**< FAIL AT FINALIZE */
+  FAIL_AT_GENERATOR_CALL = 2, /**< FAIL AT GENERATOR CALL */
 };
 
 using DataProducerSemanticsParamType =
index 2d94d49..d2b1762 100644 (file)
@@ -4,7 +4,8 @@ test_target = []
 
 producer_targets = [
   'data_producer_common_tests.cpp',
-  'unittest_random_data_producers.cpp'
+  'unittest_random_data_producers.cpp',
+  'unittest_func_data_producers.cpp'
 ]
 
 test_target += producer_targets
diff --git a/test/unittest/datasets/unittest_func_data_producers.cpp b/test/unittest/datasets/unittest_func_data_producers.cpp
new file mode 100644 (file)
index 0000000..857f796
--- /dev/null
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2021 Jihoon Lee <jhoon.it.lee@samsung.com>
+ *
+ * @file unittest_func_data_producers.cpp
+ * @date 12 July 2021
+ * @brief Function data producers (Param Tests)
+ * @see        https://github.com/nnstreamer/nntrainer
+ * @author Jihoon Lee <jhoon.it.lee@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+
+#include <gtest/gtest.h>
+
+#include <data_producer_common_tests.h>
+#include <func_data_producer.h>
+#include <tensor.h>
+
+namespace {
+std::vector<nntrainer::TensorDim> input_shapes = {{3, 2, 4, 5}, {1, 2, 3, 4}};
+std::vector<nntrainer::TensorDim> label_shapes = {{3, 1, 1, 10}, {1, 1, 1, 2}};
+int user_data = 0;
+
+int getBatch(float **outVec, float **outLabel, bool *last, void *user_data) {
+  /** test user data is given correctly */
+  int *ud = reinterpret_cast<int *>(user_data);
+  *ud += 1;
+
+  /** first input/label is all zero, second input/label is all one */
+  auto first_input = nntrainer::Tensor::Map(
+    *outVec, input_shapes[0].getDataLen(), input_shapes[0]);
+  first_input.setValue(0);
+
+  auto second_input = nntrainer::Tensor::Map(
+    *(outVec + 1), input_shapes[1].getDataLen(), input_shapes[1]);
+  second_input.setValue(1);
+
+  auto first_label = nntrainer::Tensor::Map(
+    *outLabel, label_shapes[0].getDataLen(), label_shapes[0]);
+  first_label.setValue(0);
+
+  auto second_label = nntrainer::Tensor::Map(
+    *(outLabel + 1), label_shapes[1].getDataLen(), label_shapes[1]);
+  second_label.setValue(1);
+  *last = false;
+
+  return 0;
+};
+
+int getBatch_error(float **outVec, float **outLabel, bool *last,
+                   void *user_data) {
+  return -1;
+}
+
+bool validate(const std::vector<nntrainer::Tensor> &inputs,
+              const std::vector<nntrainer::Tensor> &labels) {
+  if (user_data == 0 || inputs.size() != 2 || labels.size() != 2) {
+    return false;
+  }
+
+  nntrainer::Tensor expected_first_input(input_shapes[0]);
+  expected_first_input.setValue(0);
+  nntrainer::Tensor expected_second_input(input_shapes[1]);
+  expected_second_input.setValue(1);
+
+  nntrainer::Tensor expected_first_label(label_shapes[0]);
+  expected_first_label.setValue(0);
+  nntrainer::Tensor expected_second_label(label_shapes[1]);
+  expected_second_label.setValue(1);
+
+  return inputs[0] == expected_first_input &&
+         inputs[1] == expected_second_input &&
+         labels[0] == expected_first_label &&
+         labels[1] == expected_second_label;
+};
+
+} // namespace
+
+std::unique_ptr<nntrainer::DataProducer>
+createConstantBatchProducer(const std::vector<std::string> &properties = {}) {
+  std::unique_ptr<nntrainer::DataProducer> ptr =
+    std::make_unique<nntrainer::FuncDataProducer>(getBatch, &user_data);
+  return ptr;
+}
+
+std::unique_ptr<nntrainer::DataProducer>
+createErrorBatchProducer(const std::vector<std::string> &properties = {}) {
+  std::unique_ptr<nntrainer::DataProducer> ptr =
+    std::make_unique<nntrainer::FuncDataProducer>(getBatch_error, nullptr);
+  return ptr;
+}
+
+std::unique_ptr<nntrainer::DataProducer>
+createNullBatchProducer(const std::vector<std::string> &properties = {}) {
+  std::unique_ptr<nntrainer::DataProducer> ptr =
+    std::make_unique<nntrainer::FuncDataProducer>(nullptr, nullptr);
+  return ptr;
+}
+
+auto func_success = DataProducerSemanticsParamType(
+  createConstantBatchProducer, {}, input_shapes, label_shapes, validate,
+  DataProducerSemanticsExpectedResult::SUCCESS);
+
+auto func_error = DataProducerSemanticsParamType(
+  createErrorBatchProducer, {}, input_shapes, label_shapes, nullptr,
+  DataProducerSemanticsExpectedResult::FAIL_AT_GENERATOR_CALL);
+
+auto func_nullptr = DataProducerSemanticsParamType(
+  createNullBatchProducer, {}, input_shapes, label_shapes, nullptr,
+  DataProducerSemanticsExpectedResult::FAIL_AT_FINALIZE);
+
+INSTANTIATE_TEST_CASE_P(Func, DataProducerSemantics,
+                        ::testing::Values(func_success, func_error,
+                                          func_nullptr));
index 3e076ef..dc39259 100644 (file)
@@ -2,9 +2,9 @@
 /**
  * Copyright (C) 2021 Jihoon Lee <jhoon.it.lee@samsung.com>
  *
- * @file data_producer_common_tests.cpp
+ * @file unittest_random_data_producers.cpp
  * @date 12 July 2021
- * @brief Common test for nntrainer dataset producers (Param Tests)
+ * @brief Random data producers test (Param Tests)
  * @see        https://github.com/nnstreamer/nntrainer
  * @author Jihoon Lee <jhoon.it.lee@samsung.com>
  * @bug No known bugs except for NYI items