[Dataset] Add Iteration Object
authorJihoon Lee <jhoon.it.lee@samsung.com>
Wed, 11 Aug 2021 07:38:27 +0000 (16:38 +0900)
committerJijoong Moon <jijoong.moon@samsung.com>
Thu, 26 Aug 2021 06:34:51 +0000 (15:34 +0900)
This patch adds an Iteration class and a Sample class which allocate tensor memory.
These objects will be used in the queue.

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Jihoon Lee <jhoon.it.lee@samsung.com>
14 files changed:
jni/Android.mk
nntrainer/dataset/batch_queue.h
nntrainer/dataset/data_iteration.cpp [new file with mode: 0644]
nntrainer/dataset/data_iteration.h [new file with mode: 0644]
nntrainer/dataset/data_producer.h [moved from nntrainer/dataset/data_producers.h with 98% similarity]
nntrainer/dataset/databuffer.h
nntrainer/dataset/databuffer_factory.cpp
nntrainer/dataset/func_data_producer.h
nntrainer/dataset/meson.build
nntrainer/dataset/random_data_producers.h
nntrainer/dataset/raw_file_data_producer.h
test/unittest/datasets/data_producer_common_tests.h
test/unittest/datasets/meson.build
test/unittest/datasets/unittest_data_iteration.cpp [new file with mode: 0644]

index 0e15d48..e34fd57 100644 (file)
@@ -127,6 +127,7 @@ NNTRAINER_SRCS := $(NNTRAINER_ROOT)/nntrainer/models/neuralnet.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/models/dynamic_training_optimization.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/dataset/batch_queue.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/dataset/databuffer.cpp \
+                  $(NNTRAINER_ROOT)/nntrainer/dataset/data_iteration.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/dataset/databuffer_factory.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/dataset/func_data_producer.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/dataset/random_data_producers.cpp \
index b440bfa..fab06b9 100644 (file)
@@ -18,7 +18,7 @@
 #include <queue>
 
 #include <condition_variable>
-#include <data_producers.h>
+#include <data_producer.h>
 #include <memory>
 #include <shared_mutex>
 
diff --git a/nntrainer/dataset/data_iteration.cpp b/nntrainer/dataset/data_iteration.cpp
new file mode 100644 (file)
index 0000000..a66301b
--- /dev/null
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2021 Jihoon Lee <jhoon.it.lee@samsung.com>
+ *
+ * @file   data_iteration.cpp
+ * @date   11 Aug 2021
+ * @brief  This file contains iteration and sample class
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Jihoon Lee <jhoon.it.lee@samsung.com>
+ * @bug    No known bugs except for NYI items
+ *
+ */
+#include <data_iteration.h>
+
+#include <algorithm>
+
+#include <nntrainer_error.h>
+#include <tensor.h>
+#include <tensor_dim.h>
+
+namespace nntrainer {
+
+namespace {
+
+/**
+ * @brief build one allocated tensor for every given dimension, in order
+ *
+ * @param dims dimensions to create the tensors from
+ * @return std::vector<Tensor> allocated tensors, same length as dims
+ */
+std::vector<Tensor>
+tensorsFromDims(const std::vector<ml::train::TensorDim> &dims) {
+  std::vector<Tensor> tensors;
+  tensors.reserve(dims.size());
+
+  for (const auto &d : dims) {
+    tensors.emplace_back(d);
+  }
+
+  return tensors;
+}
+
+/**
+ * @brief verify that every input and label dimension shares one batch size,
+ * which Iteration assumes at construction; an empty input list is rejected
+ *
+ */
+bool isBatchSame(const std::vector<ml::train::TensorDim> &input_dims,
+                 const std::vector<ml::train::TensorDim> &label_dims) {
+  /// requires at least one input
+  if (input_dims.empty()) {
+    return false;
+  }
+
+  const unsigned int expected = input_dims.front().batch();
+  auto differs = [expected](const TensorDim &dim) {
+    return dim.batch() != expected;
+  };
+
+  return std::none_of(input_dims.begin(), input_dims.end(), differs) &&
+         std::none_of(label_dims.begin(), label_dims.end(), differs);
+}
+
+/**
+ * @brief slice every batched tensor at one index along the batch direction
+ *
+ * @param batched_tensors batched tensors
+ * @param b  index inside the batch to slice out
+ * @return std::vector<Tensor> sliced tensors, one per given tensor
+ */
+std::vector<Tensor> sliceTensor(const std::vector<Tensor> &batched_tensors,
+                                unsigned int b) {
+  std::vector<Tensor> sliced_tensor;
+  sliced_tensor.reserve(batched_tensors.size());
+  for (const auto &t : batched_tensors) {
+    sliced_tensor.push_back(t.getBatchSlice(b, 1));
+  }
+  return sliced_tensor;
+}
+
+/// @brief create one Sample per batch index of the given iteration
+std::vector<Sample> unpackIteration(Iteration &iter) {
+  const auto batch_size = iter.batch();
+  std::vector<Sample> unpacked;
+  unpacked.reserve(batch_size);
+
+  for (unsigned int idx = 0; idx < batch_size; ++idx) {
+    unpacked.emplace_back(iter, idx);
+  }
+
+  return unpacked;
+}
+
+} // namespace
+
+Iteration::Iteration(const std::vector<ml::train::TensorDim> &input_dims,
+                     const std::vector<ml::train::TensorDim> &label_dims) :
+  inputs(tensorsFromDims(input_dims)),
+  labels(tensorsFromDims(label_dims)) {
+  /// reject empty inputs / mismatched batch before any sample is created
+  NNTR_THROW_IF(!isBatchSame(input_dims, label_dims), std::invalid_argument)
+    << "check batch size is all the same for all the input and label";
+  /// must stay after the check: unpackIteration() reads inputs.front()
+  samples = unpackIteration(*this);
+}
+/// a Sample slices every input and label tensor of iter at index `batch`
+Sample::Sample(const Iteration &iter, unsigned int batch) :
+  inputs(sliceTensor(iter.getInputsRef(), batch)),
+  labels(sliceTensor(iter.getLabelsRef(), batch)) {}
+
+} // namespace nntrainer
diff --git a/nntrainer/dataset/data_iteration.h b/nntrainer/dataset/data_iteration.h
new file mode 100644 (file)
index 0000000..e8d8e32
--- /dev/null
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2021 Jihoon Lee <jhoon.it.lee@samsung.com>
+ *
+ * @file   data_iteration.h
+ * @date   11 Aug 2021
+ * @brief  This file contains iteration and sample class
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Jihoon Lee <jhoon.it.lee@samsung.com>
+ * @bug    No known bugs except for NYI items
+ *
+ */
+#ifndef __DATA_SAMPLE_H__
+#define __DATA_SAMPLE_H__
+
+#include <functional>
+#include <memory>
+#include <tuple>
+#include <vector>
+
+#include <tensor.h>
+#include <tensor_dim.h>
+
+namespace nntrainer {
+
+class Sample;
+
+/**
+ * @brief Iteration class which owns the memory chunk for a single batch
+ *
+ */
+class Iteration {
+
+public:
+  /**
+   * @brief Construct a new Iteration object
+   * @note the batch dimension must be the same for all given dimensions and
+   * input_dims must not be empty, otherwise std::invalid_argument is thrown
+   *
+   * @param input_dims input dimension
+   * @param label_dims label dimension
+   */
+  Iteration(const std::vector<ml::train::TensorDim> &input_dims,
+            const std::vector<ml::train::TensorDim> &label_dims);
+
+  Iteration(const Iteration &rhs) = delete;
+  Iteration &operator=(const Iteration &rhs) = delete;
+  Iteration(Iteration &&rhs) = default;
+  Iteration &operator=(Iteration &&rhs) = default;
+
+  /**
+   * @brief get batch size of iteration, read from the first input tensor
+   *
+   * @return unsigned int batch size
+   */
+  unsigned int batch() { return inputs.front().batch(); }
+
+  /**
+   * @brief Get the Input Reference object
+   *
+   * @return std::vector<Tensor>& input
+   */
+  std::vector<Tensor> &getInputsRef() { return inputs; }
+
+  /**
+   * @brief Get the Input Reference object (const overload)
+   *
+   * @return const std::vector<Tensor>& input
+   */
+  const std::vector<Tensor> &getInputsRef() const { return inputs; }
+
+  /**
+   * @brief Get the Label Reference object
+   *
+   * @return std::vector<Tensor>&  label
+   */
+  std::vector<Tensor> &getLabelsRef() { return labels; }
+
+  /**
+   * @brief Get the Label Reference object (const overload)
+   *
+   * @return const std::vector<Tensor>&  label
+   */
+  const std::vector<Tensor> &getLabelsRef() const { return labels; }
+
+  /**
+   * @brief get sample iterator begin()
+   *
+   * @return std::vector<Sample>::iterator
+   */
+  std::vector<Sample>::iterator begin() { return samples.begin(); }
+
+  /**
+   * @brief get sample iterator end
+   *
+   * @return std::vector<Sample>::iterator
+   */
+  std::vector<Sample>::iterator end() { return samples.end(); }
+
+  /**
+   * @brief get const sample iterator begin, pointing at the first sample
+   *
+   * @return std::vector<Sample>::const_iterator
+   */
+  std::vector<Sample>::const_iterator begin() const { return samples.begin(); }
+
+  /**
+   * @brief get const sample iterator end
+   *
+   * @return std::vector<Sample>::const_iterator
+   */
+  std::vector<Sample>::const_iterator end() const { return samples.end(); }
+
+private:
+  std::vector<Tensor> inputs, labels;
+  std::vector<Sample> samples;
+};
+
+/**
+ * @brief Sample class which views the memory for a single sample
+ *
+ */
+class Sample {
+
+public:
+  /**
+   * @brief Construct a new Sample object
+   * @note the batch dimension will be ignored to make a single sample
+   *
+   * @param iter iteration object whose inputs and labels are sliced
+   * @param batch index inside the batch to create the sample from
+   */
+  Sample(const Iteration &iter, unsigned int batch);
+
+  /**
+   * @brief Get the Input Reference object
+   *
+   * @return std::vector<Tensor>& input
+   */
+  std::vector<Tensor> &getInputsRef() { return inputs; }
+
+  /**
+   * @brief Get the Input Reference object (const overload)
+   *
+   * @return const std::vector<Tensor>& input
+   */
+  const std::vector<Tensor> &getInputsRef() const { return inputs; }
+
+  /**
+   * @brief Get the Label Reference object
+   *
+   * @return std::vector<Tensor>&  label
+   */
+  std::vector<Tensor> &getLabelsRef() { return labels; }
+
+  /**
+   * @brief Get the Label Reference object (const overload)
+   *
+   * @return const std::vector<Tensor>&  label
+   */
+  const std::vector<Tensor> &getLabelsRef() const { return labels; }
+
+private:
+  std::vector<Tensor> inputs, labels;
+};
+
+} // namespace nntrainer
+
+#endif // __DATA_SAMPLE_H__
similarity index 98%
rename from nntrainer/dataset/data_producers.h
rename to nntrainer/dataset/data_producer.h
index 175e8de..78c9d49 100644 (file)
@@ -2,7 +2,7 @@
 /**
  * Copyright (C) 2021 Jihoon Lee <jhoon.it.lee@samsung.com>
  *
- * @file   data_producers.h
+ * @file   data_producer.h
  * @date   09 July 2021
  * @brief  This file contains data producer interface
  * @see    https://github.com/nnstreamer/nntrainer
@@ -10,8 +10,8 @@
  * @bug    No known bugs except for NYI items
  *
  */
-#ifndef __DATA_PRODUCERS_H__
-#define __DATA_PRODUCERS_H__
+#ifndef __DATA_PRODUCER_H__
+#define __DATA_PRODUCER_H__
 
 #include <functional>
 #include <limits>
@@ -175,4 +175,4 @@ public:
   virtual bool isMultiThreadSafe() const { return false; }
 };
 } // namespace nntrainer
-#endif // __DATA_PRODUCERS_H__
+#endif // __DATA_PRODUCER_H__
index bcbda2f..04d0d19 100644 (file)
@@ -34,7 +34,7 @@
 #include <vector>
 
 #include <batch_queue.h>
-#include <data_producers.h>
+#include <data_producer.h>
 #include <dataset.h>
 #include <tensor_dim.h>
 
index 70efd2b..a59665a 100644 (file)
@@ -12,7 +12,7 @@
 
 #include <databuffer_factory.h>
 
-#include <data_producers.h>
+#include <data_producer.h>
 #include <func_data_producer.h>
 #include <nntrainer_error.h>
 #include <raw_file_data_producer.h>
index d6caab0..114272b 100644 (file)
@@ -13,7 +13,7 @@
 #ifndef __FUNC_DATA_PRODUCER_H__
 #define __FUNC_DATA_PRODUCER_H__
 
-#include <data_producers.h>
+#include <data_producer.h>
 
 #include <dataset.h>
 
index 18717c5..26d6ef9 100644 (file)
@@ -1,6 +1,7 @@
 dataset_sources = [
   'batch_queue.cpp',
   'databuffer.cpp',
+  'data_iteration.cpp',
   'databuffer_factory.cpp',
   'random_data_producers.cpp',
   'func_data_producer.cpp',
index 49a9d51..66b7b16 100644 (file)
@@ -13,7 +13,7 @@
 #ifndef __RANDOM_DATA_PRODUCER_H__
 #define __RANDOM_DATA_PRODUCER_H__
 
-#include <data_producers.h>
+#include <data_producer.h>
 
 #include <memory>
 #include <random>
index 39593e9..5ec903b 100644 (file)
@@ -13,7 +13,7 @@
 #ifndef __RAW_FILE_DATA_PRODUCER_H__
 #define __RAW_FILE_DATA_PRODUCER_H__
 
-#include <data_producers.h>
+#include <data_producer.h>
 
 #include <dataset.h>
 
index 2988505..cf4d47f 100644 (file)
@@ -16,7 +16,7 @@
 
 #include <vector>
 
-#include <data_producers.h>
+#include <data_producer.h>
 #include <tensor.h>
 #include <tensor_dim.h>
 
index ac7b4fd..da50092 100644 (file)
@@ -8,7 +8,8 @@ producer_targets = [
   'unittest_func_data_producer.cpp',
   'unittest_raw_file_data_producer.cpp',
   'unittest_batch_queue.cpp',
-  'unittest_databuffer.cpp'
+  'unittest_databuffer.cpp',
+  'unittest_data_iteration.cpp'
 ]
 
 test_target += producer_targets
diff --git a/test/unittest/datasets/unittest_data_iteration.cpp b/test/unittest/datasets/unittest_data_iteration.cpp
new file mode 100644 (file)
index 0000000..bf49d6f
--- /dev/null
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2021 Jihoon Lee <jhoon.it.lee@samsung.com>
+ *
+ * @file unittest_data_iteration.cpp
+ * @date 11 Aug 2021
+ * @brief Unit tests for the Iteration and Sample classes
+ * @see        https://github.com/nnstreamer/nntrainer
+ * @author Jihoon Lee <jhoon.it.lee@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+
+#include <gtest/gtest.h>
+
+#include <data_iteration.h>
+#include <tensor.h>
+#include <tensor_dim.h>
+
+TEST(DataIteration, construct_p) {
+  EXPECT_NO_THROW(nntrainer::Iteration({{3, 1, 1, 1}, {3, 1, 1, 2}},
+                                       {{3, 1, 1, 10}, {3, 2, 4, 5}}));
+
+  {
+    auto rw_iter = nntrainer::Iteration({{3, 1, 1, 1}, {3, 1, 1, 2}},
+                                        {{3, 1, 1, 10}, {3, 2, 4, 5}});
+
+    EXPECT_NO_THROW(rw_iter.getInputsRef());
+    EXPECT_NO_THROW(rw_iter.getLabelsRef());
+  }
+  {
+    const auto ro_iter = nntrainer::Iteration({{3, 1, 1, 1}, {3, 1, 1, 2}},
+                                              {{3, 1, 1, 10}, {3, 2, 4, 5}});
+
+    EXPECT_NO_THROW(ro_iter.getInputsRef());
+    EXPECT_NO_THROW(ro_iter.getLabelsRef());
+
+    /// each visited sample is expected to keep the per-tensor shape with
+    /// the batch axis reduced to 1
+    for (const auto &sample : ro_iter) {
+      const auto &in = sample.getInputsRef();
+      const auto &out = sample.getLabelsRef();
+      EXPECT_EQ(in.front().getDim(), nntrainer::TensorDim(1, 1, 1, 1));
+      EXPECT_EQ(in.back().getDim(), nntrainer::TensorDim(1, 1, 1, 2));
+      EXPECT_EQ(out.front().getDim(), nntrainer::TensorDim(1, 1, 1, 10));
+      EXPECT_EQ(out.back().getDim(), nntrainer::TensorDim(1, 2, 4, 5));
+    }
+  }
+}
+/// Construction with no input dimension must throw std::invalid_argument
+TEST(DataIteration, constructEmptyInput_n) {
+  EXPECT_THROW(nntrainer::Iteration({}, {{3, 1, 1, 10}, {3, 2, 4, 5}}),
+               std::invalid_argument);
+}
+/// Construction with differing batch sizes (3 vs 2) must throw
+TEST(DataIteration, constructDifferentBatchSize_n) {
+  EXPECT_THROW(nntrainer::Iteration({{3, 1, 1, 1}, {2, 1, 1, 2}},
+                                    {{3, 1, 1, 10}, {3, 2, 4, 5}}),
+               std::invalid_argument);
+}
+/// A Sample can be created for every index inside the batch (0, 1, 2)
+TEST(DataSample, constructSample_p) {
+  auto iter = nntrainer::Iteration({{3, 1, 1, 1}, {3, 1, 1, 2}},
+                                   {{3, 1, 1, 10}, {3, 2, 4, 5}});
+
+  EXPECT_NO_THROW(nntrainer::Sample(iter, 0));
+  EXPECT_NO_THROW(nntrainer::Sample(iter, 1));
+  EXPECT_NO_THROW(nntrainer::Sample(iter, 2));
+}
+/// Creating a Sample at an index outside the batch (size 3) must throw
+TEST(DataSample, constructOutOfBatch_n) {
+  auto iter = nntrainer::Iteration({{3, 1, 1, 1}, {3, 1, 1, 2}},
+                                   {{3, 1, 1, 10}, {3, 2, 4, 5}});
+
+  EXPECT_ANY_THROW(nntrainer::Sample(iter, 3));
+  EXPECT_ANY_THROW(nntrainer::Sample(iter, 4));
+}