From 6d27bed58f9b64857c9664d3289fa47554f636ca Mon Sep 17 00:00:00 2001 From: Jihoon Lee Date: Wed, 11 Aug 2021 16:38:27 +0900 Subject: [PATCH] [Dataset] Add Iteration Object This patch adds an Iteration class and a Sample class which allocate tensor memory. These objects will be used in the queue. **Self evaluation:** 1. Build test: [X]Passed [ ]Failed [ ]Skipped 2. Run test: [X]Passed [ ]Failed [ ]Skipped Signed-off-by: Jihoon Lee --- jni/Android.mk | 1 + nntrainer/dataset/batch_queue.h | 2 +- nntrainer/dataset/data_iteration.cpp | 111 ++++++++++++++ nntrainer/dataset/data_iteration.h | 169 +++++++++++++++++++++ .../dataset/{data_producers.h => data_producer.h} | 8 +- nntrainer/dataset/databuffer.h | 2 +- nntrainer/dataset/databuffer_factory.cpp | 2 +- nntrainer/dataset/func_data_producer.h | 2 +- nntrainer/dataset/meson.build | 1 + nntrainer/dataset/random_data_producers.h | 2 +- nntrainer/dataset/raw_file_data_producer.h | 2 +- .../unittest/datasets/data_producer_common_tests.h | 2 +- test/unittest/datasets/meson.build | 3 +- test/unittest/datasets/unittest_data_iteration.cpp | 76 +++++++++ 14 files changed, 371 insertions(+), 12 deletions(-) create mode 100644 nntrainer/dataset/data_iteration.cpp create mode 100644 nntrainer/dataset/data_iteration.h rename nntrainer/dataset/{data_producers.h => data_producer.h} (98%) create mode 100644 test/unittest/datasets/unittest_data_iteration.cpp diff --git a/jni/Android.mk b/jni/Android.mk index 0e15d48..e34fd57 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -127,6 +127,7 @@ NNTRAINER_SRCS := $(NNTRAINER_ROOT)/nntrainer/models/neuralnet.cpp \ $(NNTRAINER_ROOT)/nntrainer/models/dynamic_training_optimization.cpp \ $(NNTRAINER_ROOT)/nntrainer/dataset/batch_queue.cpp \ $(NNTRAINER_ROOT)/nntrainer/dataset/databuffer.cpp \ + $(NNTRAINER_ROOT)/nntrainer/dataset/data_iteration.cpp \ $(NNTRAINER_ROOT)/nntrainer/dataset/databuffer_factory.cpp \ $(NNTRAINER_ROOT)/nntrainer/dataset/func_data_producer.cpp \
$(NNTRAINER_ROOT)/nntrainer/dataset/random_data_producers.cpp \ diff --git a/nntrainer/dataset/batch_queue.h b/nntrainer/dataset/batch_queue.h index b440bfa..fab06b9 100644 --- a/nntrainer/dataset/batch_queue.h +++ b/nntrainer/dataset/batch_queue.h @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include diff --git a/nntrainer/dataset/data_iteration.cpp b/nntrainer/dataset/data_iteration.cpp new file mode 100644 index 0000000..a66301b --- /dev/null +++ b/nntrainer/dataset/data_iteration.cpp @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * Copyright (C) 2021 Jihoon Lee + * + * @file data_iteration.cpp + * @date 11 Aug 2021 + * @brief This file contains iteration and sample class + * @see https://github.com/nnstreamer/nntrainer + * @author Jihoon Lee + * @bug No known bugs except for NYI items + * + */ +#include + +#include + +#include +#include +#include + +namespace nntrainer { + +namespace { + +/** + * @brief return tensors constructed from the given dimensions + * + * @param dims dimensions, one tensor is created per dimension + * @return std::vector<Tensor> allocated tensors + */ +std::vector<Tensor> +tensorsFromDims(const std::vector<TensorDim> &dims) { + std::vector<Tensor> t; + t.reserve(dims.size()); + + for (auto &dim : dims) { + t.emplace_back(dim); + } + + return t; +} + +/** + * @brief check if all the dimensions have the same batch, this is a required + * assumption for the creation of Iteration (false if there is no input) + * + */ +bool isBatchSame(const std::vector<TensorDim> &input_dims, + const std::vector<TensorDim> &label_dims) { + if (input_dims.empty()) { + /// requires at least one input + return false; + } + + unsigned int reference_batch = input_dims.front().batch(); + auto pred = [reference_batch](const TensorDim &dim) { + return dim.batch() == reference_batch; + }; + + return std::all_of(input_dims.begin(), input_dims.end(), pred) && + std::all_of(label_dims.begin(), label_dims.end(), pred); +} + +/** + * @brief slice a vector of tensors along the batch direction + * + * @param batched_tensors batched tensor + * @param b batch + * @return
std::vector<Tensor> sliced tensor + */ +std::vector<Tensor> sliceTensor(const std::vector<Tensor> &batched_tensors, + unsigned int b) { + std::vector<Tensor> sliced_tensor; + sliced_tensor.reserve(batched_tensors.size()); + std::transform(batched_tensors.begin(), batched_tensors.end(), + std::back_inserter(sliced_tensor), + [b](const Tensor &t) { return t.getBatchSlice(b, 1); }); + return sliced_tensor; +} + +std::vector<Sample> unpackIteration(Iteration &iter) { + auto b = iter.batch(); + + std::vector<Sample> samples; + samples.reserve(b); + + for (decltype(b) i = 0; i < b; ++i) { + samples.emplace_back(iter, i); + } + + return samples; +} + +} // namespace + +Iteration::Iteration(const std::vector<TensorDim> &input_dims, + const std::vector<TensorDim> &label_dims) : + inputs(tensorsFromDims(input_dims)), + labels(tensorsFromDims(label_dims)) { + + NNTR_THROW_IF(!isBatchSame(input_dims, label_dims), std::invalid_argument) + << "check batch size is all the same for all the input and label"; + + samples = unpackIteration(*this); +} + +Sample::Sample(const Iteration &iter, unsigned int batch) : + inputs(sliceTensor(iter.getInputsRef(), batch)), + labels(sliceTensor(iter.getLabelsRef(), batch)) {} + +} // namespace nntrainer diff --git a/nntrainer/dataset/data_iteration.h b/nntrainer/dataset/data_iteration.h new file mode 100644 index 0000000..e8d8e32 --- /dev/null +++ b/nntrainer/dataset/data_iteration.h @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * Copyright (C) 2021 Jihoon Lee + * + * @file data_iteration.h + * @date 11 Aug 2021 + * @brief This file contains iteration and sample class + * @see https://github.com/nnstreamer/nntrainer + * @author Jihoon Lee + * @bug No known bugs except for NYI items + * + */ +#ifndef __DATA_SAMPLE_H__ +#define __DATA_SAMPLE_H__ + +#include +#include +#include +#include + +#include +#include + +namespace nntrainer { + +class Sample; + +/** + * @brief Iteration class which owns the memory chunk for a single batch + * + */ +class Iteration { + +public: + /** + * @brief Construct a
new Iteration object + * @note the batch dimension must be the same for all given dimensions and at + * least one input dimension must be given, or std::invalid_argument is thrown + * + * @param input_dims input dimension + * @param label_dims label dimension + */ + Iteration(const std::vector<TensorDim> &input_dims, + const std::vector<TensorDim> &label_dims); + + Iteration(const Iteration &rhs) = delete; + Iteration &operator=(const Iteration &rhs) = delete; + Iteration(Iteration &&rhs) = default; + Iteration &operator=(Iteration &&rhs) = default; + + /** + * @brief get batch size of iteration + * + * @return unsigned int batch size + */ + unsigned int batch() { return inputs.front().batch(); } + + /** + * @brief Get the Input Reference object + * + * @return std::vector<Tensor>& input + */ + std::vector<Tensor> &getInputsRef() { return inputs; } + + /** + * @brief Get the Input Reference object + * + * @return const std::vector<Tensor>& input + */ + const std::vector<Tensor> &getInputsRef() const { return inputs; } + + /** + * @brief Get the Label Reference object + * + * @return std::vector<Tensor>& label + */ + std::vector<Tensor> &getLabelsRef() { return labels; } + + /** + * @brief Get the Label Reference object + * + * @return const std::vector<Tensor>& label + */ + const std::vector<Tensor> &getLabelsRef() const { return labels; } + + /** + * @brief get sample iterator begin + * + * @return std::vector<Sample>::iterator + */ + std::vector<Sample>::iterator begin() { return samples.begin(); } + + /** + * @brief get sample iterator end + * + * @return std::vector<Sample>::iterator + */ + std::vector<Sample>::iterator end() { return samples.end(); } + + /** + * @brief get const sample iterator begin + * + * @return std::vector<Sample>::const_iterator + */ + std::vector<Sample>::const_iterator begin() const { return samples.begin(); } + + /** + * @brief get const sample iterator end + * + * @return std::vector<Sample>::const_iterator + */ + std::vector<Sample>::const_iterator end() const { return samples.end(); } + +private: + std::vector<Tensor> inputs, labels; + std::vector<Sample> samples; +}; + +/** + * @brief Sample class which views the memory for a single sample + * + */
+class Sample { + +public: + /** + * @brief Construct a new Sample object + * @note the batch dimension will be ignored to make a single sample + * + * @param iter iteration object to slice the sample from + * @param batch nth batch to create the sample + */ + Sample(const Iteration &iter, unsigned int batch); + + /** + * @brief Get the Input Reference object + * + * @return std::vector<Tensor>& input + */ + std::vector<Tensor> &getInputsRef() { return inputs; } + + /** + * @brief Get the Input Reference object + * + * @return const std::vector<Tensor>& input + */ + const std::vector<Tensor> &getInputsRef() const { return inputs; } + + /** + * @brief Get the Label Reference object + * + * @return std::vector<Tensor>& label + */ + std::vector<Tensor> &getLabelsRef() { return labels; } + + /** + * @brief Get the Label Reference object + * + * @return const std::vector<Tensor>& label + */ + const std::vector<Tensor> &getLabelsRef() const { return labels; } + +private: + std::vector<Tensor> inputs, labels; +}; + +} // namespace nntrainer + +#endif // __DATA_SAMPLE_H__ diff --git a/nntrainer/dataset/data_producers.h b/nntrainer/dataset/data_producer.h similarity index 98% rename from nntrainer/dataset/data_producers.h rename to nntrainer/dataset/data_producer.h index 175e8de..78c9d49 100644 --- a/nntrainer/dataset/data_producers.h +++ b/nntrainer/dataset/data_producer.h @@ -2,7 +2,7 @@ /** * Copyright (C) 2021 Jihoon Lee * - * @file data_producers.h + * @file data_producer.h * @date 09 July 2021 * @brief This file contains data producer interface * @see https://github.com/nnstreamer/nntrainer @@ -10,8 +10,8 @@ * @bug No known bugs except for NYI items * */ -#ifndef __DATA_PRODUCERS_H__ -#define __DATA_PRODUCERS_H__ +#ifndef __DATA_PRODUCER_H__ +#define __DATA_PRODUCER_H__ #include #include @@ -175,4 +175,4 @@ public: virtual bool isMultiThreadSafe() const { return false; } }; } // namespace nntrainer -#endif // __DATA_PRODUCERS_H__ +#endif // __DATA_PRODUCER_H__ diff --git a/nntrainer/dataset/databuffer.h b/nntrainer/dataset/databuffer.h index bcbda2f..04d0d19
100644 --- a/nntrainer/dataset/databuffer.h +++ b/nntrainer/dataset/databuffer.h @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include diff --git a/nntrainer/dataset/databuffer_factory.cpp b/nntrainer/dataset/databuffer_factory.cpp index 70efd2b..a59665a 100644 --- a/nntrainer/dataset/databuffer_factory.cpp +++ b/nntrainer/dataset/databuffer_factory.cpp @@ -12,7 +12,7 @@ #include -#include +#include #include #include #include diff --git a/nntrainer/dataset/func_data_producer.h b/nntrainer/dataset/func_data_producer.h index d6caab0..114272b 100644 --- a/nntrainer/dataset/func_data_producer.h +++ b/nntrainer/dataset/func_data_producer.h @@ -13,7 +13,7 @@ #ifndef __FUNC_DATA_PRODUCER_H__ #define __FUNC_DATA_PRODUCER_H__ -#include +#include #include diff --git a/nntrainer/dataset/meson.build b/nntrainer/dataset/meson.build index 18717c5..26d6ef9 100644 --- a/nntrainer/dataset/meson.build +++ b/nntrainer/dataset/meson.build @@ -1,6 +1,7 @@ dataset_sources = [ 'batch_queue.cpp', 'databuffer.cpp', + 'data_iteration.cpp', 'databuffer_factory.cpp', 'random_data_producers.cpp', 'func_data_producer.cpp', diff --git a/nntrainer/dataset/random_data_producers.h b/nntrainer/dataset/random_data_producers.h index 49a9d51..66b7b16 100644 --- a/nntrainer/dataset/random_data_producers.h +++ b/nntrainer/dataset/random_data_producers.h @@ -13,7 +13,7 @@ #ifndef __RANDOM_DATA_PRODUCER_H__ #define __RANDOM_DATA_PRODUCER_H__ -#include +#include #include #include diff --git a/nntrainer/dataset/raw_file_data_producer.h b/nntrainer/dataset/raw_file_data_producer.h index 39593e9..5ec903b 100644 --- a/nntrainer/dataset/raw_file_data_producer.h +++ b/nntrainer/dataset/raw_file_data_producer.h @@ -13,7 +13,7 @@ #ifndef __RAW_FILE_DATA_PRODUCER_H__ #define __RAW_FILE_DATA_PRODUCER_H__ -#include +#include #include diff --git a/test/unittest/datasets/data_producer_common_tests.h b/test/unittest/datasets/data_producer_common_tests.h index 2988505..cf4d47f 100644 --- 
a/test/unittest/datasets/data_producer_common_tests.h +++ b/test/unittest/datasets/data_producer_common_tests.h @@ -16,7 +16,7 @@ #include -#include +#include #include #include diff --git a/test/unittest/datasets/meson.build b/test/unittest/datasets/meson.build index ac7b4fd..da50092 100644 --- a/test/unittest/datasets/meson.build +++ b/test/unittest/datasets/meson.build @@ -8,7 +8,8 @@ producer_targets = [ 'unittest_func_data_producer.cpp', 'unittest_raw_file_data_producer.cpp', 'unittest_batch_queue.cpp', - 'unittest_databuffer.cpp' + 'unittest_databuffer.cpp', + 'unittest_data_iteration.cpp' ] test_target += producer_targets diff --git a/test/unittest/datasets/unittest_data_iteration.cpp b/test/unittest/datasets/unittest_data_iteration.cpp new file mode 100644 index 0000000..bf49d6f --- /dev/null +++ b/test/unittest/datasets/unittest_data_iteration.cpp @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * Copyright (C) 2021 Jihoon Lee + * + * @file unittest_data_iteration.cpp + * @date 11 Aug 2021 + * @brief Unit tests for the Iteration and Sample classes + * @see https://github.com/nnstreamer/nntrainer + * @author Jihoon Lee + * @bug No known bugs except for NYI items + */ + +#include + +#include +#include +#include + +TEST(DataIteration, construct_p) { + EXPECT_NO_THROW(nntrainer::Iteration({{3, 1, 1, 1}, {3, 1, 1, 2}}, + {{3, 1, 1, 10}, {3, 2, 4, 5}})); + + { + auto iter = nntrainer::Iteration({{3, 1, 1, 1}, {3, 1, 1, 2}}, + {{3, 1, 1, 10}, {3, 2, 4, 5}}); + + EXPECT_NO_THROW(iter.getInputsRef()); + EXPECT_NO_THROW(iter.getLabelsRef()); + } + { + const auto iter = nntrainer::Iteration({{3, 1, 1, 1}, {3, 1, 1, 2}}, + {{3, 1, 1, 10}, {3, 2, 4, 5}}); + + EXPECT_NO_THROW(iter.getInputsRef()); + EXPECT_NO_THROW(iter.getLabelsRef()); + + for (auto i = iter.begin(); i != iter.end(); ++i) { + EXPECT_EQ(i->getInputsRef().front().getDim(), + nntrainer::TensorDim(1, 1, 1, 1)); + EXPECT_EQ(i->getInputsRef().back().getDim(), + nntrainer::TensorDim(1, 1, 1, 2)); +
EXPECT_EQ(i->getLabelsRef().front().getDim(), + nntrainer::TensorDim(1, 1, 1, 10)); + EXPECT_EQ(i->getLabelsRef().back().getDim(), + nntrainer::TensorDim(1, 2, 4, 5)); + } + } +} + +TEST(DataIteration, constructEmptyInput_n) { + EXPECT_THROW(nntrainer::Iteration({}, {{3, 1, 1, 10}, {3, 2, 4, 5}}), + std::invalid_argument); +} + +TEST(DataIteration, constructDifferentBatchSize_n) { + EXPECT_THROW(nntrainer::Iteration({{3, 1, 1, 1}, {2, 1, 1, 2}}, + {{3, 1, 1, 10}, {3, 2, 4, 5}}), + std::invalid_argument); +} + +TEST(DataSample, constructSample_p) { + auto iter = nntrainer::Iteration({{3, 1, 1, 1}, {3, 1, 1, 2}}, + {{3, 1, 1, 10}, {3, 2, 4, 5}}); + + EXPECT_NO_THROW(nntrainer::Sample(iter, 0)); + EXPECT_NO_THROW(nntrainer::Sample(iter, 1)); + EXPECT_NO_THROW(nntrainer::Sample(iter, 2)); +} + +TEST(DataSample, constructOutOfBatch_n) { + auto iter = nntrainer::Iteration({{3, 1, 1, 1}, {3, 1, 1, 2}}, + {{3, 1, 1, 10}, {3, 2, 4, 5}}); + + EXPECT_ANY_THROW(nntrainer::Sample(iter, 3)); + EXPECT_ANY_THROW(nntrainer::Sample(iter, 4)); +} -- 2.7.4