[Dataset] Rework func dataset to be sample-wise
authorJihoon Lee <jhoon.it.lee@samsung.com>
Tue, 10 Aug 2021 10:56:15 +0000 (19:56 +0900)
committerJijoong Moon <jijoong.moon@samsung.com>
Tue, 24 Aug 2021 03:38:39 +0000 (12:38 +0900)
**Changes proposed in this PR:**
- Fix size-unit bugs in Tensor::Map (the size argument is in bytes, but bounds checks compared it against the element count)
- Add an early-return shortcut to Tensor::updateBatch when the batch size is unchanged
- Add a sample-wise generator (finalize_sample) to func_data_producer

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Jihoon Lee <jhoon.it.lee@samsung.com>
nntrainer/dataset/func_data_producer.cpp
nntrainer/dataset/func_data_producer.h
nntrainer/layers/split_layer.cpp
nntrainer/tensor/tensor.cpp
nntrainer/tensor/tensor.h
test/unittest/datasets/data_producer_common_tests.cpp
test/unittest/datasets/unittest_func_data_producer.cpp
test/unittest/unittest_nntrainer_tensor.cpp

index 433f436a1cb324286255620060a1eff3ba42ea83..08b7ec0963e549b225973820b84d9f935c004057 100644 (file)
@@ -90,4 +90,38 @@ FuncDataProducer::finalize(const std::vector<TensorDim> &input_dims,
     }
   };
 }
+
+DataProducer::Generator_sample
+FuncDataProducer::finalize_sample(const std::vector<TensorDim> &input_dims,
+                                  const std::vector<TensorDim> &label_dims) {
+  NNTR_THROW_IF(!this->cb, std::invalid_argument)
+    << "given callback is nullptr!";
+
+  auto input_data = std::shared_ptr<float *>(new float *[input_dims.size()],
+                                             std::default_delete<float *[]>());
+  auto label_data = std::shared_ptr<float *>(new float *[label_dims.size()],
+                                             std::default_delete<float *[]>());
+
+  return [cb = this->cb, ud = this->user_data_prop->get(), input_data,
+          label_data](unsigned int idx, std::vector<Tensor *> &inputs,
+                      std::vector<Tensor *> &labels) -> bool {
+    float **input_data_raw = input_data.get();
+    float **label_data_raw = label_data.get();
+
+    for (unsigned int i = 0; i < inputs.size(); ++i) {
+      *(input_data_raw + i) = inputs[i]->getData();
+    }
+
+    for (unsigned int i = 0; i < labels.size(); ++i) {
+      *(label_data_raw + i) = labels[i]->getData();
+    }
+
+    bool last = false;
+    int status = cb(input_data_raw, label_data_raw, &last, ud);
+    NNTR_THROW_IF(status != ML_ERROR_NONE, std::invalid_argument)
+      << "[DataProducer] Callback returned error: " << status << '\n';
+
+    return last;
+  };
+}
 } // namespace nntrainer
index 4e5d3751c067f179926ef1af4f614fc266b9d7e7..35f179382ed96ee545b08f8cce54d473d47b1b52 100644 (file)
@@ -58,16 +58,24 @@ public:
    * @copydoc DataProducer::setProeprty(const std::vector<std::string>
    * &properties)
    */
-  virtual void setProperty(const std::vector<std::string> &properties) override;
+  void setProperty(const std::vector<std::string> &properties) override;
 
   /**
    * @copydoc DataProducer::finalize(const std::vector<TensorDim>, const
    * std::vector<TensorDim>)
    */
-  virtual DataProducer::Generator
+  DataProducer::Generator
   finalize(const std::vector<TensorDim> &input_dims,
            const std::vector<TensorDim> &label_dims) override;
 
+  /**
+   * @copydoc DataProducer::finalize_sample(const std::vector<TensorDim>, const
+   * std::vector<TensorDim>)
+   */
+  DataProducer::Generator_sample
+  finalize_sample(const std::vector<TensorDim> &input_dims,
+                  const std::vector<TensorDim> &label_dims) override;
+
 private:
   datagen_cb cb;
   std::unique_ptr<PropsUserData> user_data_prop;
index 429f2f2d201c7708a5dad4ce00a52d5541a39985..6aec76eebe664f8054bceca1ce5cd8df4a3459e6 100644 (file)
@@ -106,12 +106,14 @@ void SplitLayer::forwarding(RunLayerContext &context, bool training) {
     output_.reshape(output_reshape_helper);
 
     for (unsigned int batch = 0; batch < input_.batch(); batch++) {
-      const Tensor source_tensor = Tensor::Map(
-        input_.getAddress(batch, 0, idx, 0), input_reshape_helper.width(),
-        {1, 1, 1, input_reshape_helper.width()});
-      Tensor dest_tensor = Tensor::Map(
-        output_.getAddress(batch, 0, 0, 0), output_reshape_helper.width(),
-        {1, 1, 1, output_reshape_helper.width()});
+      const Tensor source_tensor =
+        Tensor::Map(input_.getAddress(batch, 0, idx, 0),
+                    input_reshape_helper.width() * sizeof(float),
+                    {1, 1, 1, input_reshape_helper.width()});
+      Tensor dest_tensor =
+        Tensor::Map(output_.getAddress(batch, 0, 0, 0),
+                    output_reshape_helper.width() * sizeof(float),
+                    {1, 1, 1, output_reshape_helper.width()});
       dest_tensor.copy(source_tensor);
     }
 
@@ -133,12 +135,14 @@ void SplitLayer::calcDerivative(RunLayerContext &context) {
     output_.reshape(output_reshape_helper);
 
     for (unsigned int batch = 0; batch < input_.batch(); batch++) {
-      Tensor dest_tensor = Tensor::Map(input_.getAddress(batch, 0, idx, 0),
-                                       input_reshape_helper.width(),
-                                       {1, 1, 1, input_reshape_helper.width()});
-      const Tensor source_tensor = Tensor::Map(
-        output_.getAddress(batch, 0, 0, 0), output_reshape_helper.width(),
-        {1, 1, 1, output_reshape_helper.width()});
+      Tensor dest_tensor =
+        Tensor::Map(input_.getAddress(batch, 0, idx, 0),
+                    input_reshape_helper.width() * sizeof(float),
+                    {1, 1, 1, input_reshape_helper.width()});
+      const Tensor source_tensor =
+        Tensor::Map(output_.getAddress(batch, 0, 0, 0),
+                    output_reshape_helper.width() * sizeof(float),
+                    {1, 1, 1, output_reshape_helper.width()});
       dest_tensor.copy(source_tensor);
     }
 
index aa1fd6714923811eb53dff8f4a838f53f70dccfc..fa8f4cfe6bafd860fe07d1852e88c68188cdf614 100644 (file)
@@ -162,14 +162,14 @@ void Tensor::allocate() {
   }
 }
 
-Tensor Tensor::Map(float *buf, unsigned int size, const TensorDim &d,
+Tensor Tensor::Map(float *buf, unsigned int bytes, const TensorDim &d,
                    int offset) {
   if (d.getDataLen() == 0 || buf == nullptr) {
     throw std::invalid_argument(
       "[Tensor::Map] empty tensor dim is not allowed");
   }
 
-  if (d.getDataLen() + offset > size) {
+  if (d.getDataLen() * sizeof(float) + offset > bytes) {
     throw std::invalid_argument(
       "Creating shared tensor of size bigger than tensor memory.");
   }
@@ -190,7 +190,7 @@ Tensor Tensor::Map(std::shared_ptr<float> buf, unsigned int size,
       "[Tensor::Map] empty tensor dim is not allowed");
   }
 
-  if (d.getDataLen() + offset > size) {
+  if (d.getDataLen() * sizeof(float) + offset > size) {
     throw std::invalid_argument(
       "Creating shared tensor of size bigger than tensor memory.");
   }
@@ -558,12 +558,12 @@ void Tensor::createSharedDataTensor(const Tensor &src, Tensor &dest,
 Tensor Tensor::getSharedDataTensor(const TensorDim dim_, unsigned int offset,
                                    bool reset_stride) const {
   Tensor ret = *this;
+  ret.dim = dim_;
 
-  if (dim_.getDataLen() + offset > dim.getDataLen())
+  if (ret.bytes() + offset > bytes())
     throw std::invalid_argument(
       "Creating shared tensor of size bigger than tensor memory.");
 
-  ret.dim = dim_;
   if (reset_stride)
     ret.strides = ret.dim.computeStrides();
 
index f1ef5e8d50a1f0814e2d1850fc658b7e03007123..0ab459873dbac3c06bf164f853869245945375ae 100644 (file)
@@ -182,13 +182,13 @@ public:
    * This will not copy buffer to a new tensor but directly uses it
    *
    * @param buf buffer
-   * @param size buffer size in bytes
+   * @param bytes buffer size in bytes
    * @param d tensor dim
    * @param offset offset to be used from current
    * @return Tensor object
    * @throws std::invalid_argument if buf is null
    */
-  static Tensor Map(float *buf, unsigned int size, const TensorDim &d,
+  static Tensor Map(float *buf, unsigned int bytes, const TensorDim &d,
                     int offset = 0);
 
   /**
@@ -932,6 +932,9 @@ public:
    * updateBatch and then allocate again to avoid such issues.
    */
   void updateBatch(unsigned int batch) {
+    if (dim.batch() == batch) {
+      return;
+    }
     dim.batch(batch);
     if (isAllocated())
       reallocate();
index f0a40d2fcd7d140fb1b2880d898aabb5835e176c..a4fd2ba09d121fa5e1ad3278af0fde84fcb1e114 100644 (file)
@@ -188,7 +188,7 @@ TEST_P(DataProducerSemantics_samples, error_once_or_not_pn) {
       generator(0, std::get<0>(sample_data), std::get<1>(sample_data)));
   } else {
     EXPECT_NO_THROW(
-      generator(0, std::get<1>(sample_data), std::get<1>(sample_data)));
+      generator(0, std::get<0>(sample_data), std::get<1>(sample_data)));
   }
 }
 
@@ -210,7 +210,7 @@ TEST_P(DataProducerSemantics_samples, fetch_one_epoch_or_10_iteration_pn) {
   for (unsigned i = 0; i < sz; ++i) {
     auto last = generator(i, input_view, label_view);
 
-    if (i == sz - 1) {
+    if (i == sz - 1 && has_fixed_size) {
       EXPECT_TRUE(last);
     } else {
       ASSERT_FALSE(last) << " reached last at iteration: " << i << '\n';
index 8a65de5201c6bf14838cb06c61314b43e301ebcf..64ad93c72167b2d8a4960915412166b7f8eabefe 100644 (file)
 #include <tensor.h>
 
 namespace {
-std::vector<nntrainer::TensorDim> input_shapes = {{3, 2, 4, 5}, {3, 2, 3, 4}};
-std::vector<nntrainer::TensorDim> label_shapes = {{3, 1, 1, 10}, {3, 1, 1, 2}};
+std::vector<nntrainer::TensorDim> input_shapes = {{1, 2, 4, 5}, {1, 2, 3, 4}};
+std::vector<nntrainer::TensorDim> label_shapes = {{1, 1, 1, 10}, {1, 1, 1, 2}};
 int user_data = 0;
 
-int getBatch(float **outVec, float **outLabel, bool *last, void *user_data) {
+int getSample(float **outVec, float **outLabel, bool *last, void *user_data) {
   /** test user data is given correctly */
   int *ud = reinterpret_cast<int *>(user_data);
   *ud += 1;
 
   /** first input/label is all zero, second input/label is all one */
   auto first_input = nntrainer::Tensor::Map(
-    *outVec, input_shapes[0].getDataLen(), input_shapes[0]);
+    *outVec, input_shapes[0].getDataLen() * sizeof(float), input_shapes[0]);
   first_input.setValue(0);
 
   auto second_input = nntrainer::Tensor::Map(
-    *(outVec + 1), input_shapes[1].getDataLen(), input_shapes[1]);
+    *(outVec + 1), input_shapes[1].getDataLen() * sizeof(float),
+    input_shapes[1]);
   second_input.setValue(1);
 
   auto first_label = nntrainer::Tensor::Map(
-    *outLabel, label_shapes[0].getDataLen(), label_shapes[0]);
+    *outLabel, label_shapes[0].getDataLen() * sizeof(float), label_shapes[0]);
   first_label.setValue(0);
 
   auto second_label = nntrainer::Tensor::Map(
-    *(outLabel + 1), label_shapes[1].getDataLen(), label_shapes[1]);
+    *(outLabel + 1), label_shapes[1].getDataLen() * sizeof(float),
+    label_shapes[1]);
   second_label.setValue(1);
   *last = false;
 
   return 0;
 };
 
-int getBatch_error(float **outVec, float **outLabel, bool *last,
-                   void *user_data) {
+int getSample_error(float **outVec, float **outLabel, bool *last,
+                    void *user_data) {
   return -1;
 }
 
@@ -77,38 +79,42 @@ bool validate(const std::vector<nntrainer::Tensor> &inputs,
 } // namespace
 
 std::unique_ptr<nntrainer::DataProducer>
-createConstantBatchProducer(const std::vector<std::string> &properties = {}) {
+createConstantSampleProducer(const std::vector<std::string> &properties = {}) {
   std::unique_ptr<nntrainer::DataProducer> ptr =
-    std::make_unique<nntrainer::FuncDataProducer>(getBatch, &user_data);
+    std::make_unique<nntrainer::FuncDataProducer>(getSample, &user_data);
   return ptr;
 }
 
 std::unique_ptr<nntrainer::DataProducer>
-createErrorBatchProducer(const std::vector<std::string> &properties = {}) {
+createErrorSampleProducer(const std::vector<std::string> &properties = {}) {
   std::unique_ptr<nntrainer::DataProducer> ptr =
-    std::make_unique<nntrainer::FuncDataProducer>(getBatch_error, nullptr);
+    std::make_unique<nntrainer::FuncDataProducer>(getSample_error, nullptr);
   return ptr;
 }
 
 std::unique_ptr<nntrainer::DataProducer>
-createNullBatchProducer(const std::vector<std::string> &properties = {}) {
+createNullSampleProducer(const std::vector<std::string> &properties = {}) {
   std::unique_ptr<nntrainer::DataProducer> ptr =
     std::make_unique<nntrainer::FuncDataProducer>(nullptr, nullptr);
   return ptr;
 }
 
 auto func_success = DataProducerSemanticsParamType(
-  createConstantBatchProducer, {}, input_shapes, label_shapes, validate,
+  createConstantSampleProducer, {}, input_shapes, label_shapes, validate,
   DataProducerSemanticsExpectedResult::SUCCESS);
 
 auto func_error = DataProducerSemanticsParamType(
-  createErrorBatchProducer, {}, input_shapes, label_shapes, nullptr,
+  createErrorSampleProducer, {}, input_shapes, label_shapes, nullptr,
   DataProducerSemanticsExpectedResult::FAIL_AT_GENERATOR_CALL);
 
 auto func_nullptr = DataProducerSemanticsParamType(
-  createNullBatchProducer, {}, input_shapes, label_shapes, nullptr,
+  createNullSampleProducer, {}, input_shapes, label_shapes, nullptr,
   DataProducerSemanticsExpectedResult::FAIL_AT_FINALIZE);
 
 INSTANTIATE_TEST_CASE_P(Func, DataProducerSemantics,
                         ::testing::Values(func_success, func_error,
                                           func_nullptr));
+
+INSTANTIATE_TEST_CASE_P(Func, DataProducerSemantics_samples,
+                        ::testing::Values(func_success, func_error,
+                                          func_nullptr));
index 9bc01db413f47748c028f1d88c468b00c09b46f9..5d8c3a07c285b781acad606d720e1b4deaa4b03e 100644 (file)
@@ -108,11 +108,11 @@ TEST(nntrainer_TensorDim, setTensorDim_04_p) {
   EXPECT_EQ(d.width(), 7u);
 }
 
-TEST(nntrainer_Tensor, TensorWrap_p) {
+TEST(nntrainer_Tensor, TensorMap_p) {
   float dat[] = {1, 2, 3};
 
   {
-    nntrainer::Tensor a = nntrainer::Tensor::Map(dat, 3, {3});
+    nntrainer::Tensor a = nntrainer::Tensor::Map(dat, 3 * sizeof(float), {3});
     /// check if a.getData() has same address with dat
     EXPECT_EQ(dat, a.getData());
     {