From: Jihoon Lee
Date: Fri, 20 Aug 2021 09:14:54 +0000 (+0900)
Subject: [Dataset] Change and apply callback handler
X-Git-Tag: submit/tizen/20210827.122527~3
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3acefd899fa7372184c9c2283c9fd592fd35c69d;p=platform%2Fcore%2Fml%2Fnntrainer.git

[Dataset] Change and apply callback handler

This patch changes the callback handler from batchwise to samplewise and
applies the new contract across the applications and tests.

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Jihoon Lee

---
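For application authors updating against this change: a generator callback now
fills exactly one sample per call — outVec[0] and outLabel[0] point at
single-sample buffers and batching happens inside the framework — and *last is
set once per epoch. The sketch below only illustrates that contract; MyData,
readNthSample, and the member names are placeholders invented for this note,
not symbols from the patch:

  // A minimal sample-wise generator under the new contract (sketch only).
  // Assumes <algorithm>/<random> are included and a user-defined MyData that
  // keeps a shuffled index list, a sample counter, and an open data file.
  int getSample(float **outVec, float **outLabel, bool *last, void *user_data) {
    auto *data = reinterpret_cast<MyData *>(user_data);

    // Fill exactly ONE sample; the framework assembles batches itself.
    readNthSample(data->file, outVec[0], outLabel[0],
                  data->idxes.at(data->count));

    data->count++;
    if (data->count < data->num_samples) {
      *last = false;
    } else {
      *last = true; // epoch finished: rewind and reshuffle for the next epoch
      data->count = 0;
      std::shuffle(data->idxes.begin(), data->idxes.end(), data->rng);
    }
    return 0; /* ML_ERROR_NONE */
  }

The callback is registered together with its user data, e.g.
ml_train_dataset_add_generator(dataset, ML_TRAIN_DATASET_MODE_TRAIN, getSample,
&train_data) in the C API, or ml::train::createDataset(DatasetType::GENERATOR,
getSample, &train_data) in C++, as the hunks below show.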
diff --git a/Applications/Custom/LayerClient/jni/main.cpp b/Applications/Custom/LayerClient/jni/main.cpp
index 1d65d31e..b655fce4 100644
--- a/Applications/Custom/LayerClient/jni/main.cpp
+++ b/Applications/Custom/LayerClient/jni/main.cpp
@@ -40,7 +40,7 @@ int constant_generator_cb(float **outVec, float **outLabel, bool *last,
                           void *user_data) {
   static int count = 0;
   unsigned int i;
-  unsigned int data_size = BATCH_SIZE * FEATURE_SIZE;
+  unsigned int data_size = FEATURE_SIZE;
 
   for (i = 0; i < data_size; ++i) {
     outVec[0][i] = 2.0f;
@@ -51,12 +51,12 @@ int constant_generator_cb(float **outVec, float **outLabel, bool *last,
   }
   outLabel[0][0] = 1.0f;
 
+  count++;
   if (count == 10) {
     *last = true;
     count = 0;
   } else {
     *last = false;
-    count++;
   }
 
   return ML_ERROR_NONE;
diff --git a/Applications/LogisticRegression/jni/main.cpp b/Applications/LogisticRegression/jni/main.cpp
index d2b304f5..9c22b1e3 100644
--- a/Applications/LogisticRegression/jni/main.cpp
+++ b/Applications/LogisticRegression/jni/main.cpp
@@ -24,12 +24,12 @@
  * (test.txt)
  */
 
+#include
 #include
 #include
 #include
+#include
 #include
-#include
-#include
 
 #include
 #include
@@ -47,6 +47,8 @@
 const unsigned int feature_size = 2;
 
 const unsigned int total_val_data_size = 10;
 
+constexpr unsigned int SEED = 0;
+
 bool training = false;
@@ -69,13 +71,12 @@ float stepFunction(float x) {
 /**
  * @brief     get idth Data
  * @param[in] F file stream
- * @param[out] outVec feature data
- * @param[out] outLabel label data
+ * @param[out] input feature data
+ * @param[out] label label data
  * @param[in] id id th
  * @retval boolean true if there is no error
  */
-bool getData(std::ifstream &F, std::vector<float> &outVec,
-             std::vector<float> &outLabel, unsigned int id) {
+bool getData(std::ifstream &F, float *input, float *label, unsigned int id) {
   std::string temp;
   F.clear();
   F.seekg(0, std::ios_base::beg);
@@ -94,55 +95,41 @@ bool getData(std::ifstream &F, std::vector<float> &outVec,
   float x;
   for (unsigned int j = 0; j < feature_size; ++j) {
     buffer >> x;
-    outVec[j] = x;
+    input[j] = x;
   }
   buffer >> x;
-  outLabel[0] = x;
+  label[0] = x;
 
   return true;
 }
 
+std::mt19937 rng;
+std::vector<unsigned int> train_idxes;
+
 /**
- * @brief     get Data as much as batch size
+ * @brief     get a single data
  * @param[out] outVec feature data
  * @param[out] outLabel label data
  * @param[out] last end of data
  * @param[in] user_data user data
  * @retval int 0 if there is no error
  */
-int getBatch_train(float **outVec, float **outLabel, bool *last,
-                   void *user_data) {
+int getSample_train(float **outVec, float **outLabel, bool *last,
+                    void *user_data) {
   std::ifstream dataFile(data_file);
-  unsigned int data_size = total_train_data_size;
-  unsigned int count = 0;
 
-  if (data_size - train_count < batch_size) {
+  if (!getData(dataFile, *outVec, *outLabel, train_idxes.at(train_count))) {
+    return -1;
+  }
+  train_count++;
+  if (train_count < total_train_data_size) {
+    *last = false;
+  } else {
     *last = true;
    train_count = 0;
-    return 0;
-  }
-
-  for (unsigned int i = train_count; i < train_count + batch_size; ++i) {
-
-    std::vector<float> o;
-    std::vector<float> l;
-    o.resize(feature_size);
-    l.resize(1);
-
-    if (!getData(dataFile, o, l, i)) {
-      return -1;
-    };
-
-    for (unsigned int j = 0; j < feature_size; ++j)
-      outVec[0][count * feature_size + j] = o[j];
-    outLabel[0][count] = l[0];
-
-    count++;
+    std::shuffle(train_idxes.begin(), train_idxes.end(), rng);
   }
 
-  dataFile.close();
-  *last = false;
-  train_count += batch_size;
   return 0;
 }
@@ -161,6 +148,9 @@ int main(int argc, char *argv[]) {
   }
 
   const std::string weight_path = "logistic_model.bin";
+  train_idxes.resize(total_train_data_size);
+  std::iota(train_idxes.begin(), train_idxes.end(), 0);
+  rng.seed(SEED);
 
   const std::vector<std::string> args(argv + 1, argv + argc);
   std::string config = args[1];
@@ -171,8 +161,8 @@
 
   srand(time(NULL));
 
-  auto data_train =
-    ml::train::createDataset(ml::train::DatasetType::GENERATOR, getBatch_train);
+  auto data_train = ml::train::createDataset(ml::train::DatasetType::GENERATOR,
+                                             getSample_train);
 
   /**
    * @brief Create NN
   */
@@ -219,7 +209,7 @@
     o.resize(feature_size);
     l.resize(1);
 
-    getData(dataFile, o, l, j);
+    getData(dataFile, o.data(), l.data(), j);
 
     try {
       float answer =
diff --git a/Applications/MNIST/jni/main.cpp b/Applications/MNIST/jni/main.cpp
index 4445448c..66823f4c 100644
--- a/Applications/MNIST/jni/main.cpp
+++ b/Applications/MNIST/jni/main.cpp
@@ -23,15 +23,16 @@
 #define APP_VALIDATE
 #endif
 
+#include
 #include
 #include
 #include
 #include
 #include
-#include
+#include
+#include
 #include
-#include
-#include
+#include
 
 #if defined(APP_VALIDATE)
 #include
@@ -45,6 +46,8 @@
 
 #define VALIDATION false
 
+constexpr unsigned int SEED = 0;
+
 #if VALIDATION
 /**
  * @brief     Data size for each category
 */
@@ -55,8 +58,6 @@
 const unsigned int total_val_data_size = 32;
 
 const unsigned int total_test_data_size = 32;
 
-const unsigned int buffer_size = 32;
-
 const unsigned int batch_size = 32;
 
 #else
@@ -67,8 +68,6 @@
 const unsigned int total_val_data_size = 100;
 
 const unsigned int total_test_data_size = 100;
 
-const unsigned int buffer_size = 100;
-
 const unsigned int batch_size = 32;
 
 #endif
@@ -112,13 +111,12 @@ float stepFunction(float x) {
 /**
  * @brief     load data at specific position of file
  * @param[in] F ifstream (input file)
- * @param[out] outVec
- * @param[out] outLabel
+ * @param[out] input input
+ * @param[out] label label
  * @param[in] id th data to get
  * @retval true/false false : end of data
  */
-bool getData(std::ifstream &F, std::vector<float> &outVec,
-             std::vector<float> &outLabel, unsigned int id) {
+bool getData(std::ifstream &F, float *input, float *label, unsigned int id) {
   F.clear();
   F.seekg(0, std::ios_base::end);
   uint64_t file_length = F.tellg();
@@ -129,119 +127,75 @@ bool getData(std::ifstream &F, std::vector<float> &outVec,
     return false;
   }
   F.seekg(position, std::ios::beg);
-  for (unsigned int i = 0; i < feature_size; i++)
-    F.read((char *)&outVec[i], sizeof(float));
-  for (unsigned int i = 0; i < total_label_size; i++)
-    F.read((char *)&outLabel[i], sizeof(float));
+  F.read((char *)input, sizeof(float) * feature_size);
+  F.read((char *)label, sizeof(float) * total_label_size);
 
   return true;
 }
 
 /**
- * @brief      get data which size is batch for train
- * @param[out] outVec
- * @param[out] outLabel
- * @param[out] last if the data is finished
- * @param[in]  user_data private data for the callback
- * @retval     status for handling error
+ * @brief UserData which stores information used to feed data from data callback
+ *
 */
-int getBatch_train(float **outVec, float **outLabel, bool *last,
-                   void *user_data) {
-  std::vector<int> memI;
-  std::vector<int> memJ;
-  unsigned int count = 0;
-  int data_size = total_train_data_size;
-
-  std::ifstream F(filename, std::ios::in | std::ios::binary);
-
-#if VALIDATION
-  if (data_size - train_count < batch_size) {
-#else
-  if (data_size * total_label_size - train_count < batch_size) {
-#endif
-    *last = true;
-    train_count = 0;
-    return ML_ERROR_NONE;
-  }
-
-  count = 0;
-  for (unsigned int i = train_count; i < train_count + batch_size; i++) {
-    std::vector<float> o;
-    std::vector<float> l;
-
-    o.resize(feature_size);
-    l.resize(total_label_size);
-
-    getData(F, o, l, i);
-
-    for (unsigned int j = 0; j < feature_size; ++j)
-      outVec[0][count * feature_size + j] = o[j];
-    for (unsigned int j = 0; j < total_label_size; ++j)
-      outLabel[0][count * total_label_size + j] = l[j];
-    count++;
+class DataInformation {
+public:
+  /**
+   * @brief Construct a new Data Information object
+   *
+   * @param num_samples number of data
+   * @param filename file name to read from
+   */
+  DataInformation(unsigned int num_samples, const std::string &filename);
+  unsigned int count;
+  unsigned int num_samples;
+  std::ifstream file;
+  std::vector<unsigned int> idxes;
+  std::mt19937 rng;
+};
+
+DataInformation::DataInformation(unsigned int num_samples,
+                                 const std::string &filename) :
+  count(0),
+  num_samples(num_samples),
+  file(filename, std::ios::in | std::ios::binary),
+  idxes(num_samples) {
+  std::iota(idxes.begin(), idxes.end(), 0);
+  rng.seed(SEED);
+  std::shuffle(idxes.begin(), idxes.end(), rng);
+  if (!file.good()) {
+    throw std::invalid_argument("given file is not good, filename: " +
+                                filename);
   }
-
-  F.close();
-  *last = false;
-  train_count += batch_size;
-  return ML_ERROR_NONE;
 }
 
 /**
- * @brief      get data which size is batch for validation
- * @param[out] outVec
- * @param[out] outLabel
+ * @brief      get data which size is batch for train
+ * @param[out] outInput input vectors
+ * @param[out] outLabel label vectors
  * @param[out] last if the data is finished
  * @param[in]  user_data private data for the callback
  * @retval     status for handling error
  */
-int getBatch_val(float **outVec, float **outLabel, bool *last,
-                 void *user_data) {
-
-  std::vector<int> memI;
-  std::vector<int> memJ;
-  unsigned int count = 0;
-  int data_size = total_val_data_size;
-
-  std::ifstream F(filename, std::ios::in | std::ios::binary);
-
-#if VALIDATION
-  if (data_size - val_count < batch_size) {
-#else
-  if (data_size * total_label_size - val_count < batch_size) {
-#endif
+int getSample(float **outVec, float **outLabel, bool *last, void *user_data) {
+  auto data = reinterpret_cast<DataInformation *>(user_data);
+
+  getData(data->file, *outVec, *outLabel, data->idxes.at(data->count));
+  data->count++;
+  if (data->count < data->num_samples) {
+    *last = false;
+  } else {
     *last = true;
-    val_count = 0;
-    return ML_ERROR_NONE;
+    data->count = 0;
+    std::shuffle(data->idxes.begin(), data->idxes.end(), data->rng);
   }
 
-  count = 0;
-  for (unsigned int i = val_count; i < val_count + batch_size; i++) {
-    std::vector<float> o;
-    std::vector<float> l;
-
-    o.resize(feature_size);
-    l.resize(total_label_size);
-
-    getData(F, o, l, i);
-
-    for (unsigned int j = 0; j < feature_size; ++j)
-      outVec[0][count * feature_size + j] = o[j];
-    for (unsigned int j = 0; j < total_label_size; ++j)
-      outLabel[0][count * total_label_size + j] = l[j];
-    count++;
-  }
-
-  F.close();
-  *last = false;
-  val_count += batch_size;
   return ML_ERROR_NONE;
 }
 
 #if defined(APP_VALIDATE)
 TEST(MNIST_training, verify_accuracy) {
-  EXPECT_FLOAT_EQ(training_loss, 2.3031187);
-  EXPECT_FLOAT_EQ(validation_loss, 2.2951343);
+  EXPECT_FLOAT_EQ(training_loss, 2.5698349);
+  EXPECT_FLOAT_EQ(validation_loss, 2.5551746);
 }
 #endif
@@ -255,8 +209,9 @@ int main(int argc, char *argv[]) {
   int status = 0;
 #ifdef APP_VALIDATE
   status = remove("mnist_model.bin");
-  if (status != 0)
+  if (status != 0) {
     std::cout << "Pre-existing model file doesn't exist.\n";
+  }
 #endif
   if (argc < 3) {
     std::cout << "./nntrainer_mnist mnist.ini dataset.dat\n";
@@ -267,26 +222,28 @@
   std::string config = args[0];
   filename = args[1];
 
-  std::ifstream f(filename);
-  if (!f.good()) {
-    std::cout << "dataset is not good, filename: " << filename << '\n';
-    exit(1);
+  std::unique_ptr<DataInformation> train_user_data;
+  std::unique_ptr<DataInformation> valid_user_data;
+  try {
+    train_user_data =
+      std::make_unique<DataInformation>(total_train_data_size, filename);
+    valid_user_data =
+      std::make_unique<DataInformation>(total_val_data_size, filename);
+  } catch (std::invalid_argument &e) {
+    std::cerr << "Error creating userdata for the data callback " << e.what()
+              << std::endl;
+    return 1;
   }
 
-  srand(time(NULL));
-  std::vector<std::vector<float>> inputVector, outputVector;
-  std::vector<std::vector<float>> inputValVector, outputValVector;
-  std::vector<std::vector<float>> inputTestVector, outputTestVector;
-
   /**
    * @brief     Data buffer Create & Initialization
    */
   std::shared_ptr<ml::train::Dataset> dataset_train, dataset_val;
   try {
-    dataset_train =
-      createDataset(ml::train::DatasetType::GENERATOR, getBatch_train);
-    dataset_val =
-      createDataset(ml::train::DatasetType::GENERATOR, getBatch_val);
+    dataset_train = createDataset(ml::train::DatasetType::GENERATOR, getSample,
+                                  train_user_data.get());
+    dataset_val = createDataset(ml::train::DatasetType::GENERATOR, getSample,
+                                valid_user_data.get());
   } catch (std::exception &e) {
     std::cerr << "Error creating dataset" << e.what() << std::endl;
     return 1;
diff --git a/Applications/ProductRatings/jni/main.cpp b/Applications/ProductRatings/jni/main.cpp
index fd486fc3..bab337be 100644
--- a/Applications/ProductRatings/jni/main.cpp
+++ b/Applications/ProductRatings/jni/main.cpp
@@ -17,9 +17,8 @@
 #include
 #include
 #include
+#include
 #include
-#include
-#include
 
 #include
 #include
@@ -28,6 +27,8 @@
 
 std::string data_file;
 
+constexpr unsigned int SEED = 0;
+
 const unsigned int total_train_data_size = 25;
 
 unsigned int train_count = 0;
@@ -60,13 +61,12 @@ float stepFunction(float x) {
 /**
  * @brief     get idth Data
  * @param[in] F file stream
- * @param[out] outVec feature data
- * @param[out] outLabel label data
+ * @param[out] input feature data
+ * @param[out] label label data
  * @param[in] id id th
  * @retval boolean true if there is no error
  */
-bool getData(std::ifstream &F, std::vector<float> &outVec,
-             std::vector<float> &outLabel, unsigned int id) {
+bool getData(std::ifstream &F, float *input, float *label, unsigned int id) {
   std::string temp;
   F.clear();
   F.seekg(0, std::ios_base::beg);
@@ -78,18 +78,17 @@ bool getData(std::ifstream &F, std::vector<float> &outVec,
 
   F.putback(c);
 
-  if (!std::getline(F, temp)) {
+  if (!std::getline(F, temp))
     return false;
-  }
 
   std::istringstream buffer(temp);
   float x;
   for (unsigned int j = 0; j < feature_size; ++j) {
     buffer >> x;
-    outVec[j] = x;
+    input[j] = x;
   }
   buffer >> x;
-  outLabel[0] = x;
+  label[0] = x;
 
   return true;
 }
@@ -103,50 +102,34 @@ template <typename T> void loadFile(const char *filename, T &t) {
   file.close();
 }
 
+std::mt19937 rng;
+std::vector<unsigned int> train_idxes;
+
 /**
- * @brief     get Data as much as batch size
+ * @brief     get a single data
  * @param[out] outVec feature data
 * @param[out] outLabel label data
  * @param[out] last end of data
  * @param[in] user_data user data
  * @retval int 0 if there is no error
 */
-int getBatch_train(float **outVec, float **outLabel, bool *last,
-                   void *user_data) {
+int getSample_train(float **outVec, float **outLabel, bool *last,
+                    void *user_data) {
   std::ifstream dataFile(data_file);
-  unsigned int data_size = total_train_data_size;
-  unsigned int count = 0;
-
-  if (data_size - train_count < batch_size) {
+  if (!getData(dataFile, *outVec, *outLabel, train_idxes.at(train_count))) {
+    return -1;
+  }
+  train_count++;
+  if (train_count < total_train_data_size) {
+    *last = false;
+  } else {
     *last = true;
     train_count = 0;
-    return 0;
+    std::shuffle(train_idxes.begin(), train_idxes.end(), rng);
   }
 
-  std::vector<float> o;
-  std::vector<float> l;
-  o.resize(feature_size);
-  l.resize(1);
-
-  for (unsigned int i = train_count; i < train_count + batch_size; ++i) {
-    if (!getData(dataFile, o, l, i)) {
-      return -1;
-    }
-
-    for (unsigned int j = 0; j < feature_size; ++j) {
-      outVec[0][count * feature_size + j] = o[j];
-    }
-    outLabel[0][count] = l[0];
-
-    count++;
-  }
-
-  dataFile.close();
-  *last = false;
-  train_count += batch_size;
   return 0;
 }
-
 /**
  * @brief     create NN
  *            back propagation of NN
@@ -172,14 +155,16 @@ int main(int argc, char *argv[]) {
   if (!args[0].compare("train"))
     training = true;
 
-  srand(time(NULL));
+  train_idxes.resize(total_train_data_size);
+  std::iota(train_idxes.begin(), train_idxes.end(), 0);
+  rng.seed(SEED);
 
   std::shared_ptr<ml::train::Dataset> dataset_train, dataset_val;
   try {
     dataset_train =
-      createDataset(ml::train::DatasetType::GENERATOR, getBatch_train);
+      createDataset(ml::train::DatasetType::GENERATOR, getSample_train);
     dataset_val =
-      createDataset(ml::train::DatasetType::GENERATOR, getBatch_train);
+      createDataset(ml::train::DatasetType::GENERATOR, getSample_train);
   } catch (std::exception &e) {
     std::cerr << "Error creating dataset " << e.what() << std::endl;
     return 1;
   }
@@ -188,7 +173,6 @@
   /**
    * @brief     Create NN
    */
-  std::vector<std::vector<float>> inputVector, outputVector;
   nntrainer::NeuralNetwork NN;
   /**
    * @brief     Initialize NN with configuration file path
@@ -258,7 +242,7 @@
     o.resize(feature_size);
     l.resize(1);
 
-    getData(dataFile, o, l, j);
+    getData(dataFile, o.data(), l.data(), j);
 
     try {
       float answer =
diff --git a/Applications/TransferLearning/Draw_Classification/jni/main.cpp b/Applications/TransferLearning/Draw_Classification/jni/main.cpp
index a9281d3e..fda60674 100644
--- a/Applications/TransferLearning/Draw_Classification/jni/main.cpp
+++ b/Applications/TransferLearning/Draw_Classification/jni/main.cpp
@@ -24,8 +24,6 @@
  *              Classifier : One Fully Connected Layer
 *
 */
-///@todo update below
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 
 #if defined(NNSTREAMER_AVAILABLE) && defined(ENABLE_TEST)
 #define APP_VALIDATE
 #endif
@@ -149,12 +147,6 @@ void loadAllData(const std::string &data_path, float input_data[][INPUT_SIZE],
 */
 int getBatch_train(float **input, float **label, bool *last, void *user_data) {
   static unsigned int iteration = 0;
-  if (iteration >= EPOCH_SIZE) {
-    *last = true;
-    iteration = 0;
-    return ML_ERROR_NONE;
-  }
-
   for (int idx = 0; idx < INPUT_SIZE; idx++) {
     input[0][idx] = inputVector[iteration][idx];
   }
@@ -163,8 +155,13 @@ int getBatch_train(float **input, float **label, bool *last, void *user_data) {
     label[0][idx] = labelVector[iteration][idx];
   }
 
-  *last = false;
   iteration += 1;
+  if (iteration < EPOCH_SIZE) {
+    *last = false;
+  } else {
+    *last = true;
+    iteration = 0;
+  }
 
   return ML_ERROR_NONE;
 }
@@ -191,14 +188,22 @@ int trainModel(const char *config) {
   }
 
   /** Set the dataset from generator */
-  status = ml_train_dataset_create_with_generator(&dataset, getBatch_train,
-                                                  NULL, NULL);
+  status = ml_train_dataset_create(&dataset);
   if (status != ML_ERROR_NONE) {
     ml_train_model_destroy(handle);
     return status;
   }
 
-  status = ml_train_dataset_set_property(dataset, "buffer_size=100", NULL);
+  status = ml_train_dataset_add_generator(dataset, ML_TRAIN_DATASET_MODE_TRAIN,
+                                          getBatch_train, nullptr);
+  if (status != ML_ERROR_NONE) {
+    ml_train_dataset_destroy(dataset);
+    ml_train_model_destroy(handle);
+    return status;
+  }
+
+  status = ml_train_dataset_set_property_for_mode(
+    dataset, ML_TRAIN_DATASET_MODE_TRAIN, "buffer_size=100", NULL);
   if (status != ML_ERROR_NONE) {
     ml_train_dataset_destroy(dataset);
     ml_train_model_destroy(handle);
     return status;
diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp
index 13c2acb9..c061044c 100644
--- a/nntrainer/models/neuralnet.cpp
+++ b/nntrainer/models/neuralnet.cpp
@@ -635,14 +635,8 @@ int NeuralNetwork::train_run() {
   for (epoch_idx = epoch_idx + 1; epoch_idx <= epochs; ++epoch_idx) {
     training.loss = 0.0f;
 
-    std::future<std::shared_ptr<BatchQueue>> future_bq;
-    std::future<std::shared_ptr<IterationQueue>> future_iq;
-    if (train_buffer->getType() == "callback") {
-      future_bq = train_buffer->startFetchWorker(in_dims, label_dims);
-    } else {
-      future_iq =
-        train_buffer->startFetchWorker_sample(in_dims, label_dims, true);
-    }
+    std::future<std::shared_ptr<IterationQueue>> future_iq =
+      train_buffer->startFetchWorker_sample(in_dims, label_dims, true);
 
     // /// @todo make this working, test buffer is running but doing nothing
     // if (test_buffer != nullptr && test_buffer->isValid()) {
@@ -656,30 +650,19 @@ int NeuralNetwork::train_run() {
     int count = 0;
 
     while (true) {
-      ScopedView<Iteration> iter_view(nullptr);
-      if (train_buffer->getType() == "callback") {
-
-        auto [last, ins, labels] = *train_buffer->fetch();
-        /// @todo multiple input support
-        if (last) {
-          break;
-        }
-        in = ins[0];
-        label = labels[0];
-      } else {
-        iter_view = train_buffer->fetch_sample();
-        if (iter_view.isEmpty()) {
-          break;
-        }
-        auto &iter = iter_view.get();
-        if (iter.batch() != batch_size) {
-          /// this is partial batch scenario
-          continue;
-        }
-        /// @todo multiple input support
-        in = iter.getInputsRef().front();
-        label = iter.getLabelsRef().front();
+      ScopedView<Iteration> iter_view = train_buffer->fetch_sample();
+      if (iter_view.isEmpty()) {
+        break;
+      }
+      auto &iteration = iter_view.get();
+      if (iteration.batch() != batch_size) {
+        /// this is partial batch scenario
+        continue;
       }
+      /// @todo multiple input support
+      in = iteration.getInputsRef().front();
+      label = iteration.getLabelsRef().front();
+
       forwarding(true);
       backwarding(iter++);
@@ -689,11 +672,7 @@ int NeuralNetwork::train_run() {
       training.loss += loss;
     }
 
-    if (train_buffer->getType() == "callback") {
-      future_bq.get();
-    } else {
-      future_iq.get();
-    }
+    future_iq.get();
 
     if (count == 0)
      throw std::runtime_error("No training data");
@@ -710,40 +689,23 @@ int NeuralNetwork::train_run() {
       int right = 0;
       validation.loss = 0.0f;
       unsigned int tcases = 0;
-      std::future<std::shared_ptr<BatchQueue>> future_bq;
-      std::future<std::shared_ptr<IterationQueue>> future_iq;
-
-      if (valid_buffer->getType() == "callback") {
-        future_bq = valid_buffer->startFetchWorker(in_dims, label_dims);
-      } else {
-        future_iq =
-          valid_buffer->startFetchWorker_sample(in_dims, label_dims, false);
-      }
+
+      std::future<std::shared_ptr<IterationQueue>> future_iq =
+        valid_buffer->startFetchWorker_sample(in_dims, label_dims, false);
 
       while (true) {
-        ScopedView<Iteration> iter_view(nullptr);
-        if (valid_buffer->getType() == "callback") {
-          auto [last, ins, labels] = *valid_buffer->fetch();
-          if (last) {
-            break;
-          }
-          /// @todo multiple input support
-          in = ins[0];
-          label = labels[0];
-        } else {
-          iter_view = valid_buffer->fetch_sample();
-          if (iter_view.isEmpty()) {
-            break;
-          }
-          auto &iter = iter_view.get();
-          if (iter.batch() != batch_size) {
-            /// this is partial batch scenario
-            continue;
-          }
-          /// @todo multiple input support
-          in = iter.getInputsRef().front();
-          label = iter.getLabelsRef().front();
+        ScopedView<Iteration> iter_view = valid_buffer->fetch_sample();
+        if (iter_view.isEmpty()) {
+          break;
         }
+        auto &iter = iter_view.get();
+        if (iter.batch() != batch_size) {
+          /// this is partial batch scenario
+          continue;
+        }
+        /// @todo multiple input support
+        in = iter.getInputsRef().front();
+        label = iter.getLabelsRef().front();
 
         forwarding(false);
         auto model_out = output.argmax();
@@ -756,11 +718,7 @@ int NeuralNetwork::train_run() {
         tcases++;
       }
 
-      if (valid_buffer->getType() == "callback") {
-        future_bq.get();
-      } else {
-        future_iq.get();
-      }
+      future_iq.get();
 
       if (tcases == 0) {
         ml_loge("Error : 0 test cases");
diff --git a/nntrainer/utils/base_properties.h b/nntrainer/utils/base_properties.h
index 7ef4d411..259f712c 100644
--- a/nntrainer/utils/base_properties.h
+++ b/nntrainer/utils/base_properties.h
@@ -350,7 +350,6 @@ template struct str_converter {
   static DataType from_string(const std::string &value) {
     std::stringstream ss(value);
     uintptr_t addr = static_cast<uintptr_t>(std::stoull(value, 0, 16));
-    std::cerr << "value: " << value << " addr: " << addr;
     return reinterpret_cast<DataType>(addr);
   }
 };
diff --git a/test/ccapi/unittest_ccapi.cpp b/test/ccapi/unittest_ccapi.cpp
index 19fdcdbb..7779fe63 100644
--- a/test/ccapi/unittest_ccapi.cpp
+++ b/test/ccapi/unittest_ccapi.cpp
@@ -291,14 +291,16 @@ TEST(nntrainer_ccapi, train_dataset_with_generator_01_p) {
                        "beta1=0.002", "beta2=0.001", "epsilon=1e-7"}));
   EXPECT_NO_THROW(model->setOptimizer(optimizer));
 
+  auto train_data = createTrainData();
+  auto valid_data = createValidData();
   EXPECT_NO_THROW(dataset = ml::train::createDataset(
-                    ml::train::DatasetType::GENERATOR, getBatch_train));
+                    ml::train::DatasetType::GENERATOR, getSample, &train_data));
   EXPECT_NO_THROW(dataset->setProperty({"buffer_size=100"}));
   EXPECT_EQ(model->setDataset(ml::train::DatasetModeType::MODE_TRAIN, dataset),
             ML_ERROR_NONE);
 
   EXPECT_NO_THROW(dataset = ml::train::createDataset(
-                    ml::train::DatasetType::GENERATOR, getBatch_val));
+                    ml::train::DatasetType::GENERATOR, getSample, &valid_data));
   EXPECT_NO_THROW(dataset->setProperty({"buffer_size=100"}));
   EXPECT_EQ(model->setDataset(ml::train::DatasetModeType::MODE_VALID, dataset),
             ML_ERROR_NONE);
@@ -309,8 +311,8 @@ TEST(nntrainer_ccapi, train_dataset_with_generator_01_p) {
   EXPECT_EQ(model->initialize(), ML_ERROR_NONE);
   EXPECT_NO_THROW(model->train());
 
-  EXPECT_NEAR(model->getTrainingLoss(), 2.2109976, tolerance);
-  EXPECT_NEAR(model->getValidationLoss(), 1.995334, tolerance);
+  EXPECT_NEAR(model->getTrainingLoss(), 2.238682, tolerance);
+  EXPECT_NEAR(model->getValidationLoss(), 2.0042247, tolerance);
 }
 
 /**
diff --git a/test/include/nntrainer_test_util.h b/test/include/nntrainer_test_util.h
index 00068c9b..4f4bec6c 100644
--- a/test/include/nntrainer_test_util.h
+++ b/test/include/nntrainer_test_util.h
@@ -137,25 +137,48 @@ void replaceString(const std::string &from, const std::string &to,
                    const std::string n, std::string str);
 
 /**
- * @brief      get data which size is batch for train
- * @param[out] outVec
- * @param[out] outLabel
- * @param[out] last if the data is finished
- * @param[in]  user_data private data for the callback
- * @retval     status for handling error
+ * @brief UserData which stores information used to feed data from data callback
+ *
+ */
+class DataInformation {
+public:
+  /**
+   * @brief Construct a new Data Information object
+   *
+   * @param num_samples number of data
+   * @param filename file name to read from
+   */
+  DataInformation(unsigned int num_samples, const std::string &filename);
+  unsigned int count;
+  unsigned int num_samples;
+  std::ifstream file;
+  std::vector<unsigned int> idxes;
+  std::mt19937 rng;
+};
+
+/**
+ * @brief Create a user data for training
+ *
+ * @return DataInformation
+ */
+DataInformation createTrainData();
+
+/**
+ * @brief Create a user data for validation
+ *
+ * @return DataInformation
 */
-int getBatch_train(float **outVec, float **outLabel, bool *last,
-                   void *user_data);
+DataInformation createValidData();
 
 /**
- * @brief      get data which size is batch for val
+ * @brief      get data which size is batch
 * @param[out] outVec
 * @param[out] outLabel
 * @param[out] last if the data is finished
 * @param[in]  user_data private data for the callback
 * @retval     status for handling error
 */
-int getBatch_val(float **outVec, float **outLabel, bool *last, void *user_data);
+int getSample(float **outVec, float **outLabel, bool *last, void *user_data);
 
 /**
  * @brief Get the Res Path object
diff --git a/test/nntrainer_test_util.cpp b/test/nntrainer_test_util.cpp
index 13479c6b..ae2664bf 100644
--- a/test/nntrainer_test_util.cpp
+++ b/test/nntrainer_test_util.cpp
@@ -33,10 +33,6 @@
 #define batch_size 16
 #define feature_size 62720
 
-static bool *duplicate;
-static bool *valduplicate;
-static bool alloc_train = false;
-static bool alloc_val = false;
 static std::mt19937 rng(0);
 
 /**
@@ -68,17 +64,6 @@ void replaceString(const std::string &from, const std::string &to,
   data_file.close();
 }
 
-/**
- * @brief     Generate Random integer value between min to max
- * @param[in] min : minimum value
- * @param[in] max : maximum value
- * @retval    min < random value < max
- */
-static int rangeRandom(int min, int max) {
-  std::uniform_int_distribution<int> dist(min, max);
-  return dist(rng);
-}
-
 /**
  * @brief     load data at specific position of file
 * @param[in] F ifstream (input file)
 * @param[out] outVec
 * @param[out] outLabel
@@ -87,8 +72,8 @@
 * @param[in] id th data to get
 * @retval true/false false : end of data
 */
-static bool getData(std::ifstream &F, std::vector<float> &outVec,
-                    std::vector<float> &outLabel, unsigned int id) {
+static bool getData(std::ifstream &F, float *outVec, float *outLabel,
+                    unsigned int id) {
   F.clear();
   F.seekg(0, std::ios_base::end);
   uint64_t file_length = F.tellg();
@@ -100,179 +85,69 @@ static bool getData(std::ifstream &F, std::vector<float> &outVec,
     return false;
   }
   F.seekg(position, std::ios::beg);
-  for (unsigned int i = 0; i < feature_size; i++)
-    F.read((char *)&outVec[i], sizeof(float));
-  for (unsigned int i = 0; i < num_class; i++)
-    F.read((char *)&outLabel[i], sizeof(float));
+  F.read((char *)outVec, sizeof(float) * feature_size);
+  F.read((char *)outLabel, sizeof(float) * num_class);
 
   return true;
 }
 
-/**
- * @brief      get data which size is batch for train
- * @param[out] outVec
- * @param[out] outLabel
- * @param[out] last if the data is finished
- * @param[in]  user_data private data for the callback
- * @retval     status for handling error
- */
-int getBatch_train(float **outVec, float **outLabel, bool *last,
-                   void *user_data) {
-  std::vector<int> memI;
-  std::vector<int> memJ;
-  unsigned int count = 0;
-  unsigned int data_size = 0;
-  *last = true;
-
-  std::string filename = getResPath("trainingSet.dat", {"test"});
-  std::ifstream F(filename, std::ios::in | std::ios::binary);
-
-  if (F.good()) {
-    F.seekg(0, std::ios::end);
-    long file_size = F.tellg();
-    data_size = static_cast<unsigned int>(
-      file_size / ((num_class + feature_size) * sizeof(float)));
-  }
-
-  if (!alloc_train) {
-    duplicate = (bool *)malloc(sizeof(bool) * data_size);
-    if (duplicate == nullptr) {
-      ml_loge("[test_util] allocationg memory failed");
-      alloc_train = false;
-      *last = false;
-      F.close();
-      return ML_ERROR_BAD_ADDRESS;
-    }
-
-    for (unsigned int i = 0; i < data_size; ++i) {
-      duplicate[i] = false;
-    }
-    alloc_train = true;
-  }
-
-  for (unsigned int i = 0; i < data_size; i++) {
-    if (!duplicate[i])
-      count++;
-  }
-
-  if (count < batch_size) {
-    free(duplicate);
-    alloc_train = false;
-    *last = true;
-    return ML_ERROR_NONE;
-  }
-
-  count = 0;
-  while (count < batch_size) {
-    int nomI = rangeRandom(0, data_size - 1);
-    if (!duplicate[nomI]) {
-      memI.push_back(nomI);
-      duplicate[nomI] = true;
-      count++;
-    }
+DataInformation::DataInformation(unsigned int num_samples,
+                                 const std::string &filename) :
+  count(0),
+  num_samples(num_samples),
+  file(filename, std::ios::in | std::ios::binary),
+  idxes(num_samples) {
+  std::iota(idxes.begin(), idxes.end(), 0);
+  std::shuffle(idxes.begin(), idxes.end(), rng);
+  rng.seed(0);
+  if (!file.good()) {
+    throw std::invalid_argument("given file is not good, filename: " +
+                                filename);
   }
+}
 
-  for (unsigned int i = 0; i < count; i++) {
-    std::vector<float> o;
-    std::vector<float> l;
-
-    o.resize(feature_size);
-    l.resize(num_class);
-
-    getData(F, o, l, memI[i]);
-
-    for (unsigned int j = 0; j < feature_size; ++j)
-      outVec[0][i * feature_size + j] = o[j];
-    for (unsigned int j = 0; j < num_class; ++j)
-      outLabel[0][i * num_class + j] = l[j];
-  }
+static auto getDataSize = [](const std::string &file_name) {
+  std::ifstream f(file_name, std::ios::in | std::ios::binary);
+  NNTR_THROW_IF(!f.good(), std::invalid_argument)
+    << "cannot find " << file_name;
+  f.seekg(0, std::ios::end);
+  long file_size = f.tellg();
+  return static_cast<unsigned int>(
+    file_size / ((num_class + feature_size) * sizeof(float)));
+};
+
+std::string train_filename = getResPath("trainingSet.dat", {"test"});
+std::string valid_filename = getResPath("trainingSet.dat", {"test"});
+
+DataInformation createTrainData() {
+  return DataInformation(getDataSize(train_filename), train_filename);
+}
 
-  F.close();
-  *last = false;
-  return ML_ERROR_NONE;
+DataInformation createValidData() {
+  return DataInformation(getDataSize(valid_filename), valid_filename);
 }
 
 /**
- * @brief      get data which size is batch for validation
+ * @brief      get data which size is batch for train
 * @param[out] outVec
 * @param[out] outLabel
 * @param[out] last if the data is finished
 * @param[in]  user_data private data for the callback
 * @retval     status for handling error
 */
-int getBatch_val(float **outVec, float **outLabel, bool *last,
-                 void *user_data) {
-
-  std::vector<int> memI;
-  std::vector<int> memJ;
-  unsigned int count = 0;
-  unsigned int data_size = 0;
-  *last = true;
-
-  std::string filename = getResPath("trainingSet.dat", {"test"});
-  std::ifstream F(filename, std::ios::in | std::ios::binary);
-
-  if (F.good()) {
-    F.seekg(0, std::ios::end);
-    long file_size = F.tellg();
-    data_size = static_cast<unsigned int>(
-      file_size / ((num_class + feature_size) * sizeof(float)));
-  }
+int getSample(float **outVec, float **outLabel, bool *last, void *user_data) {
+  auto data = reinterpret_cast<DataInformation *>(user_data);
 
-  if (!alloc_val) {
-    valduplicate = (bool *)malloc(sizeof(bool) * data_size);
-    if (valduplicate == nullptr) {
-      ml_loge("[test_util] allocationg memory failed");
-      alloc_val = false;
-      *last = false;
-      F.close();
-      return ML_ERROR_BAD_ADDRESS;
-    }
-    for (unsigned int i = 0; i < data_size; ++i) {
-      valduplicate[i] = false;
-    }
-    alloc_val = true;
-  }
-
-  for (unsigned int i = 0; i < data_size; i++) {
-    if (!valduplicate[i])
-      count++;
-  }
-
-  if (count < batch_size) {
-    free(valduplicate);
-    alloc_val = false;
+  getData(data->file, *outVec, *outLabel, data->idxes.at(data->count));
+  data->count++;
+  if (data->count < data->num_samples) {
+    *last = false;
+  } else {
     *last = true;
-    return ML_ERROR_NONE;
-  }
-
-  count = 0;
-  while (count < batch_size) {
-    int nomI = rangeRandom(0, data_size - 1);
-    if (!valduplicate[nomI]) {
-      memI.push_back(nomI);
-      valduplicate[nomI] = true;
-      count++;
-    }
-  }
-
-  for (unsigned int i = 0; i < count; i++) {
-    std::vector<float> o;
-    std::vector<float> l;
-
-    o.resize(feature_size);
-    l.resize(num_class);
-
-    getData(F, o, l, memI[i]);
-
-    for (unsigned int j = 0; j < feature_size; ++j)
-      outVec[0][i * feature_size + j] = o[j];
-    for (unsigned int j = 0; j < num_class; ++j)
-      outLabel[0][i * num_class + j] = l[j];
+    data->count = 0;
+    std::shuffle(data->idxes.begin(), data->idxes.end(), data->rng);
   }
 
-  F.close();
-  *last = false;
   return ML_ERROR_NONE;
 }
diff --git a/test/tizen_capi/unittest_tizen_capi.cpp b/test/tizen_capi/unittest_tizen_capi.cpp
index 151ce9da..0c8aa0d3 100644
--- a/test/tizen_capi/unittest_tizen_capi.cpp
+++ b/test/tizen_capi/unittest_tizen_capi.cpp
@@ -795,13 +795,24 @@ TEST(nntrainer_capi_nnmodel, train_with_generator_01_p) {
   status = ml_train_model_set_optimizer(model, optimizer);
   EXPECT_EQ(status, ML_ERROR_NONE);
 
-  status = ml_train_dataset_create_with_generator(&dataset, getBatch_train,
-                                                  getBatch_val, NULL);
+  auto train_data = createTrainData();
+  auto valid_data = createValidData();
+
+  status = ml_train_dataset_create_with_generator(&dataset, getSample,
+                                                  getSample, NULL);
   EXPECT_EQ(status, ML_ERROR_NONE);
 
   status = ml_train_dataset_set_property(dataset, "buffer_size=100", NULL);
   EXPECT_EQ(status, ML_ERROR_NONE);
 
+  status = ml_train_dataset_set_property_for_mode(
+    dataset, ML_TRAIN_DATASET_MODE_TRAIN, "user_data", &train_data, NULL);
+  EXPECT_EQ(status, ML_ERROR_NONE);
+
+  status = ml_train_dataset_set_property_for_mode(
+    dataset, ML_TRAIN_DATASET_MODE_VALID, "user_data", &valid_data, NULL);
+  EXPECT_EQ(status, ML_ERROR_NONE);
+
   status = ml_train_model_set_dataset(model, dataset);
   EXPECT_EQ(status, ML_ERROR_NONE);
@@ -813,7 +824,7 @@
   EXPECT_EQ(status, ML_ERROR_NONE);
 
   /** Compare training statistics */
-  nntrainer_capi_model_comp_metrics(model, 2.17921, 1.96506, 60.4167);
+  nntrainer_capi_model_comp_metrics(model, 2.2063899, 1.983489, 64.583297);
 
   status = ml_train_model_destroy(model);
   EXPECT_EQ(status, ML_ERROR_NONE);
 }
 
 static int constant_generator_cb(float **outVec, float **outLabel, bool *last,
                                  void *user_data) {
   static int count = 0;
-
-  unsigned int batch_size = 9;
   unsigned int feature_size = 100;
   unsigned int num_class = 10;
 
-  unsigned int data_size = batch_size * feature_size;
-  for (unsigned int i = 0; i < data_size; ++i) {
+  for (unsigned int i = 0; i < feature_size; ++i) {
     outVec[0][i] = 0.0f;
   }
 
-  for (unsigned int i = 0; i < batch_size; ++i) {
-    outLabel[0][i * num_class] = 1.0f;
-    for (unsigned int j = 1; j < num_class; ++j) {
-      outLabel[0][i * num_class + j] = 0.0f;
-    }
+  outLabel[0][0] = 1.0f;
+  for (unsigned int j = 1; j < num_class; ++j) {
+    outLabel[0][j] = 0.0f;
   }
 
-  if (count == 10) {
+  count++;
+  if (count == 9) {
     *last = true;
     count = 0;
   } else {
     *last = false;
-    count++;
   }
 
   return ML_ERROR_NONE;
diff --git a/test/tizen_capi/unittest_tizen_capi_dataset.cpp b/test/tizen_capi/unittest_tizen_capi_dataset.cpp
index 8821e026..90f55f1c 100644
--- a/test/tizen_capi/unittest_tizen_capi_dataset.cpp
+++ b/test/tizen_capi/unittest_tizen_capi_dataset.cpp
@@ -125,14 +125,14 @@ TEST(nntrainer_capi_dataset, create_destroy_06_n) {
 TEST(nntrainer_capi_dataset, create_destroy_07_p) {
   ml_train_dataset_h dataset;
   int status;
-  status = ml_train_dataset_create_with_generator(&dataset, getBatch_train,
-                                                  NULL, NULL);
+  status =
+    ml_train_dataset_create_with_generator(&dataset, getSample, NULL, NULL);
   EXPECT_EQ(status, ML_ERROR_NONE);
   status = ml_train_dataset_destroy(dataset);
   EXPECT_EQ(status, ML_ERROR_NONE);
 
-  status = ml_train_dataset_create_with_generator(&dataset, getBatch_train,
-                                                  getBatch_val, getBatch_val);
+  status = ml_train_dataset_create_with_generator(&dataset, getSample,
+                                                  getSample, getSample);
   EXPECT_EQ(status, ML_ERROR_NONE);
   status = ml_train_dataset_destroy(dataset);
   EXPECT_EQ(status, ML_ERROR_NONE);
 }
@@ -147,19 +147,19 @@ TEST(nntrainer_cpi_dataset, add_generator_01_p) {
   EXPECT_EQ(status, ML_ERROR_NONE);
 
   status = ml_train_dataset_add_generator(dataset, ML_TRAIN_DATASET_MODE_TRAIN,
-                                          getBatch_train, NULL);
+                                          getSample, NULL);
   EXPECT_EQ(status, ML_ERROR_NONE);
 
   status = ml_train_dataset_add_generator(dataset, ML_TRAIN_DATASET_MODE_TRAIN,
-                                          getBatch_val, NULL);
+                                          getSample, NULL);
   EXPECT_EQ(status, ML_ERROR_NONE);
 
   status = ml_train_dataset_add_generator(dataset, ML_TRAIN_DATASET_MODE_VALID,
-                                          getBatch_train, NULL);
+                                          getSample, NULL);
   EXPECT_EQ(status, ML_ERROR_NONE);
 
   status = ml_train_dataset_add_generator(dataset, ML_TRAIN_DATASET_MODE_TEST,
-                                          getBatch_train, NULL);
+                                          getSample, NULL);
   EXPECT_EQ(status, ML_ERROR_NONE);
 
   status = ml_train_dataset_destroy(dataset);
@@ -267,8 +267,8 @@ TEST(nntrainer_capi_dataset, set_dataset_property_01_p) {
   ml_train_dataset_h dataset;
   int status;
 
-  status = ml_train_dataset_create_with_generator(&dataset, getBatch_train,
-                                                  NULL, NULL);
+  status =
+    ml_train_dataset_create_with_generator(&dataset, getSample, NULL, NULL);
   EXPECT_EQ(status, ML_ERROR_NONE);
 
   status = ml_train_dataset_set_property(dataset, "buffer_size=10", NULL);
@@ -303,8 +303,8 @@ TEST(nntrainer_capi_dataset, set_dataset_property_03_n) {
   ml_train_dataset_h dataset;
   int status;
 
-  status = ml_train_dataset_create_with_generator(&dataset, getBatch_train,
-                                                  NULL, NULL);
+  status =
+    ml_train_dataset_create_with_generator(&dataset, getSample, NULL, NULL);
   EXPECT_EQ(status, ML_ERROR_NONE);
 
   status = ml_train_dataset_set_property(dataset, "user_data=10", NULL);
@@ -324,8 +324,8 @@ TEST(nntrainer_capi_dataset, set_dataset_property_04_p) {
   ml_train_dataset_h dataset;
   int status = ML_ERROR_NONE;
 
-  status = ml_train_dataset_create_with_generator(&dataset, getBatch_train,
-                                                  NULL, NULL);
+  status =
+    ml_train_dataset_create_with_generator(&dataset, getSample, NULL, NULL);
  EXPECT_EQ(status, ML_ERROR_NONE);
 
   status =
@@ -350,20 +350,22 @@ TEST(nntrainer_capi_dataset, set_dataset_property_for_mode_01_p) {
     dataset, ML_TRAIN_DATASET_MODE_TRAIN, "buffer_size=1", NULL);
   EXPECT_EQ(status, ML_ERROR_INVALID_PARAMETER);
 
+  auto train_data = createTrainData();
+  auto valid_data = createValidData();
   status = ml_train_dataset_add_generator(dataset, ML_TRAIN_DATASET_MODE_TRAIN,
-                                          getBatch_val, nullptr);
+                                          getSample, &train_data);
   status = ml_train_dataset_set_property_for_mode(
     dataset, ML_TRAIN_DATASET_MODE_TRAIN, "buffer_size=1", NULL);
   EXPECT_EQ(status, ML_ERROR_NONE);
 
   status = ml_train_dataset_add_generator(dataset, ML_TRAIN_DATASET_MODE_VALID,
-                                          getBatch_val, nullptr);
+                                          getSample, &valid_data);
   status = ml_train_dataset_set_property_for_mode(
     dataset, ML_TRAIN_DATASET_MODE_VALID, "buffer_size=1", NULL);
   EXPECT_EQ(status, ML_ERROR_NONE);
 
   status = ml_train_dataset_add_generator(dataset, ML_TRAIN_DATASET_MODE_TEST,
-                                          getBatch_val, nullptr);
+                                          getSample, &train_data);
   status = ml_train_dataset_set_property_for_mode(
     dataset, ML_TRAIN_DATASET_MODE_TEST, "buffer_size=1", NULL);
   EXPECT_EQ(status, ML_ERROR_NONE);
@@ -379,9 +381,8 @@ TEST(nntrainer_capi_dataset,
      set_dataset_property_for_mode_does_not_exist_valid_n) {
   ml_train_dataset_h dataset;
   int status = ML_ERROR_NONE;
-
-  status = ml_train_dataset_create_with_generator(&dataset, getBatch_train,
-                                                  nullptr, getBatch_train);
+  status = ml_train_dataset_create_with_generator(&dataset, getSample, nullptr,
+                                                  getSample);
   EXPECT_EQ(status, ML_ERROR_NONE);
 
   status = ml_train_dataset_set_property_for_mode(
@@ -400,8 +401,8 @@ TEST(nntrainer_capi_dataset,
   ml_train_dataset_h dataset;
   int status = ML_ERROR_NONE;
 
-  status = ml_train_dataset_create_with_generator(&dataset, getBatch_train,
-                                                  nullptr, nullptr);
+  status = ml_train_dataset_create_with_generator(&dataset, getSample, nullptr,
+                                                  nullptr);
   EXPECT_EQ(status, ML_ERROR_NONE);
 
   status = ml_train_dataset_set_property_for_mode(