This patch applies the new dataset to the file producer.
There were test number changes.
reason:
1. Now, partial batch drop is deterministic
2. Reshuffling is now called after the partial batch is filled (previously
it was called before the partial batch was filled)
Verified numbers without shuffling with
https://github.com/nnstreamer/nntrainer/pull/1416/files#r685695542
**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped
Signed-off-by: Jihoon Lee <jhoon.it.lee@samsung.com>
*/
~FuncDataProducer();
- inline static const std::string type = "batch_callback";
+ inline static const std::string type = "callback";
/**
* @copydoc DataProducer::getType()
RawFileDataProducer::finalize_sample(const std::vector<TensorDim> &input_dims,
const std::vector<TensorDim> &label_dims,
void *user_data) {
- auto sz = size(input_dims, label_dims);
+ auto sz = size_sample(input_dims, label_dims);
auto path_prop = std::get<props::FilePath>(*raw_file_props);
+ auto size_accumulator = [](const unsigned int &a, const TensorDim &b) {
+ return a + b.getFeatureLen();
+ };
+
+ auto sample_size =
+ std::accumulate(input_dims.begin(), input_dims.end(), 0u, size_accumulator);
+ sample_size = std::accumulate(label_dims.begin(), label_dims.end(),
+ sample_size, size_accumulator);
+
/****************** Prepare states ****************/
auto idxes_ = std::vector<unsigned int>();
idxes_.reserve(sz);
/// idxes point to the file position in bytes where a sample starts
std::generate_n(std::back_inserter(idxes_), sz,
- [sz, current = 0ULL]() mutable {
+ [sample_size, current = 0ULL]() mutable {
auto c = current;
- current += sz * RawFileDataProducer::pixel_size;
+ current += sample_size * RawFileDataProducer::pixel_size;
return c;
});
- static thread_local std::ifstream file_(path_prop.get(), std::ios::binary);
-
- return [idxes = std::move(idxes_), sz](unsigned int idx,
- std::vector<Tensor> &inputs,
- std::vector<Tensor> &labels) {
+ /// as we are passing a reference to the file, the created lambda is
+ /// tightly coupled with the file; this is not desirable but works fine
+ /// for now...
+ file = std::ifstream(path_prop.get(), std::ios::binary);
+ return [idxes = std::move(idxes_), sz, this](unsigned int idx,
+ std::vector<Tensor> &inputs,
+ std::vector<Tensor> &labels) {
NNTR_THROW_IF(idx >= sz, std::range_error)
<< "given index is out of bound, index: " << idx << " size: " << sz;
-
- file_.seekg(idxes[idx], std::ios_base::beg);
+ file.seekg(idxes[idx], std::ios_base::beg);
for (auto &input : inputs) {
- input.read(file_);
+ input.read(file);
}
for (auto &label : labels) {
- label.read(file_);
+ label.read(file);
}
return idx == sz - 1;
#include <dataset.h>
+#include <fstream>
#include <memory>
#include <string>
#include <vector>
const std::vector<TensorDim> &label_dims) const override;
private:
+ std::ifstream file;
using PropTypes = std::tuple<props::FilePath>;
std::unique_ptr<PropTypes> raw_file_props;
};
for (epoch_idx = epoch_idx + 1; epoch_idx <= epochs; ++epoch_idx) {
training.loss = 0.0f;
- auto future_bq = train_buffer->startFetchWorker(in_dims, label_dims);
+
+ std::future<std::shared_ptr<BatchQueue>> future_bq;
+ std::future<std::shared_ptr<IterationQueue>> future_iq;
+ if (train_buffer->getType() == "callback") {
+ future_bq = train_buffer->startFetchWorker(in_dims, label_dims);
+ } else {
+ future_iq =
+ train_buffer->startFetchWorker_sample(in_dims, label_dims, true);
+ }
// /// @todo make this working, test buffer is running but doing nothing
// if (test_buffer != nullptr && test_buffer->isValid()) {
int count = 0;
while (true) {
- auto [last, ins, labels] = *train_buffer->fetch();
- if (last) {
- break;
- }
- /// @todo multiple input support
- in = ins[0];
- label = labels[0];
+ ScopedView<Iteration> iter_view(nullptr);
+ if (train_buffer->getType() == "callback") {
+ auto [last, ins, labels] = *train_buffer->fetch();
+ /// @todo multiple input support
+ if (last) {
+ break;
+ }
+ in = ins[0];
+ label = labels[0];
+ } else {
+ iter_view = train_buffer->fetch_sample();
+ if (iter_view.isEmpty()) {
+ break;
+ }
+ auto &iter = iter_view.get();
+ if (iter.batch() != batch_size) {
+ /// this is partial batch scenario
+ continue;
+ }
+ /// @todo multiple input support
+ in = iter.getInputsRef().front();
+ label = iter.getLabelsRef().front();
+ }
forwarding(true);
backwarding(iter++);
train_buffer->displayProgress(count++, loss);
training.loss += loss;
}
- future_bq.get();
+
+ if (train_buffer->getType() == "callback") {
+ future_bq.get();
+ } else {
+ future_iq.get();
+ }
if (count == 0)
throw std::runtime_error("No training data");
int right = 0;
validation.loss = 0.0f;
unsigned int tcases = 0;
+ std::future<std::shared_ptr<BatchQueue>> future_bq;
+ std::future<std::shared_ptr<IterationQueue>> future_iq;
+
+ if (valid_buffer->getType() == "callback") {
+ future_bq = valid_buffer->startFetchWorker(in_dims, label_dims);
+ } else {
+ future_iq =
+ valid_buffer->startFetchWorker_sample(in_dims, label_dims, false);
+ }
- auto future_val_bq = valid_buffer->startFetchWorker(in_dims, label_dims);
while (true) {
- auto [last, ins, labels] = *valid_buffer->fetch();
- if (last) {
- break;
+ ScopedView<Iteration> iter_view(nullptr);
+ if (valid_buffer->getType() == "callback") {
+ auto [last, ins, labels] = *valid_buffer->fetch();
+ if (last) {
+ break;
+ }
+ /// @todo multiple input support
+ in = ins[0];
+ label = labels[0];
+ } else {
+ iter_view = valid_buffer->fetch_sample();
+ if (iter_view.isEmpty()) {
+ break;
+ }
+ auto &iter = iter_view.get();
+ if (iter.batch() != batch_size) {
+ /// this is partial batch scenario
+ continue;
+ }
+ /// @todo multiple input support
+ in = iter.getInputsRef().front();
+ label = iter.getLabelsRef().front();
}
- /// @todo multiple input support
- in = ins[0];
- label = labels[0];
forwarding(false);
auto model_out = output.argmax();
validation.loss += getLoss();
tcases++;
}
- future_val_bq.get();
+
+ if (valid_buffer->getType() == "callback") {
+ future_bq.get();
+ } else {
+ future_iq.get();
+ }
if (tcases == 0) {
ml_loge("Error : 0 test cases");
EXPECT_EQ(model->initialize(), ML_ERROR_NONE);
EXPECT_NO_THROW(model->train());
- EXPECT_NEAR(model->getTrainingLoss(), 4.1389656, tolerance);
- EXPECT_NEAR(model->getValidationLoss(), 3.668904, tolerance);
+ EXPECT_NEAR(model->getTrainingLoss(), 4.13896, tolerance);
+ EXPECT_NEAR(model->getValidationLoss(), 3.64587, tolerance);
}
/**
EXPECT_EQ(model->initialize(), ML_ERROR_NONE);
EXPECT_NO_THROW(model->train());
- EXPECT_NEAR(model->getTrainingLoss(), 2.171251, tolerance);
- EXPECT_NEAR(model->getValidationLoss(), 2.2015938, tolerance);
+ EXPECT_NEAR(model->getTrainingLoss(), 2.1782395, tolerance);
+ EXPECT_NEAR(model->getValidationLoss(), 2.2059061, tolerance);
}
/**
/** Update batch size after initialize */
EXPECT_EQ(model->initialize(), ML_ERROR_NONE);
EXPECT_NO_THROW(model->setProperty({"batch_size=8"}));
-
EXPECT_NO_THROW(model->train());
/** Update batch size after train */
EXPECT_NO_THROW(model->setProperty({"batch_size=4"}));
EXPECT_NO_THROW(model->train());
- EXPECT_NEAR(model->getTrainingLoss(), 1.897739, tolerance);
- EXPECT_NEAR(model->getValidationLoss(), 2.15521, tolerance);
+ EXPECT_NEAR(model->getTrainingLoss(), 1.9332184, tolerance);
+ EXPECT_NEAR(model->getValidationLoss(), 2.179843, tolerance);
}
/**
EXPECT_EQ(status, ML_ERROR_NONE);
/** Compare training statistics */
- nntrainer_capi_model_comp_metrics(handle, 4.330389, 3.7373299, 10.4167);
+ nntrainer_capi_model_comp_metrics(handle, 4.330389, 3.6865699, 10.4167);
status = ml_train_model_destroy(handle);
EXPECT_EQ(status, ML_ERROR_NONE);
EXPECT_EQ(status, ML_ERROR_NONE);
/** Compare training statistics */
- nntrainer_capi_model_comp_metrics(model, 2.108340, 2.216799, 20.8333);
+ nntrainer_capi_model_comp_metrics(model, 2.111340, 2.209510, 16.6667);
status = ml_train_model_destroy(model);
EXPECT_EQ(status, ML_ERROR_NONE);