From f21d6d22956dfc30964d8e83186da818e0c99975 Mon Sep 17 00:00:00 2001
From: Jihoon Lee
Date: Tue, 6 Oct 2020 13:20:13 +0900
Subject: [PATCH] Refactor getDataFromBuffer

This patch rearranges getDataFromBuffer so that it no longer relies on
nested vectors, avoiding unnecessary allocation & deallocation. This
provides a notable speedup in some cases.

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Jihoon Lee
---
 nntrainer/include/databuffer.h |  13 ++-
 nntrainer/include/tensor.h     |   2 +-
 nntrainer/src/databuffer.cpp   | 187 +++++++++++++----------------------------
 nntrainer/src/neuralnet.cpp    |  21 ++---
 4 files changed, 77 insertions(+), 146 deletions(-)

diff --git a/nntrainer/include/databuffer.h b/nntrainer/include/databuffer.h
index 9190e95..0648061 100644
--- a/nntrainer/include/databuffer.h
+++ b/nntrainer/include/databuffer.h
@@ -70,8 +70,6 @@
  */
 constexpr const unsigned int NBUFTYPE = 4;
 
-typedef std::vector<std::vector<std::vector<std::vector<float>>>> vec_4d;
-
 typedef enum {
   DATA_NOT_READY = 0,
   DATA_READY = 1,
@@ -175,14 +173,13 @@ public:
   /**
    * @brief     get Data from Data Buffer using databuffer param
    * @param[in] BufferType training, validation, test
-   * @param[in] outVec feature data ( batch_size size )
-   * @param[in] outLabel label data ( batch_size size )
+   * @param[out] out feature data (batch_size samples); a contiguous,
+   * preallocated memory block should be passed
+   * @param[out] label label data (batch_size samples); a contiguous,
+   * preallocated memory block should be passed
    * @retval    true/false
    */
-  virtual bool getDataFromBuffer(
-    BufferType type,
-    std::vector<std::vector<std::vector<std::vector<float>>>> &out_vec,
-    std::vector<std::vector<std::vector<std::vector<float>>>> &out_label);
+  bool getDataFromBuffer(BufferType type, float *out, float *label);
 
   /**
    * @brief     set number of class
diff --git a/nntrainer/include/tensor.h b/nntrainer/include/tensor.h
index cb9993c..1588113 100644
--- a/nntrainer/include/tensor.h
+++ b/nntrainer/include/tensor.h
@@ -32,7 +32,7 @@
 
 #include
 
-#define MAKE_SHARED_TENSOR(x) std::make_shared<nntrainer::Tensor>(x)
+#define MAKE_SHARED_TENSOR(...) std::make_shared<nntrainer::Tensor>(__VA_ARGS__)
 
 namespace nntrainer {
 
diff --git a/nntrainer/src/databuffer.cpp b/nntrainer/src/databuffer.cpp
index 59425a7..3a99ea4 100644
--- a/nntrainer/src/databuffer.cpp
+++ b/nntrainer/src/databuffer.cpp
@@ -220,147 +220,80 @@ int DataBuffer::clear() {
   return status;
 }
 
-bool DataBuffer::getDataFromBuffer(BufferType type, vec_4d &outVec,
-                                   vec_4d &outLabel) {
-  unsigned int J, i, j, k, L, l;
-  unsigned int width = input_dim.width();
-  unsigned int height = input_dim.height();
-  unsigned int channel = input_dim.channel();
+bool DataBuffer::getDataFromBuffer(BufferType type, float *out, float *label) {
 
-  switch (type) {
-  case BUF_TRAIN: {
-    std::vector list;
-    while (true) {
-      std::unique_lock<std::mutex> ultrain(readyTrainData);
-      cv_train.wait(ultrain, [this]() -> bool { return trainReadyFlag; });
-      if (trainReadyFlag == DATA_ERROR || trainReadyFlag == DATA_END) {
-        if (train_data.size() < batch_size)
-          return false;
-        else
-          break;
-      }
-      if (trainReadyFlag == DATA_READY && train_data.size() >= batch_size) {
-        break;
-      }
-    }
-
-    for (k = 0; k < batch_size; ++k) {
-      std::vector<std::vector<std::vector<float>>> v_channel;
-      for (l = 0; l < channel; ++l) {
-        L = l * width * height;
-        std::vector<std::vector<float>> v_height;
-        for (j = 0; j < height; ++j) {
-          J = L + j * width;
-          std::vector<float> v_width;
-          for (i = 0; i < width; ++i) {
-            v_width.push_back(train_data[k][J + i]);
-          }
-          v_height.push_back(v_width);
-        }
-        v_channel.push_back(v_height);
-      }
-      outVec.push_back(v_channel);
-      outLabel.push_back({{train_data_label[k]}});
-    }
+  using QueueType = std::vector<std::vector<float>>;
 
-    data_lock.lock();
-
-    train_data.erase(train_data.begin(), train_data.begin() + batch_size);
-    train_data_label.erase(train_data_label.begin(),
-                           train_data_label.begin() + batch_size);
-    cur_train_bufsize -= batch_size;
-  } break;
-  case BUF_VAL: {
-    std::vector list;
+  auto wait_for_data_fill = [](std::mutex &ready_mutex,
+                               std::condition_variable &cv, DataStatus &flag,
+                               const unsigned int batch_size,
+                               QueueType &queue) {
     while (true) {
-      std::unique_lock<std::mutex> ulval(readyValData);
-      cv_val.wait(ulval, [this]() -> bool { return valReadyFlag; });
-      if (valReadyFlag == DATA_ERROR || valReadyFlag == DATA_END) {
-        if (val_data.size() < batch_size)
-          return false;
-        else
-          break;
-      }
-      if (valReadyFlag == DATA_READY && val_data.size() >= batch_size) {
-        break;
-      }
-    }
+      std::unique_lock<std::mutex> ul(ready_mutex);
+      cv.wait(ul, [&]() -> bool { return flag; });
+      if (flag == DATA_ERROR || flag == DATA_END)
+        return queue.size() < batch_size ? false : true;
-    for (k = 0; k < batch_size; ++k) {
-      std::vector<std::vector<std::vector<float>>> v_channel;
-      for (l = 0; l < channel; ++l) {
-        L = l * width * height;
-        std::vector<std::vector<float>> v_height;
-        for (j = 0; j < height; ++j) {
-          J = L + j * width;
-          std::vector<float> v_width;
-          for (i = 0; i < width; ++i) {
-            v_width.push_back(val_data[k][J + i]);
-          }
-          v_height.push_back(v_width);
-        }
-        v_channel.push_back(v_height);
-      }
-      outVec.push_back(v_channel);
-      outLabel.push_back({{val_data_label[k]}});
+      if (flag == DATA_READY && queue.size() >= batch_size)
+        return true;
     }
 
-    data_lock.lock();
-
-    val_data.erase(val_data.begin(), val_data.begin() + batch_size);
-    val_data_label.erase(val_data_label.begin(),
-                         val_data_label.begin() + batch_size);
-    cur_val_bufsize -= batch_size;
-
-  } break;
-  case BUF_TEST: {
-    std::vector list;
-    while (true) {
-      std::unique_lock<std::mutex> ultest(readyTestData);
-      cv_test.wait(ultest, [this]() -> bool { return testReadyFlag; });
-
-      if (testReadyFlag == DATA_ERROR || testReadyFlag == DATA_END) {
-        if (test_data.size() < batch_size)
-          return false;
-        else
-          break;
-      }
-      if (testReadyFlag == DATA_READY && test_data.size() >= batch_size) {
-        break;
+    throw std::logic_error("[getDataFromBuffer] control should not reach here");
+  };
+
+  auto fill_bundled_data_from_queue =
+    [](std::mutex &q_lock, QueueType &q, const unsigned int batch_size,
+       const unsigned int feature_size, float *buf) {
+      for (unsigned int b = 0; b < batch_size; ++b)
+        std::copy(q[b].begin(), q[b].begin() + feature_size,
+                  buf + b * feature_size);
+
+      q_lock.lock();
+      q.erase(q.begin(), q.begin() + batch_size);
+      q_lock.unlock();
+    };
+
+  /// facade that waits for the databuffer to be filled and passes it to the
+  /// out parameters. note that batch_size is passed as an argument because
+  /// it can vary by BUF_TYPE later
+  auto fill_out_params =
+    [&](std::mutex &ready_mutex, std::condition_variable &cv, DataStatus &flag,
+        QueueType &data_q, QueueType &label_q, const unsigned int batch_size,
+        unsigned int &cur_bufsize) {
+      if (!wait_for_data_fill(ready_mutex, cv, flag, batch_size, data_q)) {
+        return false;
       }
-    }
-
-    for (k = 0; k < batch_size; ++k) {
-      std::vector<std::vector<std::vector<float>>> v_channel;
-      for (l = 0; l < channel; ++l) {
-        L = l * width * height;
-        std::vector<std::vector<float>> v_height;
-        for (j = 0; j < height; ++j) {
-          J = L + j * width;
-          std::vector<float> v_width;
-          for (i = 0; i < width; ++i) {
-            v_width.push_back(test_data[k][J + i]);
-          }
-          v_height.push_back(v_width);
-        }
-        v_channel.push_back(v_height);
-      }
-      outVec.push_back(v_channel);
-      outLabel.push_back({{test_data_label[k]}});
-    }
+      fill_bundled_data_from_queue(data_lock, data_q, batch_size,
+                                   this->input_dim.getFeatureLen(), out);
+      fill_bundled_data_from_queue(data_lock, label_q, batch_size,
+                                   this->class_num, label);
 
-    data_lock.lock();
-    test_data.erase(test_data.begin(), test_data.begin() + batch_size);
-    test_data_label.erase(test_data_label.begin(),
-                          test_data_label.begin() + batch_size);
-    cur_test_bufsize -= batch_size;
-  } break;
+      cur_bufsize -= batch_size;
+      return true;
+    };
+
+  switch (type) {
+  case BUF_TRAIN:
+    if (!fill_out_params(readyTrainData, cv_train, trainReadyFlag, train_data,
+                         train_data_label, batch_size, cur_train_bufsize))
+      return false;
+    break;
+  case BUF_VAL:
+    if (!fill_out_params(readyValData, cv_val, valReadyFlag, val_data,
+                         val_data_label, batch_size, cur_val_bufsize))
+      return false;
+    break;
+  case BUF_TEST:
+    if (!fill_out_params(readyTestData, cv_test, testReadyFlag, test_data,
+                         test_data_label, batch_size, cur_test_bufsize))
+      return false;
+    break;
   default:
     ml_loge("Error: Not Supported Data Type");
     return false;
     break;
   }
 
-  data_lock.unlock();
   return true;
 }
 
diff --git a/nntrainer/src/neuralnet.cpp b/nntrainer/src/neuralnet.cpp
index c748f64..3eb1f04 100644
--- a/nntrainer/src/neuralnet.cpp
+++ b/nntrainer/src/neuralnet.cpp
@@ -448,12 +448,15 @@ int NeuralNetwork::train_run() {
 
   int count = 0;
 
+  sharedTensor in = MAKE_SHARED_TENSOR(getInputDimension());
+  sharedTensor label =
+    MAKE_SHARED_TENSOR(layers.back()->getOutputDimension());
+
   while (true) {
-    vec_4d in, label;
-    if (data_buffer->getDataFromBuffer(nntrainer::BUF_TRAIN, in, label)) {
+    if (data_buffer->getDataFromBuffer(nntrainer::BUF_TRAIN, in->getData(),
+                                       label->getData())) {
       try {
-        backwarding(MAKE_SHARED_TENSOR(in), MAKE_SHARED_TENSOR(label),
-                    iter++);
+        backwarding(in, label, iter++);
       } catch (...) {
         data_buffer->clear(nntrainer::BUF_TRAIN);
         ml_loge("Error: training error in #%d/%d.", epoch_idx, epochs);
@@ -489,13 +492,11 @@
     }
 
     while (true) {
-      vec_4d in, label;
-      if (data_buffer->getDataFromBuffer(nntrainer::BUF_VAL, in, label)) {
-        sharedTensor X = MAKE_SHARED_TENSOR(Tensor({in}));
-        sharedTensor Y2 = MAKE_SHARED_TENSOR(Tensor({label}));
-        sharedConstTensor Y = forwarding(X, Y2);
+      if (data_buffer->getDataFromBuffer(nntrainer::BUF_VAL, in->getData(),
+                                         label->getData())) {
+        sharedConstTensor Y = forwarding(in, label);
         auto model_out = Y->argmax();
-        auto label_out = Y2->argmax();
+        auto label_out = label->argmax();
         for (unsigned int b = 0; b < batch_size; b++) {
           if (model_out[b] == label_out[b])
             right++;
-- 
2.7.4
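
For illustration, the standalone sketch below shows why handing getDataFromBuffer a
contiguous, preallocated block avoids the allocations the nested-vector path pays
for on every call. The helper names fill_nested and fill_contiguous are hypothetical
and not part of nntrainer; they only mirror the before/after copy loops of this
patch, under the assumption of a channel-major feature layout. In the patched code
the destination block comes from a preallocated Tensor (in->getData() /
label->getData()), so the only work left per call is one std::copy per sample.

#include <algorithm>
#include <vector>

// One flat feature vector per sample, a stand-in for the patch's QueueType.
using Queue = std::vector<std::vector<float>>;

// Pre-patch style: rebuild a nested 4D structure, allocating a vector per
// sample, per channel and per row on every call.
std::vector<std::vector<std::vector<std::vector<float>>>>
fill_nested(const Queue &q, unsigned batch, unsigned channel, unsigned height,
            unsigned width) {
  std::vector<std::vector<std::vector<std::vector<float>>>> out;
  for (unsigned b = 0; b < batch; ++b) {
    std::vector<std::vector<std::vector<float>>> v_channel;
    for (unsigned c = 0; c < channel; ++c) {
      std::vector<std::vector<float>> v_height;
      for (unsigned y = 0; y < height; ++y) {
        // copy one row out of the flat, channel-major feature vector
        std::vector<float> v_width(q[b].begin() + (c * height + y) * width,
                                   q[b].begin() + (c * height + y + 1) * width);
        v_height.push_back(std::move(v_width));
      }
      v_channel.push_back(std::move(v_height));
    }
    out.push_back(std::move(v_channel));
  }
  return out;
}

// Post-patch style: one bulk copy per sample into a preallocated, contiguous
// block of batch * feature_len floats; no allocation happens here.
void fill_contiguous(const Queue &q, unsigned batch, unsigned feature_len,
                     float *out) {
  for (unsigned b = 0; b < batch; ++b)
    std::copy(q[b].begin(), q[b].begin() + feature_len, out + b * feature_len);
}

int main() {
  const unsigned batch = 4, channel = 3, height = 8, width = 8;
  const unsigned feature_len = channel * height * width;
  Queue q(batch, std::vector<float>(feature_len, 1.0f));

  auto nested = fill_nested(q, batch, channel, height, width);

  std::vector<float> flat(batch * feature_len); // plays the role of Tensor data
  fill_contiguous(q, batch, feature_len, flat.data());

  return (nested.size() == batch && flat.back() == 1.0f) ? 0 : 1;
}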