From 31e7e9ca3181d090cf5d7e60509467549eb5224c Mon Sep 17 00:00:00 2001
From: Jihoon Lee
Date: Thu, 7 Jan 2021 15:50:33 +0900
Subject: [PATCH] [Layer] Add eval mode for training

**Changes proposed in this PR:**
- This patch adds an eval mode to the training forward pass and fixes the
  batch normalization layer accordingly

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Jihoon Lee
---
 Applications/Custom/LayerClient/jni/pow.cpp | 2 +-
 Applications/Custom/LayerClient/jni/pow.h | 3 ++-
 nntrainer/graph/network_graph.cpp | 4 ++--
 nntrainer/graph/network_graph.h | 3 ++-
 nntrainer/layers/activation_layer.cpp | 2 +-
 nntrainer/layers/activation_layer.h | 4 ++--
 nntrainer/layers/addition_layer.cpp | 2 +-
 nntrainer/layers/addition_layer.h | 4 ++--
 nntrainer/layers/bn_layer.cpp | 5 ++---
 nntrainer/layers/bn_layer.h | 4 ++--
 nntrainer/layers/concat_layer.cpp | 2 +-
 nntrainer/layers/concat_layer.h | 4 ++--
 nntrainer/layers/conv2d_layer.cpp | 2 +-
 nntrainer/layers/conv2d_layer.h | 4 ++--
 nntrainer/layers/fc_layer.cpp | 2 +-
 nntrainer/layers/fc_layer.h | 4 ++--
 nntrainer/layers/flatten_layer.cpp | 2 +-
 nntrainer/layers/flatten_layer.h | 4 ++--
 nntrainer/layers/input_layer.cpp | 2 +-
 nntrainer/layers/input_layer.h | 5 ++---
 nntrainer/layers/layer_internal.h | 2 +-
 nntrainer/layers/loss_layer.cpp | 3 ++-
 nntrainer/layers/loss_layer.h | 4 ++--
 nntrainer/layers/nnstreamer_layer.cpp | 2 +-
 nntrainer/layers/nnstreamer_layer.h | 4 ++--
 nntrainer/layers/output_layer.cpp | 2 +-
 nntrainer/layers/output_layer.h | 5 ++---
 nntrainer/layers/pooling2d_layer.cpp | 2 +-
 nntrainer/layers/pooling2d_layer.h | 4 ++--
 nntrainer/layers/tflite_layer.cpp | 2 +-
 nntrainer/layers/tflite_layer.h | 4 ++--
 nntrainer/models/neuralnet.cpp | 17 +++++++++--------
 nntrainer/models/neuralnet.h | 5 +++--
 33 files changed, 61 insertions(+), 59 deletions(-)

diff --git a/Applications/Custom/LayerClient/jni/pow.cpp b/Applications/Custom/LayerClient/jni/pow.cpp
index f34d408..62c6b78 100644
--- a/Applications/Custom/LayerClient/jni/pow.cpp
+++ b/Applications/Custom/LayerClient/jni/pow.cpp
@@ -100,7 +100,7 @@ int PowLayer::initialize(nntrainer::Manager &manager) {
   return 0;
 }
 
-void PowLayer::forwarding() {
+void PowLayer::forwarding(bool training) {
 #ifdef DEBUG
   /// intended here to demonstrate that PowLayer::forwarding is being called
   std::cout << "pow layer forward is called\n";
diff --git a/Applications/Custom/LayerClient/jni/pow.h b/Applications/Custom/LayerClient/jni/pow.h
index f70af98..7847715 100644
--- a/Applications/Custom/LayerClient/jni/pow.h
+++ b/Applications/Custom/LayerClient/jni/pow.h
@@ -59,8 +59,9 @@ public:
 
   /**
    * @brief     nntrainer forwarding function
+   * @param[in] training true if forwarding is on training
    */
-  void forwarding();
+  void forwarding(bool training = true) override;
 
   /**
    * @brief     calc the derivative to be passed to the previous layer
diff --git a/nntrainer/graph/network_graph.cpp b/nntrainer/graph/network_graph.cpp
index 5a387f5..4ffd520 100644
--- a/nntrainer/graph/network_graph.cpp
+++ b/nntrainer/graph/network_graph.cpp
@@ -508,10 +508,10 @@ void NetworkGraph::setBatchSize(unsigned int batch_size) {
   }
 }
 
-sharedConstTensors NetworkGraph::forwarding() {
+sharedConstTensors NetworkGraph::forwarding(bool training) {
   for (auto const &ln : Sorted) {
     START_PROFILE(ln.event_key);
-    ln.layer->forwarding();
+    ln.layer->forwarding(training);
     END_PROFILE(ln.event_key);
   }
 
diff --git a/nntrainer/graph/network_graph.h b/nntrainer/graph/network_graph.h
index bdea2cf..8adbb9f 100644
--- a/nntrainer/graph/network_graph.h
+++ b/nntrainer/graph/network_graph.h
@@ -188,9 +188,10 @@ public:
   /**
    * @brief     forwarding network graph
    * @param[in] input data
+   * @param[in] training true if forwarding is on training
    * @retval    output tensors
    */
-  sharedConstTensors forwarding();
+  sharedConstTensors forwarding(bool training = false);
 
   /**
    * @brief     getter of ordered graph
diff --git a/nntrainer/layers/activation_layer.cpp b/nntrainer/layers/activation_layer.cpp
index 9ea2747..401d21f 100644
--- a/nntrainer/layers/activation_layer.cpp
+++ b/nntrainer/layers/activation_layer.cpp
@@ -46,7 +46,7 @@ int ActivationLayer::initialize(Manager &manager) {
   return ML_ERROR_NONE;
 }
 
-void ActivationLayer::forwarding() {
+void ActivationLayer::forwarding(bool training) {
   Tensor &hidden_ = net_hidden[0]->getVariableRef();
   /// @note @a _act_fn is expected to work out of place and not modify @a input
   _act_fn(net_input[0]->getVariableRef(), hidden_);
diff --git a/nntrainer/layers/activation_layer.h b/nntrainer/layers/activation_layer.h
index daf9a73..78f5939 100644
--- a/nntrainer/layers/activation_layer.h
+++ b/nntrainer/layers/activation_layer.h
@@ -63,9 +63,9 @@ public:
   void save(std::ofstream &file){/* noop */};
 
   /**
-   * @copydoc Layer::forwarding()
+   * @copydoc Layer::forwarding(bool training)
    */
-  void forwarding();
+  void forwarding(bool training = true) override;
 
   /**
    * @copydoc Layer::calcDerivative()
diff --git a/nntrainer/layers/addition_layer.cpp b/nntrainer/layers/addition_layer.cpp
index 6904965..519f2e7 100644
--- a/nntrainer/layers/addition_layer.cpp
+++ b/nntrainer/layers/addition_layer.cpp
@@ -41,7 +41,7 @@ int AdditionLayer::initialize(Manager &manager) {
   return status;
 }
 
-void AdditionLayer::forwarding() {
+void AdditionLayer::forwarding(bool training) {
   Tensor &hidden_ = net_hidden[0]->getVariableRef();
   TensorDim &in_dim = input_dim[0];
 
diff --git a/nntrainer/layers/addition_layer.h b/nntrainer/layers/addition_layer.h
index 2ee983b..824cf63 100644
--- a/nntrainer/layers/addition_layer.h
+++ b/nntrainer/layers/addition_layer.h
@@ -72,9 +72,9 @@ public:
   void save(std::ofstream &file){};
 
   /**
-   * @copydoc Layer::forwarding()
+   * @copydoc Layer::forwarding(bool training)
    */
-  void forwarding();
+  void forwarding(bool training = true) override;
 
   /**
    * @copydoc Layer::calcDerivative()
diff --git a/nntrainer/layers/bn_layer.cpp b/nntrainer/layers/bn_layer.cpp
index 8a435fc..4142af4 100644
--- a/nntrainer/layers/bn_layer.cpp
+++ b/nntrainer/layers/bn_layer.cpp
@@ -125,7 +125,7 @@ void BatchNormalizationLayer::setProperty(const PropertyType type,
   }
 }
 
-void BatchNormalizationLayer::forwarding() {
+void BatchNormalizationLayer::forwarding(bool training) {
   Tensor &mu = weightAt(BNParams::mu).getVariableRef();
   Tensor &var = weightAt(BNParams::var).getVariableRef();
   Tensor &gamma = weightAt(BNParams::gamma).getVariableRef();
@@ -134,8 +134,7 @@ void BatchNormalizationLayer::forwarding() {
   Tensor &input_ = net_input[0]->getVariableRef();
   Tensor &hidden_ = net_hidden[0]->getVariableRef();
 
-  /// @todo change trainable to train/eval mode #524
-  if (trainable) {
+  if (training) {
     Tensor cmu = input_.average(axes_to_reduce);
     deviation = input_.subtract(cmu);
 
diff --git a/nntrainer/layers/bn_layer.h b/nntrainer/layers/bn_layer.h
index 192328c..5b50921 100644
--- a/nntrainer/layers/bn_layer.h
+++ b/nntrainer/layers/bn_layer.h
@@ -76,9 +76,9 @@ public:
   BatchNormalizationLayer &operator=(BatchNormalizationLayer &&rhs) = default;
 
   /**
-   * @copydoc Layer::forwarding()
+   * @copydoc Layer::forwarding(bool training)
    */
-  void forwarding();
+  void forwarding(bool training = true) override;
 
   /**
    * @copydoc Layer::calcDerivative()
diff --git a/nntrainer/layers/concat_layer.cpp b/nntrainer/layers/concat_layer.cpp
index 7e00c25..02a2856 100644
--- a/nntrainer/layers/concat_layer.cpp
+++ b/nntrainer/layers/concat_layer.cpp
@@ -51,7 +51,7 @@ int ConcatLayer::initialize(Manager &manager) {
   return status;
 }
 
-void ConcatLayer::forwarding() {
+void ConcatLayer::forwarding(bool training) {
   Tensor &hidden_ = net_hidden[0]->getVariableRef();
 
 #ifdef DEBUG
diff --git a/nntrainer/layers/concat_layer.h b/nntrainer/layers/concat_layer.h
index 4fd5d31..f9f68ab 100644
--- a/nntrainer/layers/concat_layer.h
+++ b/nntrainer/layers/concat_layer.h
@@ -72,9 +72,9 @@ public:
   void save(std::ofstream &file){};
 
   /**
-   * @copydoc Layer::forwarding()
+   * @copydoc Layer::forwarding(bool training)
    */
-  void forwarding();
+  void forwarding(bool training = true) override;
 
   /**
    * @copydoc Layer::calcDerivative()
diff --git a/nntrainer/layers/conv2d_layer.cpp b/nntrainer/layers/conv2d_layer.cpp
index b26fdda..40315eb 100644
--- a/nntrainer/layers/conv2d_layer.cpp
+++ b/nntrainer/layers/conv2d_layer.cpp
@@ -100,7 +100,7 @@ int Conv2DLayer::initialize(Manager &manager) {
   return status;
 }
 
-void Conv2DLayer::forwarding() {
+void Conv2DLayer::forwarding(bool training) {
   int status = ML_ERROR_NONE;
 
   if (num_inputs != 1)
diff --git a/nntrainer/layers/conv2d_layer.h b/nntrainer/layers/conv2d_layer.h
index d2712bd..e3b5dfa 100644
--- a/nntrainer/layers/conv2d_layer.h
+++ b/nntrainer/layers/conv2d_layer.h
@@ -73,9 +73,9 @@ public:
   int initialize(Manager &manager);
 
   /**
-   * @copydoc Layer::forwarding()
+   * @copydoc Layer::forwarding(bool training)
    */
-  void forwarding();
+  void forwarding(bool training = true) override;
 
   /**
    * @copydoc Layer::calcDerivative()
diff --git a/nntrainer/layers/fc_layer.cpp b/nntrainer/layers/fc_layer.cpp
index a540ff5..7e10047 100644
--- a/nntrainer/layers/fc_layer.cpp
+++ b/nntrainer/layers/fc_layer.cpp
@@ -82,7 +82,7 @@ void FullyConnectedLayer::setProperty(const PropertyType type,
   }
 }
 
-void FullyConnectedLayer::forwarding() {
+void FullyConnectedLayer::forwarding(bool training) {
   Tensor &weight =
     weightAt(static_cast<int>(FCParams::weight)).getVariableRef();
   Tensor &bias = weightAt(static_cast<int>(FCParams::bias)).getVariableRef();
diff --git a/nntrainer/layers/fc_layer.h b/nntrainer/layers/fc_layer.h
index 9122161..dee02ed 100644
--- a/nntrainer/layers/fc_layer.h
+++ b/nntrainer/layers/fc_layer.h
@@ -52,9 +52,9 @@ public:
   FullyConnectedLayer &operator=(FullyConnectedLayer &&rhs) = default;
 
   /**
-   * @copydoc Layer::forwarding()
+   * @copydoc Layer::forwarding(bool training)
    */
-  void forwarding();
+  void forwarding(bool training = true) override;
 
   /**
    * @copydoc Layer::calcDerivative()
diff --git a/nntrainer/layers/flatten_layer.cpp b/nntrainer/layers/flatten_layer.cpp
index 86c479b..7b4ad55 100644
--- a/nntrainer/layers/flatten_layer.cpp
+++ b/nntrainer/layers/flatten_layer.cpp
@@ -41,7 +41,7 @@ int FlattenLayer::initialize(Manager &manager) {
   return status;
 }
 
-void FlattenLayer::forwarding() {
+void FlattenLayer::forwarding(bool training) {
   Tensor temp = net_input[0]->getVariableRef();
   temp.reshape(net_hidden[0]->getDim());
   net_hidden[0]->getVariableRef() = temp;
diff --git a/nntrainer/layers/flatten_layer.h b/nntrainer/layers/flatten_layer.h
index 026eaa3..958656e 100644
--- a/nntrainer/layers/flatten_layer.h
+++ b/nntrainer/layers/flatten_layer.h
@@ -70,9 +70,9 @@ public:
   void save(std::ofstream &file){};
 
   /**
-   * @copydoc Layer::forwarding()
+   * @copydoc Layer::forwarding(bool training)
    */
-  void forwarding();
+  void forwarding(bool training = true) override;
 
   /**
    * @copydoc Layer::calcDerivative()
diff --git a/nntrainer/layers/input_layer.cpp b/nntrainer/layers/input_layer.cpp
index 2b7b638..af84e7a 100644
--- a/nntrainer/layers/input_layer.cpp
+++ b/nntrainer/layers/input_layer.cpp
@@ -53,7 +53,7 @@ void InputLayer::setProperty(const PropertyType type,
   }
 }
 
-void InputLayer::forwarding() {
+void InputLayer::forwarding(bool training) {
   Tensor &hidden_ = net_hidden[0]->getVariableRef();
   hidden_ = net_input[0]->getVariableRef();
 
diff --git a/nntrainer/layers/input_layer.h b/nntrainer/layers/input_layer.h
index 12286cd..aefd245 100644
--- a/nntrainer/layers/input_layer.h
+++ b/nntrainer/layers/input_layer.h
@@ -75,10 +75,9 @@ public:
   void save(std::ofstream &file){};
 
   /**
-   * @copydoc Layer::forwarding()
+   * @copydoc Layer::forwarding(bool training)
    */
-  void forwarding();
-
+  void forwarding(bool training = true) override;
   /**
    * @copydoc Layer::calcDerivative()
    */
diff --git a/nntrainer/layers/layer_internal.h b/nntrainer/layers/layer_internal.h
index d9ace65..9438d29 100644
--- a/nntrainer/layers/layer_internal.h
+++ b/nntrainer/layers/layer_internal.h
@@ -103,7 +103,7 @@ public:
    * @param[in] in List of Input Tensors taken by this layer
    * @retval    List of Output Tensors
    */
-  virtual void forwarding() = 0;
+  virtual void forwarding(bool training = true) = 0;
 
   /**
    * @brief     Forward Propagation of a layer
diff --git a/nntrainer/layers/loss_layer.cpp b/nntrainer/layers/loss_layer.cpp
index f3b0760..d58582e 100644
--- a/nntrainer/layers/loss_layer.cpp
+++ b/nntrainer/layers/loss_layer.cpp
@@ -42,7 +42,8 @@ int LossLayer::initialize(Manager &manager) {
   return status;
 }
 
-void LossLayer::forwarding() {
+void LossLayer::forwarding(bool training) {
+  /// @todo loss layer can be determined with training variable
   Tensor &hidden_ = net_hidden[0]->getVariableRef();
   Tensor y = net_input[0]->getVariableRef();
   Tensor l;
diff --git a/nntrainer/layers/loss_layer.h b/nntrainer/layers/loss_layer.h
index dfd4f99..da5b4ee 100644
--- a/nntrainer/layers/loss_layer.h
+++ b/nntrainer/layers/loss_layer.h
@@ -53,9 +53,9 @@ public:
   ~LossLayer(){};
 
   /**
-   * @copydoc Layer::forwarding()
+   * @copydoc Layer::forwarding(bool training)
    */
-  void forwarding();
+  void forwarding(bool training = true) override;
 
   /**
    * @copydoc Layer::calcDerivative()
diff --git a/nntrainer/layers/nnstreamer_layer.cpp b/nntrainer/layers/nnstreamer_layer.cpp
index 666cbf0..04a9114 100644
--- a/nntrainer/layers/nnstreamer_layer.cpp
+++ b/nntrainer/layers/nnstreamer_layer.cpp
@@ -165,7 +165,7 @@ void NNStreamerLayer::setProperty(const PropertyType type,
   }
 }
 
-void NNStreamerLayer::forwarding() {
+void NNStreamerLayer::forwarding(bool training) {
   size_t data_size;
   Tensor &input = net_input[0]->getVariableRef();
   Tensor &hidden_ = net_hidden[0]->getVariableRef();
diff --git a/nntrainer/layers/nnstreamer_layer.h b/nntrainer/layers/nnstreamer_layer.h
index 4310168..b29b998 100644
--- a/nntrainer/layers/nnstreamer_layer.h
+++ b/nntrainer/layers/nnstreamer_layer.h
@@ -50,9 +50,9 @@ public:
   ~NNStreamerLayer();
 
   /**
-   * @copydoc Layer::forwarding()
+   * @copydoc Layer::forwarding(bool training)
    */
-  void forwarding();
+  void forwarding(bool training = true) override;
 
   /**
   * @copydoc Layer::calcDerivative()
diff --git a/nntrainer/layers/output_layer.cpp b/nntrainer/layers/output_layer.cpp
index 47ae2fb..1a35cbe 100644
--- a/nntrainer/layers/output_layer.cpp
+++ b/nntrainer/layers/output_layer.cpp
@@ -43,7 +43,7 @@ int OutputLayer::initialize(Manager &manager) {
   return status;
 }
 
-void OutputLayer::forwarding() {
+void OutputLayer::forwarding(bool training) {
   Tensor &input_ = net_input[0]->getVariableRef();
   for (unsigned int idx = 0; idx < num_outputs; ++idx) {
     net_hidden[idx]->getVariableRef() = input_;
diff --git a/nntrainer/layers/output_layer.h b/nntrainer/layers/output_layer.h
index de29e0d..616c54e 100644
--- a/nntrainer/layers/output_layer.h
+++ b/nntrainer/layers/output_layer.h
@@ -72,10 +72,9 @@ public:
   void save(std::ofstream &file){};
 
   /**
-   * @copydoc Layer::forwarding()
+   * @copydoc Layer::forwarding(bool training)
    */
-  void forwarding();
-
+  void forwarding(bool training = true) override;
   /**
    * @copydoc Layer::calcDerivative()
    */
diff --git a/nntrainer/layers/pooling2d_layer.cpp b/nntrainer/layers/pooling2d_layer.cpp
index 8dce272..d81f5a1 100644
--- a/nntrainer/layers/pooling2d_layer.cpp
+++ b/nntrainer/layers/pooling2d_layer.cpp
@@ -66,7 +66,7 @@ int Pooling2DLayer::initialize(Manager &manager) {
   return status;
 }
 
-void Pooling2DLayer::forwarding() {
+void Pooling2DLayer::forwarding(bool training) {
   Tensor &input_ = net_input[0]->getVariableRef();
   Tensor &hidden_ = net_hidden[0]->getVariableRef();
 
diff --git a/nntrainer/layers/pooling2d_layer.h b/nntrainer/layers/pooling2d_layer.h
index 6be8323..175da9d 100644
--- a/nntrainer/layers/pooling2d_layer.h
+++ b/nntrainer/layers/pooling2d_layer.h
@@ -90,9 +90,9 @@ public:
   void save(std::ofstream &file){};
 
   /**
-   * @copydoc Layer::forwarding()
+   * @copydoc Layer::forwarding(bool training)
    */
-  void forwarding();
+  void forwarding(bool training = true) override;
 
   /**
   * @copydoc Layer::calcDerivative()
diff --git a/nntrainer/layers/tflite_layer.cpp b/nntrainer/layers/tflite_layer.cpp
index e9e9fa6..d3a78f3 100644
--- a/nntrainer/layers/tflite_layer.cpp
+++ b/nntrainer/layers/tflite_layer.cpp
@@ -96,7 +96,7 @@ void TfLiteLayer::setProperty(const PropertyType type,
   }
 }
 
-void TfLiteLayer::forwarding() {
+void TfLiteLayer::forwarding(bool training) {
 #ifdef DEBUG
   std::vector<TensorDim> dims;
   if (net_input.size() != input_dim.size())
diff --git a/nntrainer/layers/tflite_layer.h b/nntrainer/layers/tflite_layer.h
index 30d124f..456af80 100644
--- a/nntrainer/layers/tflite_layer.h
+++ b/nntrainer/layers/tflite_layer.h
@@ -47,9 +47,9 @@ public:
   ~TfLiteLayer() = default;
 
   /**
-   * @copydoc Layer::forwarding()
+   * @copydoc Layer::forwarding(bool training)
    */
-  void forwarding();
+  void forwarding(bool training = true) override;
 
   /**
   * @copydoc Layer::calcDerivative()
diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp
index 80d4e72..51cc04c 100644
--- a/nntrainer/models/neuralnet.cpp
+++ b/nntrainer/models/neuralnet.cpp
@@ -283,15 +283,16 @@ NeuralNetwork::~NeuralNetwork() {
 /**
  * @brief     forward propagation using layers object which has layer
  */
-sharedConstTensors NeuralNetwork::forwarding() {
-  return model_graph.forwarding();
+sharedConstTensors NeuralNetwork::forwarding(bool training) {
+  return model_graph.forwarding(training);
 }
 
 /**
  * @brief     forward propagation using layers object which has layer
  */
 sharedConstTensors NeuralNetwork::forwarding(sharedConstTensors input,
-                                             sharedConstTensors label) {
+                                             sharedConstTensors label,
+                                             bool training) {
   if (input[0]->getDim().batch() > batch_size)
     throw std::logic_error("Error: mismatch in batchsize for data and model.");
 
@@ -306,7 +307,7 @@ sharedConstTensors NeuralNetwork::forwarding(sharedConstTensors input,
 
   first_layer->net_input[0]->getVariableRef() = *input[0].get();
 
-  return forwarding();
+  return forwarding(training);
 }
 
 /**
@@ -451,11 +452,11 @@ sharedConstTensors NeuralNetwork::inference(sharedConstTensors X) {
   sharedConstTensors out;
   try {
     START_PROFILE(profile::NN_FORWARD);
-    forwarding(X);
+    forwarding(X, {}, false);
     END_PROFILE(profile::NN_FORWARD);
     /** Forward loss layer without label as well */
     std::static_pointer_cast<LossLayer>(model_graph.Sorted.back().layer)
-      ->forwarding();
+      ->forwarding(false);
   } catch (...) {
     ml_loge("Failed to inference Model");
     return out;
   }
@@ -549,7 +550,7 @@ int NeuralNetwork::train_run() {
       if (data_buffer->getDataFromBuffer(nntrainer::BufferType::BUF_TRAIN,
                                          in.getData(), label.getData())) {
        try {
-          forwarding();
+          forwarding(true);
           backwarding(iter++);
         } catch (...) {
           data_buffer->clear(nntrainer::BufferType::BUF_TRAIN);
@@ -589,7 +590,7 @@ int NeuralNetwork::train_run() {
     while (true) {
       if (data_buffer->getDataFromBuffer(nntrainer::BufferType::BUF_VAL,
                                          in.getData(), label.getData())) {
-        forwarding();
+        forwarding(false);
         auto model_out = output.argmax();
         auto label_out = label.argmax();
         for (unsigned int b = 0; b < batch_size; b++) {
diff --git a/nntrainer/models/neuralnet.h b/nntrainer/models/neuralnet.h
index 76eb65d..a360b02 100644
--- a/nntrainer/models/neuralnet.h
+++ b/nntrainer/models/neuralnet.h
@@ -192,7 +192,7 @@ public:
   /**
    * @brief     Forward Propagation of the neural network
    */
-  sharedConstTensors forwarding();
+  sharedConstTensors forwarding(bool training = true);
 
   /**
    * @brief     Forward Propagation of the neural network
@@ -201,7 +201,8 @@ public:
    * @retval    List of Output Tensors
    */
   sharedConstTensors forwarding(sharedConstTensors input,
-                                sharedConstTensors label = {});
+                                sharedConstTensors label = {},
+                                bool training = true);
 
   /**
    * @brief     Backward Propagation of the neural network
-- 
2.7.4
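
For readers skimming the diff, the standalone sketch below illustrates the train/eval split this patch introduces: statistics are computed from the batch and running averages are updated only when `training` is true, while eval mode reuses the stored statistics, and inference-style callers pass `false` explicitly. This is plain C++, not nntrainer code; `Layer` and `SimpleBatchNorm` are hypothetical, simplified stand-ins whose `forwarding` takes the data directly rather than going through nntrainer's tensor manager.

```cpp
// Standalone sketch of the forwarding(bool training) pattern; not nntrainer code.
#include <cmath>
#include <iostream>
#include <numeric>
#include <vector>

// Minimal stand-in for a layer interface with a train/eval forward pass.
struct Layer {
  virtual ~Layer() = default;
  // Training mode is the default; inference paths pass false explicitly.
  virtual void forwarding(std::vector<float> &x, bool training = true) = 0;
};

// Uses batch statistics while training and running averages in eval mode,
// mirroring the branch BatchNormalizationLayer::forwarding now takes on
// `training` instead of `trainable`.
struct SimpleBatchNorm : Layer {
  float running_mean = 0.0f, running_var = 1.0f, momentum = 0.9f, eps = 1e-5f;

  void forwarding(std::vector<float> &x, bool training = true) override {
    float mean = running_mean, var = running_var;
    if (training) {
      // Compute statistics from the current batch.
      mean = std::accumulate(x.begin(), x.end(), 0.0f) / x.size();
      var = 0.0f;
      for (float v : x)
        var += (v - mean) * (v - mean);
      var /= x.size();
      // Update running statistics only in training mode.
      running_mean = momentum * running_mean + (1.0f - momentum) * mean;
      running_var = momentum * running_var + (1.0f - momentum) * var;
    }
    // Normalize with whichever statistics were selected above.
    for (float &v : x)
      v = (v - mean) / std::sqrt(var + eps);
  }
};

int main() {
  SimpleBatchNorm bn;
  std::vector<float> batch = {1.0f, 2.0f, 3.0f, 4.0f};
  bn.forwarding(batch, true);   // training: batch statistics, running stats updated
  std::vector<float> sample = {2.5f};
  bn.forwarding(sample, false); // inference: stored running statistics only
  std::cout << sample[0] << '\n';
  return 0;
}
```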