From 369eb18771229c118ecb7e71fd82fcfccd5270e3 Mon Sep 17 00:00:00 2001
From: "jijoong.moon" <jijoong.moon@samsung.com>
Date: Tue, 22 Jun 2021 09:53:12 +0900
Subject: [PATCH] [ Recurrent ] Implement Dropout for Recurrent Net

This commit introduces dropout for recurrent networks. A dropout
property is added; if an element of a uniformly random tensor is
smaller than the dropout rate, the corresponding input element is set
to zero. Elements that are not zeroed out are scaled by
1.0/(1.0-dropout), so the expected activation stays the same.

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon <jijoong.moon@samsung.com>
---
 api/ccapi/include/layer.h         |  1 +
 nntrainer/layers/gru.cpp          | 10 ++++++++++
 nntrainer/layers/gru.h            | 10 ++++++++--
 nntrainer/layers/layer_internal.h |  2 ++
 nntrainer/layers/lstm.cpp         | 11 +++++++++++
 nntrainer/layers/lstm.h           | 10 ++++++++--
 nntrainer/layers/rnn.cpp          | 10 ++++++++++
 nntrainer/layers/rnn.h            | 10 ++++++++--
 nntrainer/tensor/tensor.cpp       | 18 ++++++++++++++++++
 nntrainer/tensor/tensor.h         |  7 +++++++
 nntrainer/utils/parse_util.cpp    |  4 +++-
 11 files changed, 86 insertions(+), 7 deletions(-)

diff --git a/api/ccapi/include/layer.h b/api/ccapi/include/layer.h
index 759ce7a..794269d 100644
--- a/api/ccapi/include/layer.h
+++ b/api/ccapi/include/layer.h
@@ -116,6 +116,7 @@ public:
    * - return_sequences : bool (type) - used only in lstm
    * - distribute : bool
    * - hidden_state_activation : string (type) - used only in lstm
+   * - dropout : float (type) - dropout rate
    */
   /**
    * @brief set Property of layer
diff --git a/nntrainer/layers/gru.cpp b/nntrainer/layers/gru.cpp
index f5cb230..cb8fc85 100644
--- a/nntrainer/layers/gru.cpp
+++ b/nntrainer/layers/gru.cpp
@@ -161,6 +161,12 @@ void GRULayer::setProperty(const PropertyType type, const std::string &value) {
       throw_status(status);
     }
     break;
+  case PropertyType::dropout:
+    if (!value.empty()) {
+      status = setFloat(dropout_rate, value);
+      throw_status(status);
+    }
+    break;
   default:
     LayerV1::setProperty(type, value);
     break;
@@ -207,6 +213,10 @@ void GRULayer::forwarding(bool training) {
   for (unsigned int t = 0; t < islice.height(); ++t) {
     Tensor xs =
       islice.getSharedDataTensor({islice.width()}, t * islice.width());
+
+    if (dropout_rate > 0.0 && training) {
+      xs.multiply_i(xs.dropout_mask(dropout_rate));
+    }
     hs = oslice.getSharedDataTensor({oslice.width()}, t * oslice.width());
     Tensor zrg_t =
       zrg_.getSharedDataTensor({unit * NUM_GATE}, unit * t * NUM_GATE);
diff --git a/nntrainer/layers/gru.h b/nntrainer/layers/gru.h
index f920799..d318d29 100644
--- a/nntrainer/layers/gru.h
+++ b/nntrainer/layers/gru.h
@@ -34,12 +34,13 @@ public:
     unsigned int unit_ = 0,
     ActivationType hidden_state_activation_type_ = ActivationType::ACT_NONE,
     ActivationType recurrent_activation_type_ = ActivationType::ACT_NONE,
-    bool sequence = false, Args... args) :
+    bool sequence = false, float dropout = 0.0, Args... args) :
     LayerV1(args...),
     unit(unit_),
     hidden_state_activation_type(hidden_state_activation_type_),
     recurrent_activation_type(recurrent_activation_type_),
-    return_sequences(sequence){};
+    return_sequences(sequence),
+    dropout_rate(dropout){};
 
   /**
    * @brief Destructor of GRULayer
@@ -162,6 +163,11 @@ private:
    * @brief variable to set return sequences
    */
   bool return_sequences;
+
+  /**
+   * @brief dropout rate
+   */
+  float dropout_rate;
 };
 
 } // namespace nntrainer
diff --git a/nntrainer/layers/layer_internal.h b/nntrainer/layers/layer_internal.h
index 3e03848..527005a 100644
--- a/nntrainer/layers/layer_internal.h
+++ b/nntrainer/layers/layer_internal.h
@@ -248,6 +248,7 @@ public:
    * 36. split_dimension : string (type)
    * 37. return_sequences : bool (type) - lstm
    * 39. hidden_state_activation : string (type) - lstm
+   * 40. dropout : float (type) - dropout rate
    */
   enum class PropertyType {
     input_shape = 0,
@@ -289,6 +290,7 @@ public:
     split_dimension = 36,
     return_sequences = 37,
     hidden_state_activation = 38,
+    dropout = 39,
     unknown
   };
 
diff --git a/nntrainer/layers/lstm.cpp b/nntrainer/layers/lstm.cpp
index 199ff7a..a45b01e 100644
--- a/nntrainer/layers/lstm.cpp
+++ b/nntrainer/layers/lstm.cpp
@@ -146,6 +146,12 @@ void LSTMLayer::setProperty(const PropertyType type, const std::string &value) {
       throw_status(status);
     }
     break;
+  case PropertyType::dropout:
+    if (!value.empty()) {
+      status = setFloat(dropout_rate, value);
+      throw_status(status);
+    }
+    break;
   default:
     LayerV1::setProperty(type, value);
     break;
@@ -194,6 +200,11 @@ void LSTMLayer::forwarding(bool training) {
   for (unsigned int t = 0; t < islice.height(); ++t) {
     Tensor xs =
       islice.getSharedDataTensor({islice.width()}, t * islice.width());
+
+    if (dropout_rate > 0.0 && training) {
+      xs.multiply_i(xs.dropout_mask(dropout_rate));
+    }
+
     hs = oslice.getSharedDataTensor({oslice.width()}, t * oslice.width());
     cs = cell.getSharedDataTensor({cell.width()}, t * cell.width());
     Tensor fgio_t =
diff --git a/nntrainer/layers/lstm.h b/nntrainer/layers/lstm.h
index c88893a..c1f228f 100644
--- a/nntrainer/layers/lstm.h
+++ b/nntrainer/layers/lstm.h
@@ -34,12 +34,13 @@ public:
     unsigned int unit_ = 0,
     ActivationType hidden_state_activation_type_ = ActivationType::ACT_NONE,
     ActivationType recurrent_activation_type_ = ActivationType::ACT_NONE,
-    bool sequence = false, Args... args) :
+    bool sequence = false, float dropout = 0.0, Args... args) :
     LayerV1(args...),
     unit(unit_),
     hidden_state_activation_type(hidden_state_activation_type_),
     recurrent_activation_type(recurrent_activation_type_),
-    return_sequences(sequence){};
+    return_sequences(sequence),
+    dropout_rate(dropout){};
 
   /**
    * @brief Destructor of LSTMLayer
@@ -172,6 +173,11 @@ private:
    * @brief variable to set return sequences
    */
   bool return_sequences;
+
+  /**
+   * @brief dropout rate
+   */
+  float dropout_rate;
 };
 
 } // namespace nntrainer
diff --git a/nntrainer/layers/rnn.cpp b/nntrainer/layers/rnn.cpp
index 0373ca7..941de0a 100644
--- a/nntrainer/layers/rnn.cpp
+++ b/nntrainer/layers/rnn.cpp
@@ -122,6 +122,12 @@ void RNNLayer::setProperty(const PropertyType type, const std::string &value) {
       throw_status(status);
     }
     break;
+  case PropertyType::dropout:
+    if (!value.empty()) {
+      status = setFloat(dropout_rate, value);
+      throw_status(status);
+    }
+    break;
   default:
     LayerV1::setProperty(type, value);
     break;
@@ -160,6 +166,10 @@ void RNNLayer::forwarding(bool training) {
     Tensor xs =
       islice.getSharedDataTensor({islice.width()}, t * islice.width());
 
+    if (dropout_rate > 0.0 && training) {
+      xs.multiply_i(xs.dropout_mask(dropout_rate));
+    }
+
     hs = oslice.getSharedDataTensor({oslice.width()}, t * oslice.width());
     if (t > 0) {
       hs_prev = oslice.getSharedDataTensor({oslice.width()},
diff --git a/nntrainer/layers/rnn.h b/nntrainer/layers/rnn.h
index 28b7a2e..7954d13 100644
--- a/nntrainer/layers/rnn.h
+++ b/nntrainer/layers/rnn.h
@@ -33,11 +33,12 @@ public:
   RNNLayer(
     unsigned int unit_ = 0,
     ActivationType hidden_state_activation_type_ = ActivationType::ACT_NONE,
-    bool sequence = false, Args... args) :
+    bool sequence = false, float dropout = 0.0, Args... args) :
     LayerV1(args...),
     unit(unit_),
     hidden_state_activation_type(hidden_state_activation_type_),
-    return_sequences(sequence){};
+    return_sequences(sequence),
+    dropout_rate(dropout){};
 
   /**
    * @brief Destructor of RNNLayer
@@ -127,6 +128,11 @@ private:
   bool return_sequences;
 
   /**
+   * @brief dropout rate
+   */
+  float dropout_rate;
+
+  /**
    * @brief hidden variable for rnn
    */
   std::shared_ptr hidden;
diff --git a/nntrainer/tensor/tensor.cpp b/nntrainer/tensor/tensor.cpp
index 6b95376..30f645f 100644
--- a/nntrainer/tensor/tensor.cpp
+++ b/nntrainer/tensor/tensor.cpp
@@ -870,6 +870,24 @@ Tensor Tensor::transpose(const std::string &direction) const {
   return result;
 }
 
+Tensor Tensor::dropout_mask(float dropout) const {
+  Tensor result(dim);
+  result.setValue(1.0);
+  Tensor rand_temp(dim);
+  rand_temp.setRandUniform(0.0, 1.0);
+  float scale = 1.0 / (1 - dropout);
+
+  float *mask = result.getData();
+  float *random = rand_temp.getData();
+  for (unsigned int i = 0; i < length(); ++i) {
+    if (random[i] >= dropout)
+      mask[i] = mask[i] * scale;
+    else
+      mask[i] = 0.0;
+  }
+  return result;
+}
+
 int Tensor::apply_i(std::function<float(float)> f) {
   float *data = getData();
 
diff --git a/nntrainer/tensor/tensor.h b/nntrainer/tensor/tensor.h
index 5a98da7..84a2338 100644
--- a/nntrainer/tensor/tensor.h
+++ b/nntrainer/tensor/tensor.h
@@ -515,6 +515,13 @@ public:
   Tensor &transpose(const std::string &direction, Tensor &out) const;
 
   /**
+   * @brief Calculate dropout mask: each element is 0 or 1.0/(1.0-dropout)
+   * @param dropout dropout rate
+   * @retval Tensor dropout mask
+   */
+  Tensor dropout_mask(float dropout) const;
+
+  /**
    * @brief sum all the Tensor elements according to the batch
    * @retval Calculated Tensor(batch, 1, 1, 1)
    */
diff --git a/nntrainer/utils/parse_util.cpp b/nntrainer/utils/parse_util.cpp
index ee15de0..5c34b68 100644
--- a/nntrainer/utils/parse_util.cpp
+++ b/nntrainer/utils/parse_util.cpp
@@ -258,6 +258,7 @@ unsigned int parseType(std::string ll, InputType t) {
  * split_dimension = 36
  * return_sequences = 37
  * hidden_state_activation = 38
+ * dropout = 39
  *
  * InputLayer has 0, 1, 2, 3 properties.
  * FullyConnectedLayer has 1, 4, 6, 7, 8, 9 properties.
@@ -265,7 +266,7 @@ unsigned int parseType(std::string ll, InputType t) {
  * Pooling2DLayer has 12, 13, 14, 15 properties.
  * BatchNormalizationLayer has 0, 1, 5, 6, 7 properties.
  */
-static std::array<std::string, 40> property_string = {
+static std::array<std::string, 41> property_string = {
   "input_shape",
   "normalization",
   "standardization",
@@ -305,6 +306,7 @@ static std::array<std::string, 40> property_string = {
   "split_dimension",
   "return_sequences",
   "hidden_state_activation",
+  "dropout",
   "unknown"};
 
 unsigned int parseLayerProperty(std::string property) {
-- 
2.7.4
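
For reviewers: a minimal standalone sketch of the inverted-dropout rule that
Tensor::dropout_mask() implements, assuming 0.0 <= rate < 1.0. Each element
is dropped to zero with probability `rate`; survivors are scaled by
1.0/(1.0-rate) so the expected activation is unchanged and no rescaling is
needed at inference time. The function name make_dropout_mask and the use of
std::mt19937 are illustrative only, not nntrainer API.

#include <cstddef>
#include <iostream>
#include <random>
#include <vector>

// Illustrative stand-in for Tensor::dropout_mask(): element i is 0.0 when
// the uniform sample falls below `rate`, otherwise 1/(1 - rate), so that
// E[x * mask] == x. Assumes 0.0 <= rate < 1.0.
std::vector<float> make_dropout_mask(std::size_t n, float rate) {
  std::mt19937 gen(std::random_device{}());
  std::uniform_real_distribution<float> uniform(0.0f, 1.0f);
  const float scale = 1.0f / (1.0f - rate);
  std::vector<float> mask(n);
  for (std::size_t i = 0; i < n; ++i)
    mask[i] = (uniform(gen) >= rate) ? scale : 0.0f;
  return mask;
}

int main() {
  std::vector<float> xs = {1.0f, 2.0f, 3.0f, 4.0f};
  // Training-time step, mirroring xs.multiply_i(xs.dropout_mask(rate))
  // in the forwarding paths above.
  std::vector<float> mask = make_dropout_mask(xs.size(), 0.5f);
  for (std::size_t i = 0; i < xs.size(); ++i)
    std::cout << xs[i] * mask[i] << ' '; // each output is 0 or 2 * xs[i]
  std::cout << '\n';
  return 0;
}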