From 8498a8bc7831198b13faf05f42dcc8eae18c91fc Mon Sep 17 00:00:00 2001
From: "jijoong.moon"
Date: Tue, 18 May 2021 10:25:52 +0900
Subject: [PATCH] [ LSTM ] Add return_sequence

This commit includes,
 . implementation of return_sequence. If it is true, the output of every
   time iteration is generated; if it is false, only the last output is
   available.
 . add the 'return_sequences' keyword

Resolves:

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon
---
 api/ccapi/include/layer.h         |   1 +
 nntrainer/layers/layer_internal.h |   4 +-
 nntrainer/layers/lstm.cpp         | 118 +++++++++++++++++++++++++++++---------
 nntrainer/layers/lstm.h           |  34 ++++++-----
 nntrainer/utils/parse_util.cpp    |   4 +-
 5 files changed, 118 insertions(+), 43 deletions(-)

diff --git a/api/ccapi/include/layer.h b/api/ccapi/include/layer.h
index 9f9f963..c85f33c 100644
--- a/api/ccapi/include/layer.h
+++ b/api/ccapi/include/layer.h
@@ -111,6 +111,7 @@ public:
    *   - out_dim : int ( output dimesion for embedding layer )
    *   - in_length : int ( input length for embedding layer )
    *   - recurrent_activation : string (type) - used only in lstm
+   *   - return_sequences : bool (type) - used only in lstm
    *   - distribute : bool
    */
   /**
diff --git a/nntrainer/layers/layer_internal.h b/nntrainer/layers/layer_internal.h
index 9133851..eb6d0a8 100644
--- a/nntrainer/layers/layer_internal.h
+++ b/nntrainer/layers/layer_internal.h
@@ -234,7 +234,8 @@ public:
    *            33. in_length : int ( input length for embedding layer )
    *            34. recurrent_activation : string (type) - lstm
    *            35. distribute : bool
-   *            36.
+   *            36. split_dimension : string (type)
+   *            37. return_sequences : bool (type) - lstm
    */
   enum class PropertyType {
     input_shape = 0,
@@ -274,6 +275,7 @@ public:
     recurrent_activation = 34,
     distribute = 35,
     split_dimension = 36,
+    return_sequences = 37,
     unknown
   };
diff --git a/nntrainer/layers/lstm.cpp b/nntrainer/layers/lstm.cpp
index 13d8ec7..a9e8d24 100644
--- a/nntrainer/layers/lstm.cpp
+++ b/nntrainer/layers/lstm.cpp
@@ -41,10 +41,17 @@ int LSTMLayer::initialize(Manager &manager) {
   }
 
   // input_dim = [ batch, 1, time_iteration, feature_size ]
-  // outut_dim = [ batch, 1, time_iteration, hidden_size ( unit ) ]
+  // if return_sequences == False :
+  //      output_dim = [ batch, 1, 1, hidden_size (unit)]
+  // else:
+  //      output_dim = [ batch, 1, time_iteration, hidden_size ( unit ) ]
   output_dim[0] = input_dim[0];
   output_dim[0].width(unit);
 
+  if (!return_sequences) {
+    output_dim[0].height(1);
+  }
+
   TensorDim bias_dim = TensorDim();
   bias_dim.setTensorDim(3, unit * NUM_GATE);
@@ -81,14 +88,20 @@ int LSTMLayer::initialize(Manager &manager) {
       WeightRegularizer::NONE, 1.0f, true);
   }
 
-  mem_cell =
-    std::make_shared<Var_Grad>(output_dim[0], true, true, "LSTM:mem_cell");
+  TensorDim d = input_dim[0];
+  d.width(unit);
+
+  mem_cell = std::make_shared<Var_Grad>(d, true, true, "LSTM:mem_cell");
   mem_cell->getVariableRef().setZero();
   mem_cell->getGradientRef().setZero();
 
-  TensorDim d = input_dim[0];
-  d.width(unit * NUM_GATE);
+  if (!return_sequences) {
+    hidden = std::make_shared<Var_Grad>(d, true, true, "LSTM:temp_hidden");
+    hidden->getVariableRef().setZero();
+    hidden->getGradientRef().setZero();
+  }
 
+  d.width(unit * NUM_GATE);
   fgio = std::make_shared<Var_Grad>(d, true, true, "LSTM:fgio");
   fgio->getVariableRef().setZero();
   fgio->getGradientRef().setZero();
@@ -103,6 +116,16 @@ int LSTMLayer::initialize(Manager &manager) {
   c_prev = Tensor(cell_dim);
   c_prev.setZero();
 
+  if (Layer::activation_type == ActivationType::ACT_NONE) {
+    Layer::activation_type = ActivationType::ACT_TANH;
+    acti_func.setActiFunc(activation_type);
+  }
+
+  if (recurrent_activation_type == ActivationType::ACT_NONE) {
+    recurrent_activation_type = ActivationType::ACT_SIGMOID;
+    recurrent_acti_func.setActiFunc(recurrent_activation_type);
+  }
+
   return status;
 }
@@ -131,6 +154,12 @@ void LSTMLayer::setProperty(const PropertyType type, const std::string &value) {
       recurrent_acti_func.setActiFunc(acti_type);
     } break;
+  case PropertyType::return_sequences:
+    if (!value.empty()) {
+      status = setBoolean(return_sequences, value);
+      throw_status(status);
+    }
+    break;
   default:
     Layer::setProperty(type, value);
     break;
   }
 }
@@ -153,7 +182,13 @@ void LSTMLayer::forwarding(bool training) {
   Tensor &bias_h =
     weightAt(static_cast<unsigned int>(LSTMParams::bias_h)).getVariableRef();
 
-  Tensor &hidden_ = net_hidden[0]->getVariableRef();
+  Tensor hidden_;
+  if (!return_sequences) {
+    hidden_ = hidden->getVariableRef();
+  } else {
+    hidden_ = net_hidden[0]->getVariableRef();
+  }
+
   Tensor &input_ = net_input[0]->getVariableRef();
   Tensor &m_cell_ = mem_cell->getVariableRef();
@@ -189,20 +224,20 @@ void LSTMLayer::forwarding(bool training) {
       fgio_t.add_i(bias_h);
       fgio_t.add_i(xs.dot(weight_xh));
 
-      Tensor hf = fgio_t.getSharedDataTensor({unit}, 0);
-      Tensor hg = fgio_t.getSharedDataTensor({unit}, unit);
-      Tensor hi = fgio_t.getSharedDataTensor({unit}, unit * 2);
+      Tensor hi = fgio_t.getSharedDataTensor({unit}, 0);
+      Tensor hf = fgio_t.getSharedDataTensor({unit}, unit);
+      Tensor hg = fgio_t.getSharedDataTensor({unit}, unit * 2);
       Tensor ho = fgio_t.getSharedDataTensor({unit}, unit * 3);
 
-      acti_func.run_fn(hf, hf);
-      acti_func.run_fn(hi, hi);
-      acti_func.run_fn(ho, ho);
-      recurrent_acti_func.run_fn(hg, hg);
+      recurrent_acti_func.run_fn(hf, hf);
+      recurrent_acti_func.run_fn(hi, hi);
+      recurrent_acti_func.run_fn(ho, ho);
+      acti_func.run_fn(hg, hg);
 
       hf.multiply(cs_prev, cs);
       cs.add_i(hg.multiply(hi));
-      recurrent_acti_func.run_fn(cs, hs);
+      acti_func.run_fn(cs, hs);
       hs.multiply_i(ho);
     }
     // size of h_prev and hs size is same : unit.
@@ -210,6 +245,16 @@
     h_prev.getBatchSlice(b, 1).copy(hs);
     c_prev.getBatchSlice(b, 1).copy(cs);
   }
+
+  if (!return_sequences) {
+    TensorDim d = hidden_.getDim();
+    for (unsigned int b = 0; b < input_dim[0].batch(); ++b) {
+      float *data = hidden_.getAddress(b * d.width() * d.height() +
+                                       (d.height() - 1) * d.width());
+      float *rdata = net_hidden[0]->getVariableRef().getAddress(b * d.width());
+      std::copy(data, data + d.width(), rdata);
+    }
+  }
 }
 
 void LSTMLayer::copy(std::shared_ptr<Layer> l) {
@@ -240,8 +285,25 @@ void LSTMLayer::calcGradient() {
   Tensor &weight_hh =
     weightAt(static_cast<unsigned int>(LSTMParams::weight_hh)).getVariableRef();
 
-  Tensor &derivative_ = net_hidden[0]->getGradientRef();
-  Tensor &hidden_ = net_hidden[0]->getVariableRef();
+  Tensor derivative_;
+  Tensor hidden_;
+
+  if (!return_sequences) {
+    derivative_ = hidden->getGradientRef();
+    TensorDim d = derivative_.getDim();
+    for (unsigned int b = 0; b < input_dim[0].batch(); ++b) {
+      float *data = derivative_.getAddress(b * d.width() * d.height() +
+                                           (d.height() - 1) * d.width());
+      float *rdata = net_hidden[0]->getGradientRef().getAddress(b * d.width());
+      std::copy(rdata, rdata + d.width(), data);
+    }
+
+    hidden_ = hidden->getVariableRef();
+  } else {
+    derivative_ = net_hidden[0]->getGradientRef();
+    hidden_ = net_hidden[0]->getVariableRef();
+  }
+
   Tensor &input_ = net_input[0]->getVariableRef();
   Tensor &m_cell_ = mem_cell->getVariableRef();
   Tensor &dm_cell_ = mem_cell->getGradientRef();
@@ -298,19 +360,19 @@
         dh.add_i(dh_nx);
       }
 
-      Tensor dhf = dfgio_t.getSharedDataTensor({unit}, 0);
-      Tensor dhg = dfgio_t.getSharedDataTensor({unit}, unit);
-      Tensor dhi = dfgio_t.getSharedDataTensor({unit}, unit * 2);
+      Tensor dhi = dfgio_t.getSharedDataTensor({unit}, 0);
+      Tensor dhf = dfgio_t.getSharedDataTensor({unit}, unit);
+      Tensor dhg = dfgio_t.getSharedDataTensor({unit}, unit * 2);
       Tensor dho = dfgio_t.getSharedDataTensor({unit}, unit * 3);
 
-      Tensor hf = fgio_t.getSharedDataTensor({unit}, 0);
-      Tensor hg = fgio_t.getSharedDataTensor({unit}, unit);
-      Tensor hi = fgio_t.getSharedDataTensor({unit}, unit * 2);
+      Tensor hi = fgio_t.getSharedDataTensor({unit}, 0);
+      Tensor hf = fgio_t.getSharedDataTensor({unit}, unit);
+      Tensor hg = fgio_t.getSharedDataTensor({unit}, unit * 2);
       Tensor ho = fgio_t.getSharedDataTensor({unit}, unit * 3);
 
-      recurrent_acti_func.run_fn(cs, dho);
+      acti_func.run_fn(cs, dho);
       dho.multiply_i(dh);
-      recurrent_acti_func.run_prime_fn(cs, dc, ho);
+      acti_func.run_prime_fn(cs, dc, ho);
       dc.multiply_i(dh);
       dc.add_i(dc_nx);
@@ -319,10 +381,10 @@
       dc.multiply(hi, dhg);
       dc.multiply(hf, dc_nx);
 
-      acti_func.run_prime_fn(ho, dho, dho);
-      acti_func.run_prime_fn(hf, dhf, dhf);
-      acti_func.run_prime_fn(hi, dhi, dhi);
-      recurrent_acti_func.run_prime_fn(hg, dhg, dhg);
+      recurrent_acti_func.run_prime_fn(ho, dho, dho);
+      recurrent_acti_func.run_prime_fn(hf, dhf, dhf);
+      recurrent_acti_func.run_prime_fn(hi, dhi, dhi);
+      acti_func.run_prime_fn(hg, dhg, dhg);
 
       djdb_h.add_i(dfgio_t);
diff --git a/nntrainer/layers/lstm.h b/nntrainer/layers/lstm.h
index 68ee50e..9a38bdb 100644
--- a/nntrainer/layers/lstm.h
+++ b/nntrainer/layers/lstm.h
@@ -32,20 +32,17 @@ public:
   template <typename... Args>
   LSTMLayer(
     unsigned int unit_ = 0,
-    ActivationType recurrent_activation_type_ = ActivationType::ACT_SIGMOID,
-    Args... args) :
+    ActivationType recurrent_activation_type_ = ActivationType::ACT_NONE,
+    bool sequence = false, Args... args) :
     Layer(args...),
-    unit(unit_) {
-    /* Default Activation Type is tanh */
-    if (getActivationType() == ActivationType::ACT_NONE)
-      setActivation(ActivationType::ACT_TANH);
-    setRecurrentActivation(recurrent_activation_type_);
-  }
+    unit(unit_),
+    recurrent_activation_type(recurrent_activation_type_),
+    return_sequences(sequence){};
 
   /**
    * @brief Destructor of LSTMLayer
    */
-  ~LSTMLayer(){};
+  ~LSTMLayer() = default;
 
   /**
    * @brief Move constructor.
@@ -125,17 +122,17 @@ private:
   unsigned int unit;
 
   /**
-   * @brief activation function for h_t : default is tanh
+   * @brief activation function for h_t : default is sigmoid
    */
   ActiFunc acti_func;
 
   /**
-   * @brief activation type for recurrent : default is sigmoid
+   * @brief activation type for recurrent : default is tanh
    */
   ActivationType recurrent_activation_type;
 
   /**
-   * @brief activation function for recurrent : default is sigmoid
+   * @brief activation function for recurrent : default is tanh
    */
   ActiFunc recurrent_acti_func;
@@ -153,11 +150,22 @@ private:
    * @brief To save cell data
    */
   std::shared_ptr<Var_Grad> mem_cell;
-
+
   /**
    * @brief To save intermediate gates
    */
   std::shared_ptr<Var_Grad> fgio;
+
+  /**
+   * @brief hidden state
+   */
+  std::shared_ptr<Var_Grad> hidden;
+
+  /**
+   * @brief variable to set return sequences
+   */
+  bool return_sequences;
+
 };
 
 } // namespace nntrainer
diff --git a/nntrainer/utils/parse_util.cpp b/nntrainer/utils/parse_util.cpp
index fe622a5..23c681e 100644
--- a/nntrainer/utils/parse_util.cpp
+++ b/nntrainer/utils/parse_util.cpp
@@ -256,6 +256,7 @@ unsigned int parseType(std::string ll, InputType t) {
  *            recurrent_activation = 34
  *            distribute = 35
  *            split_dimension = 36
+ *            return_sequences = 37
  *
  * InputLayer has 0, 1, 2, 3 properties.
  * FullyConnectedLayer has 1, 4, 6, 7, 8, 9 properties.
@@ -263,7 +264,7 @@ unsigned int parseType(std::string ll, InputType t) {
  * Pooling2DLayer has 12, 13, 14, 15 properties.
  * BatchNormalizationLayer has 0, 1, 5, 6, 7 properties.
  */
-static std::array<std::string, 38> property_string = {
+static std::array<std::string, 39> property_string = {
   "input_shape",
   "normalization",
   "standardization",
@@ -301,6 +302,7 @@ static std::array property_string = {
   "recurrent_activation",
   "distribute",
   "split_dimension",
+  "return_sequences",
   "unknown"};
 
 unsigned int parseLayerProperty(std::string property) {
-- 
2.7.4
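
Usage sketch (illustrative, not part of the patch): assuming the string-based
ml::train::createLayer factory from the ccapi and the "lstm" layer type key,
the new property could be set as shown below; everything except the "unit" and
"return_sequences" keys introduced by this patch is an assumption. With an
input of shape [ batch, 1, time_iteration, feature_size ],
return_sequences=true keeps the full output [ batch, 1, time_iteration, unit ],
while the default (false) collapses it to [ batch, 1, 1, unit ].

  #include <layer.h> // ccapi header; assumed to declare ml::train::createLayer

  int main() {
    // Hypothetical construction; property keys follow this patch.
    // return_sequences=true  -> output of every time iteration is kept
    // return_sequences=false -> only the last time iteration is kept (default)
    auto lstm = ml::train::createLayer(
      "lstm", {"unit=32", "return_sequences=true"});
    return lstm == nullptr;
  }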