From: jijoong.moon
Date: Fri, 28 May 2021 02:24:46 +0000 (+0900)
Subject: [ RNN ] Add Multi-Layer RNN Unit Tests
X-Git-Tag: accepted/tizen/unified/20210829.234903~299
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=bae6042f7a222c233852aaf755d1641903ec95ca;p=platform%2Fcore%2Fml%2Fnntrainer.git

[ RNN ] Add Multi-Layer RNN Unit Tests

This commit includes,
  . Multi-Layered RNN Unit Tests
  . Fixes needed to make the new tests run

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon
---

diff --git a/nntrainer/layers/lstm.cpp b/nntrainer/layers/lstm.cpp
index fcb50b8..f997e78 100644
--- a/nntrainer/layers/lstm.cpp
+++ b/nntrainer/layers/lstm.cpp
@@ -177,8 +177,7 @@ void LSTMLayer::forwarding(bool training) {
     h_prev.setZero();
     c_prev.setZero();
 
-  Tensor hidden_;
-  hidden_ = hidden->getVariableRef();
+  Tensor &hidden_ = hidden->getVariableRef();
   Tensor &input_ = net_input[0]->getVariableRef();
   Tensor &m_cell_ = mem_cell->getVariableRef();
 
@@ -260,6 +259,7 @@ void LSTMLayer::copy(std::shared_ptr<Layer> l) {
   this->acti_func = from->acti_func;
   this->recurrent_activation_type = from->recurrent_activation_type;
   this->recurrent_acti_func = from->recurrent_acti_func;
+  this->return_sequences = from->return_sequences;
 }
 
 void LSTMLayer::calcDerivative() {
@@ -289,7 +289,6 @@ void LSTMLayer::calcGradient() {
   fgio->getGradientRef().setZero();
 
   Tensor derivative_ = hidden->getGradientRef();
-  Tensor hidden_;
 
   if (!return_sequences) {
     TensorDim d = derivative_.getDim();
@@ -306,7 +305,7 @@ void LSTMLayer::calcGradient() {
                      derivative_.getData());
   }
 
-  hidden_ = hidden->getVariableRef();
+  Tensor &hidden_ = hidden->getVariableRef();
   Tensor &input_ = net_input[0]->getVariableRef();
   Tensor &m_cell_ = mem_cell->getVariableRef();
 
diff --git a/nntrainer/layers/rnn.cpp b/nntrainer/layers/rnn.cpp
index 57785d2..0053ea5 100644
--- a/nntrainer/layers/rnn.cpp
+++ b/nntrainer/layers/rnn.cpp
@@ -83,7 +83,6 @@ int RNNLayer::initialize(Manager &manager) {
   bias_dim.batch(input_dim[0].batch());
 
   h_prev = Tensor(bias_dim);
-  h_prev.setZero();
 
   TensorDim d = input_dim[0];
   d.width(unit);
@@ -92,8 +91,6 @@ int RNNLayer::initialize(Manager &manager) {
   // test will fail. Because it modifies the data during gradient calculation
   // TODO : We could control with something like #define test to save memory
   hidden = std::make_shared<Var_Grad>(d, true, true, "RNN:temp_hidden");
-  hidden->getVariableRef().setZero();
-  hidden->getGradientRef().setZero();
 
   if (Layer::activation_type == ActivationType::ACT_NONE) {
     Layer::activation_type = ActivationType::ACT_TANH;
@@ -142,32 +139,32 @@ void RNNLayer::forwarding(bool training) {
   Tensor &bias_h =
     weightAt(static_cast<int>(RNNParams::bias_h)).getVariableRef();
 
-  Tensor hidden_;
-  hidden_ = hidden->getVariableRef();
+  hidden->getVariableRef().setZero();
+
+  if (training) {
+    hidden->getGradientRef().setZero();
+  }
+  h_prev.setZero();
+  Tensor &hidden_ = hidden->getVariableRef();
   Tensor &input_ = net_input[0]->getVariableRef();
 
   Tensor temp;
   Tensor hs_prev;
   Tensor hs;
 
-  if (training)
-    h_prev.setZero();
-
   // TODO : check merge b and t index
   for (unsigned int b = 0; b < input_dim[0].batch(); ++b) {
     Tensor islice = input_.getBatchSlice(b, 1);
     Tensor oslice = hidden_.getBatchSlice(b, 1);
 
     for (unsigned int t = 0; t < islice.height(); ++t) {
-      Tensor xs = islice.getSharedDataTensor(TensorDim(1, 1, 1, islice.width()),
-                                             t * islice.width());
-      // Calculate Hidden
-      // activation(xs.dot(weight_xh).add(hs_prev.dot(weight_hh).add(bias_h)));
-      hs = oslice.getSharedDataTensor(TensorDim(1, 1, 1, oslice.width()),
-                                      t * oslice.width());
+      Tensor xs =
+        islice.getSharedDataTensor({islice.width()}, t * islice.width());
+
+      hs = oslice.getSharedDataTensor({oslice.width()}, t * oslice.width());
 
       if (t > 0) {
-        hs_prev = oslice.getSharedDataTensor(TensorDim(1, 1, 1, oslice.width()),
+        hs_prev = oslice.getSharedDataTensor({oslice.width()},
                                               (t - 1) * oslice.width());
       } else {
         hs_prev = h_prev.getBatchSlice(b, 1);
@@ -204,12 +201,12 @@ void RNNLayer::copy(std::shared_ptr<Layer> l) {
   std::shared_ptr<RNNLayer> from = std::static_pointer_cast<RNNLayer>(l);
 
   this->unit = from->unit;
+  this->return_sequences = from->return_sequences;
+  this->acti_func = from->acti_func;
 }
 
 void RNNLayer::calcDerivative() {
-  Tensor derivative_;
-  derivative_ = hidden->getGradientRef();
-
+  Tensor &derivative_ = hidden->getGradientRef();
   Tensor &weight =
     weightAt(static_cast<int>(RNNParams::weight_xh)).getVariableRef();
   Tensor &ret_ = net_input[0]->getGradientRef();
@@ -227,9 +224,11 @@ void RNNLayer::calcGradient() {
   Tensor &weight_hh =
     weightAt(static_cast<int>(RNNParams::weight_hh)).getVariableRef();
 
-  Tensor derivative_;
-  Tensor hidden_;
-  derivative_ = hidden->getGradientRef();
+  djdw_x.setZero();
+  djdw_h.setZero();
+  djdb_h.setZero();
+
+  Tensor &derivative_ = hidden->getGradientRef();
 
   if (!return_sequences) {
     TensorDim d = derivative_.getDim();
@@ -243,7 +242,7 @@ void RNNLayer::calcGradient() {
     derivative_.copy(net_hidden[0]->getGradientRef());
   }
 
-  hidden_ = hidden->getVariableRef();
+  Tensor &hidden_ = hidden->getVariableRef();
   Tensor &input_ = net_input[0]->getVariableRef();
 
   Tensor dh_nx = Tensor(TensorDim(1, 1, 1, derivative_.width()));
@@ -281,9 +280,9 @@ void RNNLayer::calcGradient() {
       acti_func.run_prime_fn(hs, dh, dh);
       djdb_h.add_i(dh);
-      djdw_x.add_i(xs.dot(dh, true, false));
-      djdw_h.add_i(hs_prev.dot(dh, true, false));
-      dh.dot(weight_hh, dh_nx, false, true, 1.0);
+      xs.dot(dh, djdw_x, true, false, 1.0);
+      hs_prev.dot(dh, djdw_h, true, false, 1.0);
+      dh.dot(weight_hh, dh_nx, false, true);
     }
   }
 }
diff --git a/packaging/unittest_models.tar.gz b/packaging/unittest_models.tar.gz
index 02a9185..e28ccf8 100644
Binary files a/packaging/unittest_models.tar.gz and b/packaging/unittest_models.tar.gz differ
diff --git a/test/input_gen/genModelTests.py b/test/input_gen/genModelTests.py
index 0f07fa3..9950432 100644
--- a/test/input_gen/genModelTests.py
+++ b/test/input_gen/genModelTests.py
@@ -477,3 +477,71 @@ if __name__ == "__main__":
     rnn = K.layers.SimpleRNN(2, return_sequences=True)
     rnn_layer_return_sequence_tc(rnn)(file_name="rnn_return_sequences.info", debug=["summary", "initial_weights", "dx", "output", "layer_name", "label"],)
 
+    rnn_layer_return_sequence_with_batch = lambda rnn_layer: partial(
+        record,
+        model=[
+            K.Input(batch_shape=(2, 2, 1)),
+            rnn_layer,
+            K.layers.Dense(1)
+        ],
+        optimizer=opt.SGD(learning_rate=0.1),
+        iteration=1,
+        input_shape=(2,2,1),
+        label_shape=(2,2,1),
+        is_onehot=False,
+        loss_fn_str="mse"
+    )
+    rnn = K.layers.SimpleRNN(2, return_sequences=True)
+    rnn_layer_return_sequence_with_batch(rnn)(file_name="rnn_return_sequence_with_batch.info", debug=["summary", "initial_weights", "dx", "output", "layer_name", "label"],)
+
+
+    rnn_layer_return_sequence_with_batch_n = lambda rnn_layer: partial(
+        record,
+        model=[
+            K.Input(batch_shape=(2, 2, 1)),
+            rnn_layer,
+            K.layers.Dense(1)
+        ],
+        optimizer=opt.SGD(learning_rate=0.1),
+        iteration=2,
+        input_shape=(2,2,1),
+        label_shape=(2,2,1),
+        is_onehot=False,
+        loss_fn_str="mse"
+    )
+    rnn = K.layers.SimpleRNN(2, return_sequences=True)
+    rnn_layer_return_sequence_with_batch_n(rnn)(file_name="rnn_return_sequence_with_batch_n.info", debug=["summary", "initial_weights", "dx", "output", "layer_name", "label","weights","gradients"],)
+
+    multi_rnn_layer_return_sequence = partial(
+        record,
+        model=[
+            K.Input(batch_shape=(1, 2, 1)),
+            K.layers.SimpleRNN(2, return_sequences=True),
+            K.layers.SimpleRNN(2),
+            K.layers.Dense(1)
+        ],
+        optimizer=opt.SGD(learning_rate=0.1),
+        iteration=1,
+        input_shape=(1,2,1),
+        label_shape=(1,1,1),
+        is_onehot=False,
+        loss_fn_str="mse"
+    )
+    multi_rnn_layer_return_sequence(file_name="multi_rnn_return_sequence.info", debug=["summary", "initial_weights", "dx", "output", "layer_name", "label","weights","gradients"],)
+
+    multi_rnn_layer_return_sequence_with_batch_n = partial(
+        record,
+        model=[
+            K.Input(batch_shape=(2, 2, 1)),
+            K.layers.SimpleRNN(2, return_sequences=True),
+            K.layers.SimpleRNN(2),
+            K.layers.Dense(1)
+        ],
+        optimizer=opt.SGD(learning_rate=0.1),
+        iteration=2,
+        input_shape=(2,2,1),
+        label_shape=(2,1),
+        is_onehot=False,
+        loss_fn_str="mse"
+    )
+    multi_rnn_layer_return_sequence_with_batch_n(file_name="multi_rnn_return_sequence_with_batch_n.info", debug=["summary", "initial_weights", "dx", "output", "layer_name", "label","weights","gradients"],)
diff --git a/test/unittest/unittest_nntrainer_models.cpp b/test/unittest/unittest_nntrainer_models.cpp
index a911da4..2b4903b 100644
--- a/test/unittest/unittest_nntrainer_models.cpp
+++ b/test/unittest/unittest_nntrainer_models.cpp
@@ -1138,6 +1138,58 @@ INI multi_lstm_return_sequence_with_batch_n(
   }
 );
 
+INI rnn_return_sequence_with_batch(
+  "rnn_return_sequence_with_batch",
+  {
+    nn_base + "loss=mse | batch_size=2",
+    sgd_base + "learning_rate = 0.1",
+    I("input") + input_base + "input_shape=1:2:1",
+    I("rnn") + rnn_base +
+      "unit = 2" + "input_layers=input"+ "return_sequences=true",
+    I("outputlayer") + fc_base + "unit = 1" + "input_layers=rnn"
+  }
+);
+
+INI rnn_return_sequence_with_batch_n(
+  "rnn_return_sequence_with_batch_n",
+  {
+    nn_base + "loss=mse | batch_size=2",
+    sgd_base + "learning_rate = 0.1",
+    I("input") + input_base + "input_shape=1:2:1",
+    I("rnn") + rnn_base +
+      "unit = 2" + "input_layers=input"+ "return_sequences=true",
+    I("outputlayer") + fc_base + "unit = 1" + "input_layers=rnn"
+  }
+);
+
+INI multi_rnn_return_sequence(
+  "multi_rnn_return_sequence",
+  {
+    nn_base + "loss=mse | batch_size=1",
+    sgd_base + "learning_rate = 0.1",
+    I("input") + input_base + "input_shape=1:2:1",
+    I("rnn") + rnn_base +
+      "unit = 2" + "input_layers=input"+ "return_sequences=true",
+    I("rnn2") + rnn_base +
+      "unit = 2" + "input_layers=rnn",
+    I("outputlayer") + fc_base + "unit = 1" + "input_layers=rnn2"
+  }
+);
+
+
+INI multi_rnn_return_sequence_with_batch_n(
+  "multi_rnn_return_sequence_with_batch_n",
+  {
+    nn_base + "loss=mse | batch_size=2",
+    sgd_base + "learning_rate = 0.1",
+    I("input") + input_base + "input_shape=1:2:1",
+    I("rnn") + rnn_base +
+      "unit = 2" + "input_layers=input"+ "return_sequences=true",
+    I("rnn2") + rnn_base +
+      "unit = 2" + "input_layers=rnn",
+    I("outputlayer") + fc_base + "unit = 1" + "input_layers=rnn2"
+  }
+);
 
 INSTANTIATE_TEST_CASE_P(
@@ -1190,7 +1242,12 @@ INSTANTIATE_TEST_CASE_P(
     mkModelTc(lstm_return_sequence_with_batch, "2:1:2:1", 1),
     mkModelTc(lstm_return_sequence_with_batch_n, "2:1:2:1", 2),
     mkModelTc(multi_lstm_return_sequence, "1:1:1:1", 1),
-    mkModelTc(multi_lstm_return_sequence_with_batch_n, "2:1:1:1", 2)
+    mkModelTc(multi_lstm_return_sequence_with_batch_n, "2:1:1:1", 2),
+    mkModelTc(rnn_return_sequence_with_batch, "2:1:2:1", 1),
+    mkModelTc(rnn_return_sequence_with_batch_n, "2:1:2:1", 2),
+    mkModelTc(multi_rnn_return_sequence, "1:1:1:1", 1),
+    mkModelTc(multi_rnn_return_sequence_with_batch_n, "2:1:1:1", 2)
+
 // / #if gtest_version <= 1.7.0
 ));
 /// #else gtest_version > 1.8.0
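
For readers following the new multi-layer test topology (SimpleRNN with return_sequences=true, then SimpleRNN, then Dense), the per-step recurrence that both the rnn.cpp forwarding loop and the Keras reference models compute is h_t = tanh(x_t . W_xh + h_{t-1} . W_hh + b_h). The sketch below is only an illustration of that recurrence, not nntrainer code: it uses NumPy, and the function name simple_rnn_forward and the weight arrays are made up for the example and do not match nntrainer's Tensor API or the initializers used when generating the golden data.

```python
# Minimal sketch (assumptions noted above): two stacked simple RNN cells with tanh.
# Shapes follow the new tests: batch=2, time=2, feature=1, unit=2.
import numpy as np

def simple_rnn_forward(x, w_xh, w_hh, b_h, return_sequences=True):
    """x: (batch, time, feat) -> (batch, time, unit) or (batch, unit)."""
    batch, time, _ = x.shape
    unit = b_h.shape[0]
    h = np.zeros((batch, unit))              # hidden state starts at zero, as in rnn.cpp
    outputs = np.zeros((batch, time, unit))
    for t in range(time):
        # h_t = tanh(x_t . W_xh + h_{t-1} . W_hh + b_h)
        h = np.tanh(x[:, t, :] @ w_xh + h @ w_hh + b_h)
        outputs[:, t, :] = h
    return outputs if return_sequences else h

rng = np.random.default_rng(0)
x = rng.normal(size=(2, 2, 1))               # batch=2, time=2, feature=1
# First RNN emits the full sequence (return_sequences=true in the INI above).
h1 = simple_rnn_forward(x, rng.normal(size=(1, 2)), rng.normal(size=(2, 2)),
                        np.zeros(2), return_sequences=True)
# Second RNN consumes that sequence and keeps only the last hidden state.
h2 = simple_rnn_forward(h1, rng.normal(size=(2, 2)), rng.normal(size=(2, 2)),
                        np.zeros(2), return_sequences=False)
print(h2.shape)  # (2, 2): last hidden state per sample, before the final Dense(1)
```

Stacking works because the first layer returns its hidden state at every time step, so the second layer sees a (batch, time, unit) sequence rather than only the final state; that is exactly what return_sequences=true toggles in both the Keras generator and the INI configurations above.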