[ RNN ] Add Multi-Layer RNN Unit Tests
authorjijoong.moon <jijoong.moon@samsung.com>
Fri, 28 May 2021 02:24:46 +0000 (11:24 +0900)
committerJijoong Moon <jijoong.moon@samsung.com>
Fri, 11 Jun 2021 05:22:57 +0000 (14:22 +0900)
This commit includes:
 . Multi-layer RNN unit tests (single and stacked SimpleRNN models)
 . Fixes to the RNN/LSTM layers needed to run them

**Self evaluation:**
1. Build test:  [X]Passed [ ]Failed [ ]Skipped
2. Run test:  [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon <jijoong.moon@samsung.com>
nntrainer/layers/lstm.cpp
nntrainer/layers/rnn.cpp
packaging/unittest_models.tar.gz
test/input_gen/genModelTests.py
test/unittest/unittest_nntrainer_models.cpp

index fcb50b8..f997e78 100644 (file)
@@ -177,8 +177,7 @@ void LSTMLayer::forwarding(bool training) {
   h_prev.setZero();
   c_prev.setZero();
 
-  Tensor hidden_;
-  hidden_ = hidden->getVariableRef();
+  Tensor &hidden_ = hidden->getVariableRef();
 
   Tensor &input_ = net_input[0]->getVariableRef();
   Tensor &m_cell_ = mem_cell->getVariableRef();
@@ -260,6 +259,7 @@ void LSTMLayer::copy(std::shared_ptr<Layer> l) {
   this->acti_func = from->acti_func;
   this->recurrent_activation_type = from->recurrent_activation_type;
   this->recurrent_acti_func = from->recurrent_acti_func;
+  this->return_sequences = from->return_sequences;
 }
 
 void LSTMLayer::calcDerivative() {
@@ -289,7 +289,6 @@ void LSTMLayer::calcGradient() {
   fgio->getGradientRef().setZero();
 
   Tensor derivative_ = hidden->getGradientRef();
-  Tensor hidden_;
 
   if (!return_sequences) {
     TensorDim d = derivative_.getDim();
@@ -306,7 +305,7 @@ void LSTMLayer::calcGradient() {
               derivative_.getData());
   }
 
-  hidden_ = hidden->getVariableRef();
+  Tensor &hidden_ = hidden->getVariableRef();
 
   Tensor &input_ = net_input[0]->getVariableRef();
   Tensor &m_cell_ = mem_cell->getVariableRef();
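Two notes on the lstm.cpp changes above. Binding hidden_ as a Tensor& rather than constructing a separate Tensor and assigning to it makes the aliasing explicit: everything written through hidden_ goes to the Var_Grad variable itself. And copy() now also carries return_sequences over, so a copied LSTM layer keeps the flag instead of reverting to the layer default. A minimal sketch of the reference-binding pattern (illustrative only, assuming getVariableRef() returns a Tensor& as it is used above):

    // Before: a second Tensor object, assigned from the managed variable.
    // Tensor hidden_;
    // hidden_ = hidden->getVariableRef();

    // After: an alias to the managed variable; no extra Tensor object, and
    // writes through hidden_ unambiguously update the Var_Grad's tensor.
    Tensor &hidden_ = hidden->getVariableRef();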
index 57785d2..0053ea5 100644 (file)
@@ -83,7 +83,6 @@ int RNNLayer::initialize(Manager &manager) {
 
   bias_dim.batch(input_dim[0].batch());
   h_prev = Tensor(bias_dim);
-  h_prev.setZero();
 
   TensorDim d = input_dim[0];
   d.width(unit);
@@ -92,8 +91,6 @@ int RNNLayer::initialize(Manager &manager) {
 // test will fail. Because it modifies the data during gradient calculation
   // TODO : We could control with something like #define test to save memory
   hidden = std::make_shared<Var_Grad>(d, true, true, "RNN:temp_hidden");
-  hidden->getVariableRef().setZero();
-  hidden->getGradientRef().setZero();
 
   if (Layer::activation_type == ActivationType::ACT_NONE) {
     Layer::activation_type = ActivationType::ACT_TANH;
@@ -142,32 +139,32 @@ void RNNLayer::forwarding(bool training) {
   Tensor &bias_h =
     weightAt(static_cast<int>(RNNParams::bias_h)).getVariableRef();
 
-  Tensor hidden_;
-  hidden_ = hidden->getVariableRef();
+  hidden->getVariableRef().setZero();
+
+  if (training) {
+    hidden->getGradientRef().setZero();
+  }
+  h_prev.setZero();
 
+  Tensor &hidden_ = hidden->getVariableRef();
   Tensor &input_ = net_input[0]->getVariableRef();
 
   Tensor temp;
   Tensor hs_prev;
   Tensor hs;
 
-  if (training)
-    h_prev.setZero();
-
   // TODO : check merge b and t index
   for (unsigned int b = 0; b < input_dim[0].batch(); ++b) {
     Tensor islice = input_.getBatchSlice(b, 1);
     Tensor oslice = hidden_.getBatchSlice(b, 1);
 
     for (unsigned int t = 0; t < islice.height(); ++t) {
-      Tensor xs = islice.getSharedDataTensor(TensorDim(1, 1, 1, islice.width()),
-                                             t * islice.width());
-      // Calculate Hidden
-      // activation(xs.dot(weight_xh).add(hs_prev.dot(weight_hh).add(bias_h)));
-      hs = oslice.getSharedDataTensor(TensorDim(1, 1, 1, oslice.width()),
-                                      t * oslice.width());
+      Tensor xs =
+        islice.getSharedDataTensor({islice.width()}, t * islice.width());
+
+      hs = oslice.getSharedDataTensor({oslice.width()}, t * oslice.width());
       if (t > 0) {
-        hs_prev = oslice.getSharedDataTensor(TensorDim(1, 1, 1, oslice.width()),
+        hs_prev = oslice.getSharedDataTensor({oslice.width()},
                                              (t - 1) * oslice.width());
       } else {
         hs_prev = h_prev.getBatchSlice(b, 1);
@@ -204,12 +201,12 @@ void RNNLayer::copy(std::shared_ptr<Layer> l) {
 
   std::shared_ptr<RNNLayer> from = std::static_pointer_cast<RNNLayer>(l);
   this->unit = from->unit;
+  this->return_sequences = from->return_sequences;
+  this->acti_func = from->acti_func;
 }
 
 void RNNLayer::calcDerivative() {
-  Tensor derivative_;
-  derivative_ = hidden->getGradientRef();
-
+  Tensor &derivative_ = hidden->getGradientRef();
   Tensor &weight =
     weightAt(static_cast<int>(RNNParams::weight_xh)).getVariableRef();
   Tensor &ret_ = net_input[0]->getGradientRef();
@@ -227,9 +224,11 @@ void RNNLayer::calcGradient() {
   Tensor &weight_hh =
     weightAt(static_cast<int>(RNNParams::weight_hh)).getVariableRef();
 
-  Tensor derivative_;
-  Tensor hidden_;
-  derivative_ = hidden->getGradientRef();
+  djdw_x.setZero();
+  djdw_h.setZero();
+  djdb_h.setZero();
+
+  Tensor &derivative_ = hidden->getGradientRef();
 
   if (!return_sequences) {
     TensorDim d = derivative_.getDim();
@@ -243,7 +242,7 @@ void RNNLayer::calcGradient() {
     derivative_.copy(net_hidden[0]->getGradientRef());
   }
 
-  hidden_ = hidden->getVariableRef();
+  Tensor &hidden_ = hidden->getVariableRef();
 
   Tensor &input_ = net_input[0]->getVariableRef();
   Tensor dh_nx = Tensor(TensorDim(1, 1, 1, derivative_.width()));
@@ -281,9 +280,9 @@ void RNNLayer::calcGradient() {
       acti_func.run_prime_fn(hs, dh, dh);
 
       djdb_h.add_i(dh);
-      djdw_x.add_i(xs.dot(dh, true, false));
-      djdw_h.add_i(hs_prev.dot(dh, true, false));
-      dh.dot(weight_hh, dh_nx, false, true, 1.0);
+      xs.dot(dh, djdw_x, true, false, 1.0);
+      hs_prev.dot(dh, djdw_h, true, false, 1.0);
+      dh.dot(weight_hh, dh_nx, false, true);
     }
   }
 }
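Two behavioural changes in rnn.cpp above deserve a comment. First, the hidden state (and, when training, its gradient) is now zeroed at the start of every forwarding() call rather than once in initialize(), so repeated iterations and stacked RNN layers each start from a clean state. Second, calcGradient() now accumulates straight into the gradient tensors via the dot() overload that takes a destination tensor and a beta factor, which is why djdw_x, djdw_h and djdb_h are explicitly zeroed first. A rough sketch of the equivalence, using only the calls visible in this diff (the beta = 1.0 reading is inferred from the added setZero() calls):

    djdw_x.setZero();                      // accumulation below assumes a zeroed start

    // old: djdw_x.add_i(xs.dot(dh, true, false));  // temporary xs^T . dh, then element-wise add
    // new: accumulate in place, no temporary
    xs.dot(dh, djdw_x, true, false, 1.0);  // djdw_x += xs^T . dh  (beta = 1.0 keeps prior contents)

    // dh_nx, in contrast, is now overwritten each time step (no beta argument),
    // so it needs no reset of its own:
    dh.dot(weight_hh, dh_nx, false, true); // dh_nx = dh . weight_hh^T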
index 02a9185..e28ccf8 100644 (file)
Binary files a/packaging/unittest_models.tar.gz and b/packaging/unittest_models.tar.gz differ
index 0f07fa3..9950432 100644 (file)
@@ -477,3 +477,71 @@ if __name__ == "__main__":
     rnn = K.layers.SimpleRNN(2, return_sequences=True)
     rnn_layer_return_sequence_tc(rnn)(file_name="rnn_return_sequences.info", debug=["summary", "initial_weights", "dx", "output", "layer_name", "label"],)
 
+    rnn_layer_return_sequence_with_batch = lambda rnn_layer: partial(
+        record,
+        model=[
+            K.Input(batch_shape=(2, 2, 1)),
+            rnn_layer,
+            K.layers.Dense(1)
+        ],
+        optimizer=opt.SGD(learning_rate=0.1),
+        iteration=1,
+        input_shape=(2,2,1),
+        label_shape=(2,2,1),
+        is_onehot=False,
+        loss_fn_str="mse"
+    )
+    rnn = K.layers.SimpleRNN(2, return_sequences=True)
+    rnn_layer_return_sequence_with_batch(rnn)(file_name="rnn_return_sequence_with_batch.info", debug=["summary", "initial_weights", "dx", "output", "layer_name", "label"],)
+
+
+    rnn_layer_return_sequence_with_batch_n = lambda rnn_layer: partial(
+        record,
+        model=[
+            K.Input(batch_shape=(2, 2, 1)),
+            rnn_layer,
+            K.layers.Dense(1)
+        ],
+        optimizer=opt.SGD(learning_rate=0.1),
+        iteration=2,
+        input_shape=(2,2,1),
+        label_shape=(2,2,1),
+        is_onehot=False,
+        loss_fn_str="mse"
+    )
+    rnn = K.layers.SimpleRNN(2, return_sequences=True)
+    rnn_layer_return_sequence_with_batch_n(rnn)(file_name="rnn_return_sequence_with_batch_n.info", debug=["summary", "initial_weights", "dx", "output", "layer_name", "label","weights","gradients"],)
+
+    multi_rnn_layer_return_sequence = partial(
+        record,
+        model=[
+            K.Input(batch_shape=(1, 2, 1)),
+            K.layers.SimpleRNN(2, return_sequences=True),
+            K.layers.SimpleRNN(2),
+            K.layers.Dense(1)
+        ],
+        optimizer=opt.SGD(learning_rate=0.1),
+        iteration=1,
+        input_shape=(1,2,1),
+        label_shape=(1,1,1),
+        is_onehot=False,
+        loss_fn_str="mse"
+    )
+    multi_rnn_layer_return_sequence(file_name="multi_rnn_return_sequence.info", debug=["summary", "initial_weights", "dx", "output", "layer_name", "label","weights","gradients"],)    
+
+    multi_rnn_layer_return_sequence_with_batch_n = partial(
+        record,
+        model=[
+            K.Input(batch_shape=(2, 2, 1)),
+            K.layers.SimpleRNN(2, return_sequences=True),
+            K.layers.SimpleRNN(2),
+            K.layers.Dense(1)
+        ],
+        optimizer=opt.SGD(learning_rate=0.1),
+        iteration=2,
+        input_shape=(2,2,1),
+        label_shape=(2,1),
+        is_onehot=False,
+        loss_fn_str="mse"
+    )
+    multi_rnn_layer_return_sequence_with_batch_n(file_name="multi_rnn_return_sequence_with_batch_n.info", debug=["summary", "initial_weights", "dx", "output", "layer_name", "label","weights","gradients"],)    
index a911da4..2b4903b 100644 (file)
@@ -1138,6 +1138,58 @@ INI multi_lstm_return_sequence_with_batch_n(
   }
 );
 
+INI rnn_return_sequence_with_batch(
+  "rnn_return_sequence_with_batch",
+  {
+    nn_base + "loss=mse | batch_size=2",
+    sgd_base + "learning_rate = 0.1",
+    I("input") + input_base + "input_shape=1:2:1",
+    I("rnn") + rnn_base +
+      "unit = 2" + "input_layers=input"+ "return_sequences=true",
+    I("outputlayer") + fc_base + "unit = 1" + "input_layers=rnn"
+  }
+);
+
+INI rnn_return_sequence_with_batch_n(
+  "rnn_return_sequence_with_batch_n",
+  {
+    nn_base + "loss=mse | batch_size=2",
+    sgd_base + "learning_rate = 0.1",
+    I("input") + input_base + "input_shape=1:2:1",
+    I("rnn") + rnn_base +
+      "unit = 2" + "input_layers=input"+ "return_sequences=true",
+    I("outputlayer") + fc_base + "unit = 1" + "input_layers=rnn"
+  }
+);
+
+INI multi_rnn_return_sequence(
+  "multi_rnn_return_sequence",
+  {
+    nn_base + "loss=mse | batch_size=1",
+    sgd_base + "learning_rate = 0.1",
+    I("input") + input_base + "input_shape=1:2:1",
+    I("rnn") + rnn_base +
+      "unit = 2" + "input_layers=input"+ "return_sequences=true",
+    I("rnn2") + rnn_base +
+      "unit = 2" + "input_layers=rnn",
+    I("outputlayer") + fc_base + "unit = 1" + "input_layers=rnn2"
+  }
+);
+
+
+INI multi_rnn_return_sequence_with_batch_n(
+  "multi_rnn_return_sequence_with_batch_n",
+  {
+    nn_base + "loss=mse | batch_size=2",
+    sgd_base + "learning_rate = 0.1",
+    I("input") + input_base + "input_shape=1:2:1",
+    I("rnn") + rnn_base +
+      "unit = 2" + "input_layers=input"+ "return_sequences=true",
+    I("rnn2") + rnn_base +
+      "unit = 2" + "input_layers=rnn",
+    I("outputlayer") + fc_base + "unit = 1" + "input_layers=rnn2"
+  }
+);
 
 
 INSTANTIATE_TEST_CASE_P(
@@ -1190,7 +1242,12 @@ INSTANTIATE_TEST_CASE_P(
     mkModelTc(lstm_return_sequence_with_batch, "2:1:2:1", 1),
     mkModelTc(lstm_return_sequence_with_batch_n, "2:1:2:1", 2),
     mkModelTc(multi_lstm_return_sequence, "1:1:1:1", 1),
-    mkModelTc(multi_lstm_return_sequence_with_batch_n, "2:1:1:1", 2)
+    mkModelTc(multi_lstm_return_sequence_with_batch_n, "2:1:1:1", 2),
+    mkModelTc(rnn_return_sequence_with_batch, "2:1:2:1", 1),
+    mkModelTc(rnn_return_sequence_with_batch_n, "2:1:2:1", 2),
+    mkModelTc(multi_rnn_return_sequence, "1:1:1:1", 1),
+    mkModelTc(multi_rnn_return_sequence_with_batch_n, "2:1:1:1", 2)
+
 // / #if gtest_version <= 1.7.0
 ));
 /// #else gtest_version > 1.8.0
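As a reading aid for the new registrations: following the existing LSTM entries, the second argument to mkModelTc() appears to be the expected label dimension (batch:channel:height:width) and the third the number of training iterations. How the first new case lines up with its INI definition (a sketch of the mapping, not a definitive description of mkModelTc):

    // batch_size=2, return_sequences=true -> 2 time steps, final FC unit = 1
    //   => label dimension "2:1:2:1", trained for 1 iteration
    mkModelTc(rnn_return_sequence_with_batch, "2:1:2:1", 1)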