[ RNN ] Add unittest case & return_sequences
authorjijoong.moon <jijoong.moon@samsung.com>
Fri, 21 May 2021 02:53:36 +0000 (11:53 +0900)
committerJijoong Moon <jijoong.moon@samsung.com>
Thu, 10 Jun 2021 11:11:54 +0000 (20:11 +0900)
This PR includes:
  . a unit test for the RNN layer
  . support for return_sequences in the RNN layer
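
For context, return_sequences controls whether the layer emits the hidden state of every timestep or only the last one. A minimal Keras sketch of the difference (illustrative only, in the style of genModelTests.py; not part of this patch):

    import tensorflow as tf

    x = tf.random.normal((1, 2, 1))                             # (batch, time, feature)
    last = tf.keras.layers.SimpleRNN(2)(x)                      # return_sequences=False (default)
    seq = tf.keras.layers.SimpleRNN(2, return_sequences=True)(x)
    print(last.shape)  # (1, 2)    -> only the last timestep
    print(seq.shape)   # (1, 2, 2) -> one hidden state per timestep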

**Self evaluation:**
1. Build test:  [X]Passed [ ]Failed [ ]Skipped
2. Run test:  [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon <jijoong.moon@samsung.com>
nntrainer/graph/network_graph.cpp
nntrainer/layers/lstm.cpp
nntrainer/layers/rnn.cpp
nntrainer/layers/rnn.h
packaging/unittest_models.tar.gz
test/input_gen/genModelTests.py
test/unittest/unittest_nntrainer_models.cpp

index 5490f0c..6644eae 100644 (file)
@@ -180,15 +180,6 @@ int NetworkGraph::realizeActivationType(
 
   ActivationType act = current.getActivationType();
 
-  if (current.getType() == RNNLayer::type) {
-    // No need to add activation layer for RNN Layer
-    // Default activation is tanh
-    if (act == ActivationType::ACT_NONE)
-      act = ActivationType::ACT_TANH;
-    current.setActivation(act);
-    return status;
-  }
-
   if (act == ActivationType::ACT_NONE) {
     /// ActivationType::ACT_NONE does not need realization
     return ML_ERROR_NONE;
index d640ae7..562bc6e 100644 (file)
@@ -390,7 +390,6 @@ void LSTMLayer::calcGradient() {
       recurrent_acti_func.run_prime_fn(hf, dhf, dhf);
       recurrent_acti_func.run_prime_fn(hi, dhi, dhi);
       acti_func.run_prime_fn(hg, dhg, dhg);
-
       djdb_h.add_i(dfgio_t);
       djdw_x.add_i(xs.dot(dfgio_t, true, false));
       djdw_h.add_i(hs_prev.dot(dfgio_t, true, false));
index 83031c6..57785d2 100644 (file)
@@ -43,6 +43,10 @@ int RNNLayer::initialize(Manager &manager) {
   output_dim[0] = input_dim[0];
   output_dim[0].width(unit);
 
+  if (!return_sequences) {
+    output_dim[0].height(1);
+  }
+
   TensorDim bias_dim = TensorDim();
   bias_dim.setTensorDim(3, unit);
 
@@ -81,6 +85,21 @@ int RNNLayer::initialize(Manager &manager) {
   h_prev = Tensor(bias_dim);
   h_prev.setZero();
 
+  TensorDim d = input_dim[0];
+  d.width(unit);
+
+  // We would not need this if we reused net_hidden[0], but doing so makes the
+  // unit test fail because the data is modified during gradient calculation.
+  // TODO: guard this with something like a #define for tests to save memory
+  hidden = std::make_shared<Var_Grad>(d, true, true, "RNN:temp_hidden");
+  hidden->getVariableRef().setZero();
+  hidden->getGradientRef().setZero();
+
+  if (Layer::activation_type == ActivationType::ACT_NONE) {
+    Layer::activation_type = ActivationType::ACT_TANH;
+    acti_func.setActiFunc(activation_type);
+  }
+
   return status;
 }
 
@@ -102,6 +121,12 @@ void RNNLayer::setProperty(const PropertyType type, const std::string &value) {
       acti_func.setActiFunc(acti_type);
     }
     break;
+  case PropertyType::return_sequences:
+    if (!value.empty()) {
+      status = setBoolean(return_sequences, value);
+      throw_status(status);
+    }
+    break;
   default:
     Layer::setProperty(type, value);
     break;
@@ -117,7 +142,9 @@ void RNNLayer::forwarding(bool training) {
   Tensor &bias_h =
     weightAt(static_cast<int>(RNNParams::bias_h)).getVariableRef();
 
-  Tensor &hidden_ = net_hidden[0]->getVariableRef();
+  Tensor hidden_;
+  hidden_ = hidden->getVariableRef();
+
   Tensor &input_ = net_input[0]->getVariableRef();
 
   Tensor temp;
@@ -158,6 +185,18 @@ void RNNLayer::forwarding(bool training) {
     if (!training)
       h_prev.getBatchSlice(b, 1).copy(hs);
   }
+
+  if (!return_sequences) {
+    TensorDim d = hidden_.getDim();
+    for (unsigned int b = 0; b < input_dim[0].batch(); ++b) {
+      float *data = hidden_.getAddress(b * d.width() * d.height() +
+                                       (d.height() - 1) * d.width());
+      float *rdata = net_hidden[0]->getVariableRef().getAddress(b * d.width());
+      std::copy(data, data + d.width(), rdata);
+    }
+  } else {
+    net_hidden[0]->getVariableRef().copy(hidden_);
+  }
 }
 
 void RNNLayer::copy(std::shared_ptr<Layer> l) {
@@ -168,7 +207,9 @@ void RNNLayer::copy(std::shared_ptr<Layer> l) {
 }
 
 void RNNLayer::calcDerivative() {
-  Tensor &derivative_ = net_hidden[0]->getGradientRef();
+  Tensor derivative_;
+  derivative_ = hidden->getGradientRef();
+
   Tensor &weight =
     weightAt(static_cast<int>(RNNParams::weight_xh)).getVariableRef();
   Tensor &ret_ = net_input[0]->getGradientRef();
@@ -186,8 +227,24 @@ void RNNLayer::calcGradient() {
   Tensor &weight_hh =
     weightAt(static_cast<int>(RNNParams::weight_hh)).getVariableRef();
 
-  Tensor &derivative_ = net_hidden[0]->getGradientRef();
-  Tensor &hidden_ = net_hidden[0]->getVariableRef();
+  Tensor derivative_;
+  Tensor hidden_;
+  derivative_ = hidden->getGradientRef();
+
+  if (!return_sequences) {
+    TensorDim d = derivative_.getDim();
+    for (unsigned int b = 0; b < input_dim[0].batch(); ++b) {
+      float *data = derivative_.getAddress(b * d.width() * d.height() +
+                                           (d.height() - 1) * d.width());
+      float *rdata = net_hidden[0]->getGradientRef().getAddress(b * d.width());
+      std::copy(rdata, rdata + d.width(), data);
+    }
+  } else {
+    derivative_.copy(net_hidden[0]->getGradientRef());
+  }
+
+  hidden_ = hidden->getVariableRef();
+
   Tensor &input_ = net_input[0]->getVariableRef();
   Tensor dh_nx = Tensor(TensorDim(1, 1, 1, derivative_.width()));
 
@@ -222,17 +279,10 @@ void RNNLayer::calcGradient() {
       }
 
       acti_func.run_prime_fn(hs, dh, dh);
-      dh.multiply_i(hs);
-
-      float alpha = 1.0;
-
-      if (b != 0) {
-        alpha = 0.0;
-      }
 
-      djdb_h.add_i(dh, alpha);
-      djdw_x.add_i(xs.dot(dh, true, false), alpha);
-      djdw_h.add_i(hs_prev.dot(dh, true, false), alpha);
+      djdb_h.add_i(dh);
+      djdw_x.add_i(xs.dot(dh, true, false));
+      djdw_h.add_i(hs_prev.dot(dh, true, false));
       dh.dot(weight_hh, dh_nx, false, true, 1.0);
     }
   }
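
With return_sequences disabled, the layer still computes the full hidden sequence internally and only exposes the last timestep as its output, so calcGradient scatters the incoming gradient into the last row of an otherwise zero buffer. A rough NumPy sketch of that scatter, assuming a (batch, time, unit) layout (illustration only, not nntrainer API):

    import numpy as np

    batch, time, unit = 1, 2, 2
    hidden_grad = np.zeros((batch, time, unit))   # gradient buffer for the full sequence
    out_grad = np.random.randn(batch, 1, unit)    # gradient arriving from the next layer

    # return_sequences == false: copy the incoming gradient into the last timestep
    # only, leaving zeros elsewhere (mirrors the std::copy loop in calcGradient)
    hidden_grad[:, -1, :] = out_grad[:, 0, :]

    # return_sequences == true would instead be a straight copy:
    # hidden_grad = out_grad.reshape(batch, time, unit)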
index e0c9c80..614ad22 100644 (file)
@@ -30,11 +30,10 @@ public:
    * @brief     Constructor of RNNLayer
    */
   template <typename... Args>
-  RNNLayer(unsigned int unit_ = 0, Args... args) : Layer(args...), unit(unit_) {
-    /* Default Activation Type is tanh */
-    if (getActivationType() == ActivationType::ACT_NONE)
-      setActivation(ActivationType::ACT_TANH);
-  }
+  RNNLayer(unsigned int unit_ = 0, bool sequence = false, Args... args) :
+    Layer(args...),
+    unit(unit_),
+    return_sequences(sequence){};
 
   /**
    * @brief     Destructor of RNNLayer
@@ -112,6 +111,16 @@ private:
    * @brief     To save hidden state variable ( batch, 1, 1, unit )
    */
   Tensor h_prev;
+
+  /**
+   * @brief     option for return sequences
+   */
+  bool return_sequences;
+
+  /**
+   * @brief     hidden variable for rnn
+   */
+  std::shared_ptr<Var_Grad> hidden;
 };
 } // namespace nntrainer
 
index b655f46..02a9185 100644 (file)
Binary files a/packaging/unittest_models.tar.gz and b/packaging/unittest_models.tar.gz differ
index 354ac23..0f07fa3 100644 (file)
@@ -440,3 +440,40 @@ if __name__ == "__main__":
         loss_fn_str="mse"
     )
     multi_lstm_layer_return_sequence_with_batch_n(file_name="multi_lstm_return_sequence_with_batch_n.info", debug=["summary", "initial_weights", "dx", "output", "layer_name", "label","weights","gradients"],)    
+
+
+    rnn_layer_tc = lambda rnn_layer: partial(
+        record,
+        model=[
+            K.Input(batch_shape=(1, 1, 1)),
+            rnn_layer,
+            K.layers.Dense(1)
+        ],
+        optimizer=opt.SGD(learning_rate=0.1),
+        iteration=1,
+        input_shape=(1,1,1),
+        label_shape=(1,1),
+        is_onehot=False,
+        loss_fn_str="mse"
+    )
+    rnn = K.layers.SimpleRNN(2)
+    rnn_layer_tc(rnn)(file_name="rnn_basic.info", debug=["summary", "initial_weights", "dx", "output", "layer_name", "label","weights","gradients"],)
+
+    rnn_layer_return_sequence_tc = lambda rnn_layer: partial(
+        record,
+        model=[
+            K.Input(batch_shape=(1, 2, 1)),
+            rnn_layer,
+            K.layers.Dense(1)
+        ],
+        optimizer=opt.SGD(learning_rate=0.1),
+        iteration=1,
+        input_shape=(1,2,1),
+        label_shape=(2,1),
+        is_onehot=False,
+        loss_fn_str="mse"
+    )
+
+    rnn = K.layers.SimpleRNN(2, return_sequences=True)
+    rnn_layer_return_sequence_tc(rnn)(file_name="rnn_return_sequences.info", debug=["summary", "initial_weights", "dx", "output", "layer_name", "label"],)
+
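
For readers less familiar with the lambda-plus-partial pattern used above, the rnn_basic case expands to roughly this single record call (a sketch using the same names as the script):

    record(
        model=[
            K.Input(batch_shape=(1, 1, 1)),
            K.layers.SimpleRNN(2),
            K.layers.Dense(1),
        ],
        optimizer=opt.SGD(learning_rate=0.1),
        iteration=1,
        input_shape=(1, 1, 1),
        label_shape=(1, 1),
        is_onehot=False,
        loss_fn_str="mse",
        file_name="rnn_basic.info",
        debug=["summary", "initial_weights", "dx", "output",
               "layer_name", "label", "weights", "gradients"],
    )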
index 5f22f4b..6c624c7 100644 (file)
@@ -607,6 +607,7 @@ static nntrainer::IniSection nn_base("model", "type = NeuralNetwork");
 static std::string input_base = "type = input";
 static std::string fc_base = "type = Fully_connected";
 static std::string conv_base = "type = conv2d | stride = 1,1 | padding = 0,0";
+static std::string rnn_base = "type = rnn";
 static std::string lstm_base = "type = lstm";
 static std::string pooling_base = "type = pooling2d | padding = 0,0";
 static std::string preprocess_flip_base = "type = preprocess_flip";
@@ -1116,6 +1117,30 @@ INI lstm_return_sequence_with_batch_n(
   }
 );
 
+INI rnn_basic(
+  "rnn_basic",
+  {
+    nn_base + "loss=mse | batch_size=1",
+    sgd_base + "learning_rate = 0.1",
+    I("input") + input_base + "input_shape=1:1:1",
+    I("rnn") + rnn_base +
+      "unit = 2" + "input_layers=input",
+    I("outputlayer") + fc_base + "unit = 1" + "input_layers=rnn"
+  }
+);
+
+INI rnn_return_sequences(
+  "rnn_return_sequences",
+  {
+    nn_base + "loss=mse | batch_size=1",
+    sgd_base + "learning_rate = 0.1",
+    I("input") + input_base + "input_shape=1:2:1",
+    I("rnn") + rnn_base +
+      "unit = 2" + "input_layers=input" + "return_sequences=true",
+    I("outputlayer") + fc_base + "unit = 1" + "input_layers=rnn"
+  }
+);
+
 INI multi_lstm_return_sequence(
   "multi_lstm_return_sequence",
   {
@@ -1190,6 +1215,8 @@ INSTANTIATE_TEST_CASE_P(
     // mkModelTc(fc_softmax_mse_distribute_validate, "3:1:5:3", 1),
     // mkModelTc(fc_softmax_cross_distribute_validate, "3:1:5:3", 1),
     // mkModelTc(fc_sigmoid_cross_distribute_validate, "3:1:5:3", 1)
+    mkModelTc(rnn_basic, "1:1:1:1", 1),
+    mkModelTc(rnn_return_sequences, "1:1:2:1", 1),
     mkModelTc(lstm_basic, "1:1:1:1", 1),
     mkModelTc(lstm_return_sequence, "1:1:2:1", 1),
     mkModelTc(lstm_return_sequence_with_batch, "2:1:2:1", 1),