[ Recurrent ] Fix activation type
author    jijoong.moon <jijoong.moon@samsung.com>
          Thu, 10 Jun 2021 12:01:01 +0000 (21:01 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
          Thu, 17 Jun 2021 06:23:09 +0000 (15:23 +0900)
Use hidden_state_activation_type for the hidden state activation in RNNLayer and GRULayer instead of the inherited LayerV1::activation_type, and copy it in RNNLayer::copy(). The gru_basic model test case is dropped from the test generator and the model unit tests.

**Self evaluation:**
1. Build test:  [X]Passed [ ]Failed [ ]Skipped
2. Run test:  [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon <jijoong.moon@samsung.com>
nntrainer/layers/gru.cpp
nntrainer/layers/rnn.cpp
nntrainer/layers/rnn.h
test/input_gen/genModelTests.py
test/unittest/unittest_nntrainer_models.cpp
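For context, the property split here mirrors the Keras GRU used by the generator test removed below: Keras' activation drives the hidden/candidate state (the role of hidden_state_activation in nntrainer) while recurrent_activation drives the gates. A minimal, illustrative sketch; the unit count and input shape are made up and not part of this patch:

```python
# Illustrative only: a Keras GRU with both activation kinds spelled out.
# "activation" plays the role of hidden_state_activation (default tanh),
# "recurrent_activation" the role of recurrent_activation (default sigmoid).
import numpy as np
from tensorflow import keras as K

gru = K.layers.GRU(
    units=4,
    activation="tanh",               # hidden/candidate state activation
    recurrent_activation="sigmoid",  # gate activation
    return_sequences=False,
)
out = gru(np.zeros((1, 3, 2), dtype=np.float32))  # (batch, time, feature)
print(out.shape)                                  # (1, 4)
```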

nntrainer/layers/gru.cpp
index cc8ccf3..6a6fe7b 100644
@@ -102,9 +102,9 @@ int GRULayer::initialize(Manager &manager) {
 
   h_prev = Tensor(h_dim);
 
-  if (LayerV1::activation_type == ActivationType::ACT_NONE) {
-    LayerV1::activation_type = ActivationType::ACT_TANH;
-    acti_func.setActiFunc(activation_type);
+  if (hidden_state_activation_type == ActivationType::ACT_NONE) {
+    hidden_state_activation_type = ActivationType::ACT_TANH;
+    acti_func.setActiFunc(hidden_state_activation_type);
   }
 
   if (recurrent_activation_type == ActivationType::ACT_NONE) {
@@ -179,6 +179,11 @@ void GRULayer::forwarding(bool training) {
   Tensor hs_prev;
   Tensor hs;
 
+  // zt = sigma(W_hz.h_prev + W_xz.xs)
+  // rt = sigma(W_hr.h_prev + W_xr.xs)
+  // gt = tanh((h_prev*rt).W_hg + W_xg.xs)
+  // h_nx = (1-zt)*gt + zt*h_prev
+
   for (unsigned int b = 0; b < input_dim[0].batch(); ++b) {
     Tensor islice = input_.getBatchSlice(b, 1);
     Tensor oslice = hidden_.getBatchSlice(b, 1);
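As a readability aid for the equations in the comment above, here is a minimal NumPy sketch of a single GRU step (the function and weight names are illustrative, not nntrainer code, and biases are omitted as in the comment); the sigmoid is what recurrent_activation selects and the tanh is what hidden_state_activation selects:

```python
# Minimal NumPy sketch of one GRU step following the commented equations.
# Weight names (W_xz, W_hz, ...) are illustrative, not nntrainer symbols.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gru_step(xs, h_prev, W_xz, W_hz, W_xr, W_hr, W_xg, W_hg):
    zt = sigmoid(xs @ W_xz + h_prev @ W_hz)         # update gate
    rt = sigmoid(xs @ W_xr + h_prev @ W_hr)         # reset gate
    gt = np.tanh(xs @ W_xg + (rt * h_prev) @ W_hg)  # candidate state
    return (1.0 - zt) * gt + zt * h_prev            # h_nx, next hidden state
```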
nntrainer/layers/rnn.cpp
index 1714eda..d57d615 100644
@@ -92,9 +92,9 @@ int RNNLayer::initialize(Manager &manager) {
   // TODO : We could control with something like #define test to save memory
   hidden = std::make_shared<Var_Grad>(d, true, true, "RNN:temp_hidden");
 
-  if (LayerV1::activation_type == ActivationType::ACT_NONE) {
-    LayerV1::activation_type = ActivationType::ACT_TANH;
-    acti_func.setActiFunc(activation_type);
+  if (hidden_state_activation_type == ActivationType::ACT_NONE) {
+    hidden_state_activation_type = ActivationType::ACT_TANH;
+    acti_func.setActiFunc(hidden_state_activation_type);
   }
 
   return status;
@@ -111,10 +111,10 @@ void RNNLayer::setProperty(const PropertyType type, const std::string &value) {
       output_dim[0].width(unit);
     }
     break;
-  case PropertyType::activation:
+  case PropertyType::hidden_state_activation:
     if (!value.empty()) {
       ActivationType acti_type = (ActivationType)parseType(value, TOKEN_ACTI);
-      LayerV1::activation_type = acti_type;
+      hidden_state_activation_type = acti_type;
       acti_func.setActiFunc(acti_type);
     }
     break;
@@ -201,6 +201,7 @@ void RNNLayer::copy(std::shared_ptr<LayerV1> l) {
 
   std::shared_ptr<RNNLayer> from = std::static_pointer_cast<RNNLayer>(l);
   this->unit = from->unit;
+  this->hidden_state_activation_type = from->hidden_state_activation_type;
   this->return_sequences = from->return_sequences;
   this->acti_func = from->acti_func;
 }
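For comparison, the plain RNN step computed by RNNLayer applies a single activation to the new hidden state, and that activation is what hidden_state_activation now selects (tanh when the property is left unset). A minimal NumPy sketch with illustrative names:

```python
# Minimal NumPy sketch of one vanilla RNN step; `act` stands in for the
# hidden state activation (ACT_TANH when the property is not set).
import numpy as np

def rnn_step(xs, h_prev, W_xh, W_hh, bias, act=np.tanh):
    return act(xs @ W_xh + h_prev @ W_hh + bias)
```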
nntrainer/layers/rnn.h
index 113bd09..18f7a9e 100644
@@ -30,9 +30,13 @@ public:
    * @brief     Constructor of RNNLayer
    */
   template <typename... Args>
-  RNNLayer(unsigned int unit_ = 0, bool sequence = false, Args... args) :
+  RNNLayer(
+    unsigned int unit_ = 0,
+    ActivationType hidden_state_activation_type_ = ActivationType::ACT_NONE,
+    bool sequence = false, Args... args) :
     LayerV1(args...),
     unit(unit_),
+    hidden_state_activation_type(hidden_state_activation_type_),
     return_sequences(sequence){};
 
   /**
@@ -103,6 +107,11 @@ private:
   unsigned int unit;
 
   /**
+   * @brief     activation type for hidden state : default is tanh
+   */
+  ActivationType hidden_state_activation_type;
+
+  /**
    * @brief     activation function for h_t : default is tanh
    */
   ActiFunc acti_func;
test/input_gen/genModelTests.py
index 8f2bfce..5bde0ce 100644
@@ -425,25 +425,3 @@ if __name__ == "__main__":
     )
     multi_rnn_layer_tc(1,2)(file_name="multi_rnn_return_sequence.info")
     multi_rnn_layer_tc(2,2)(file_name="multi_rnn_return_sequence_with_batch.info")
-    
-    gru_layer_tc = lambda batch, time, return_sequences: partial(
-        record,
-        model=[
-            K.Input(batch_shape=(batch,time, 1)),
-            K.layers.GRU(
-                time,
-                recurrent_activation="sigmoid",
-                activation="tanh",
-                return_sequences=return_sequences,
-            ),
-            K.layers.Dense(1),
-        ],
-        optimizer=opt.SGD(learning_rate=0.1),
-        iteration=10,
-        input_shape=(batch, time, 1),
-        label_shape=(batch, 1),
-        is_onehot=False,
-        loss_fn_str="mse",
-    )
-
-    gru_layer_tc(1, 1, False)(file_name="gru_basic.info")
test/unittest/unittest_nntrainer_models.cpp
index 7138e9a..deab140 100644
@@ -1208,18 +1208,6 @@ INI multi_rnn_return_sequence_with_batch(
   }
 );
 
-INI gru_basic(
-  "gru_basic",
-  {
-    nn_base + "loss=mse | batch_size=1",
-    sgd_base + "learning_rate = 0.1",
-    I("input") + input_base + "input_shape=1:1:1",
-    I("gru") + gru_base +
-      "unit = 1" + "input_layers=input",
-    I("outputlayer") + fc_base + "unit = 1" + "input_layers=gru"
-  }
-);
-
 INSTANTIATE_TEST_CASE_P(
   nntrainerModelAutoTests, nntrainerModelTest, ::testing::Values(
     mkModelTc(fc_sigmoid_mse, "3:1:1:10", 10),
@@ -1272,8 +1260,7 @@ INSTANTIATE_TEST_CASE_P(
     mkModelTc(rnn_return_sequences, "1:1:2:1", 10),
     mkModelTc(rnn_return_sequence_with_batch, "2:1:2:1", 10),
     mkModelTc(multi_rnn_return_sequence, "1:1:1:1", 10),
-    mkModelTc(multi_rnn_return_sequence_with_batch, "2:1:1:1", 10),
-    mkModelTc(gru_basic, "1:1:1:1", 1)
+    mkModelTc(multi_rnn_return_sequence_with_batch, "2:1:1:1", 10)
 ), [](const testing::TestParamInfo<nntrainerModelTest::ParamType>& info){
  return std::get<0>(info.param).getName();
 });