[rnn lstm gru layer] Maintain rnn, lstm, gru layer properties with props
author hyeonseok lee <hs89.lee@samsung.com>
Thu, 9 Sep 2021 14:11:06 +0000 (23:11 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Mon, 13 Sep 2021 11:59:57 +0000 (20:59 +0900)
 - All rnn, lstm, gru layer properties are now maintained with props (see the usage sketch below)
 - Allow the dropout rate to be zero
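
The sketch below is not part of the commit; it only illustrates how an RNN-family layer is now configured purely through string properties that loadProperties() routes into the props tuple. LSTMLayer and its setProperty(const std::vector<std::string> &) overload come from the diff; the include path and the helper function name are assumptions for illustration.

    // Usage sketch, assuming lstm.h is reachable on the include path.
    #include <lstm.h>

    void configure(nntrainer::LSTMLayer &lstm) {
      // Every key below is parsed by loadProperties() into lstm_props;
      // anything unrecognized falls through to LayerImpl::setProperty().
      lstm.setProperty({"unit=32",
                        "hidden_state_activation=tanh",
                        "recurrent_activation=sigmoid",
                        "return_sequences=true",
                        "dropout=0.0"}); // 0.0 is now a valid dropout rate
    }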

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: hyeonseok lee <hs89.lee@samsung.com>
nntrainer/layers/common_properties.cpp
nntrainer/layers/common_properties.h
nntrainer/layers/dropout.cpp
nntrainer/layers/dropout.h
nntrainer/layers/gru.cpp
nntrainer/layers/gru.h
nntrainer/layers/lstm.cpp
nntrainer/layers/lstm.h
nntrainer/layers/rnn.cpp
nntrainer/layers/rnn.h

index 62eb92c..4ce910c 100644 (file)
@@ -47,8 +47,8 @@ Normalization::Normalization(bool value) { set(value); }
 
 Standardization::Standardization(bool value) { set(value); }
 
-bool DropOutSpec::isValid(const float &v) const {
-  if (v <= 0.0)
+bool DropOutRate::isValid(const float &v) const {
+  if (v < 0.0)
     return false;
   else
     return true;
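
With the check relaxed from v <= 0.0 to v < 0.0, a dropout rate of exactly 0.0 now passes validation while negative rates are still rejected. A minimal behavior sketch, assuming (as elsewhere in nntrainer's property framework) that Property::set() refuses values for which isValid() returns false:

    // Sketch only: illustrates the relaxed DropOutRate validator.
    nntrainer::props::DropOutRate rate;  // default-constructed as 0.0
    rate.set(0.0f);    // accepted after this change: dropout disabled
    rate.set(0.5f);    // accepted: drop half of the activations
    // rate.set(-0.1f); // still rejected: negative rates are invalid
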
@@ -71,6 +71,8 @@ void FilePath::set(const std::string &v) {
 
 std::ifstream::pos_type FilePath::file_size() { return cached_pos_size; }
 
+ReturnSequences::ReturnSequences(bool value) { set(value); }
+
 bool NumClass::isValid(const unsigned int &v) const { return v > 0; }
 
 ConnectionSpec::ConnectionSpec(const std::vector<props::Name> &layer_ids_,
@@ -194,6 +196,14 @@ std::array<unsigned int, 4> Padding2D::compute(const TensorDim &input,
 
 std::string ConnectionSpec::NoneType = "";
 
+HiddenStateActivation::HiddenStateActivation(ActivationTypeInfo::Enum value) {
+  set(value);
+};
+
+RecurrentActivation::RecurrentActivation(ActivationTypeInfo::Enum value) {
+  set(value);
+};
+
 } // namespace props
 
 static const std::vector<std::pair<char, std::string>>
index 9d99bd0..1352849 100644 (file)
@@ -379,22 +379,22 @@ public:
 };
 
 /**
- * @brief DropOutSpec property, this defines drop out specification of layer
+ * @brief DropOutRate property, this defines the drop out rate of a layer
  *
  */
-class DropOutSpec : public nntrainer::Property<float> {
+class DropOutRate : public nntrainer::Property<float> {
 
 public:
   /**
-   * @brief Construct a new DropOut object with a default value 0.0
+   * @brief Construct a new DropOutRate object with a default value 0.0
    *
    */
-  DropOutSpec(float value = 0.0) : nntrainer::Property<float>(value) {}
+  DropOutRate(float value = 0.0) : nntrainer::Property<float>(value) {}
   static constexpr const char *key = "dropout"; /**< unique key to access */
   using prop_tag = float_prop_tag;              /**< property type */
 
   /**
-   * @brief DropOutSpec validator
+   * @brief DropOutRate validator
    *
    * @param v float to validate
   * @retval true if it is greater than or equal to 0.0
@@ -470,6 +470,22 @@ private:
 };
 
 /**
+ * @brief ReturnSequences property, indicates whether to return the full
+ * output sequence. If false, only the last output is returned.
+ *
+ */
+class ReturnSequences : public nntrainer::Property<bool> {
+public:
+  /**
+   * @brief Construct a new ReturnSequences object
+   *
+   */
+  ReturnSequences(bool value = false);
+  static constexpr const char *key = "return_sequences";
+  using prop_tag = bool_prop_tag;
+};
+
+/**
  * @brief Number of class
  * @todo deprecate this
  */
@@ -509,6 +525,40 @@ public:
 };
 
 /**
+ * @brief HiddenStateActivation Enumeration Information
+ *
+ */
+class HiddenStateActivation final : public EnumProperty<ActivationTypeInfo> {
+public:
+  /**
+   * @brief Construct a new HiddenStateActivation object with default value
+   * ActivationTypeInfo::Enum::ACT_NONE
+   *
+   */
+  HiddenStateActivation(
+    ActivationTypeInfo::Enum value = ActivationTypeInfo::Enum::ACT_NONE);
+  using prop_tag = enum_class_prop_tag;
+  static constexpr const char *key = "hidden_state_activation";
+};
+
+/**
+ * @brief RecurrentActivation Enumeration Information
+ *
+ */
+class RecurrentActivation final : public EnumProperty<ActivationTypeInfo> {
+public:
+  /**
+   * @brief Construct a new RecurrentActivation object with default value
+   * ActivationTypeInfo::Enum::ACT_NONE
+   *
+   */
+  RecurrentActivation(
+    ActivationTypeInfo::Enum value = ActivationTypeInfo::Enum::ACT_NONE);
+  using prop_tag = enum_class_prop_tag;
+  static constexpr const char *key = "recurrent_activation";
+};
+
+/**
  * @brief     Enumeration of pooling type
  */
 struct PoolingTypeInfo {
index 02dba0b..789a0db 100644 (file)
@@ -34,7 +34,7 @@ void DropOutLayer::finalize(InitLayerContext &context) {
 }
 
 void DropOutLayer::forwarding(RunLayerContext &context, bool training) {
-  auto &rate_ = std::get<props::DropOutSpec>(dropout_rate).get();
+  auto &rate_ = std::get<props::DropOutRate>(dropout_rate).get();
 
   // Assume it is in-place calculation. It means input and output share mem
   // buffer. So if the training is false, the output is the same with input. In
@@ -57,7 +57,7 @@ void DropOutLayer::forwarding(RunLayerContext &context, bool training) {
 
 void DropOutLayer::calcDerivative(RunLayerContext &context) {
   // Assume it is in-place calculation
-  auto &rate_ = std::get<props::DropOutSpec>(dropout_rate).get();
+  auto &rate_ = std::get<props::DropOutRate>(dropout_rate).get();
   if (rate_ > epsilon) {
     for (unsigned int i = 0; i < context.getNumInputs(); ++i) {
       Tensor &derivative_ = context.getIncomingDerivative(i);
index b7615f5..544bdaa 100644 (file)
@@ -32,7 +32,7 @@ public:
    */
   DropOutLayer(float dropout = 0.0) :
     Layer(),
-    dropout_rate(props::DropOutSpec(dropout)),
+    dropout_rate(props::DropOutRate(dropout)),
     epsilon(1e-3) {}
 
   /**
@@ -99,7 +99,7 @@ public:
   inline static const std::string type = "dropout";
 
 private:
-  std::tuple<props::DropOutSpec> dropout_rate;
+  std::tuple<props::DropOutRate> dropout_rate;
   std::vector<unsigned int> mask_idx;
   float epsilon;
 };
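
As a consequence of the relaxed validator, a DropOutLayer with rate 0.0 is now a valid configuration; since rate_ > epsilon is then false, both forwarding() and calcDerivative() skip the masking branch. A brief sketch, assuming only the constructor and setProperty overload shown in this commit:

    #include <dropout.h>

    // Sketch: with a rate of exactly 0.0 (now valid), rate_ > epsilon is
    // false, so forwarding()/calcDerivative() skip masking entirely and
    // the layer acts as an in-place identity.
    void make_identity_dropout() {
      nntrainer::DropOutLayer no_drop(0.0f);  // constructor shown above
      no_drop.setProperty({"dropout=0.0"});   // property path, also valid
    }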
index 07e9481..1131f4e 100644 (file)
@@ -51,6 +51,16 @@ enum GRUParams {
 
 #define NUM_GATE 3
 
+GRULayer::GRULayer() :
+  LayerImpl(),
+  gru_props(props::Unit(), props::HiddenStateActivation(),
+            props::RecurrentActivation(), props::ReturnSequences(),
+            props::DropOutRate()),
+  wt_idx({0}),
+  acti_func(ActivationType::ACT_NONE, true),
+  recurrent_acti_func(ActivationType::ACT_NONE, true),
+  epsilon(1e-3) {}
+
 // - weight_xh ( input to hidden )
 //  : [1, 1, input_size, unit (hidden_size) x NUM_GATE] -> f, g, i, o
 // - weight_hh ( hidden to hidden )
@@ -58,7 +68,13 @@ enum GRUParams {
 // - bias_h ( hidden bias )
 //  : [1, 1, 1, unit (hidden_size) x NUM_GATE] -> f, g, i, o
 void GRULayer::finalize(InitLayerContext &context) {
-  auto unit = std::get<props::Unit>(props).get();
+  auto unit = std::get<props::Unit>(gru_props).get();
+  auto &hidden_state_activation_type =
+    std::get<props::HiddenStateActivation>(gru_props);
+  auto &recurrent_activation_type =
+    std::get<props::RecurrentActivation>(gru_props);
+  bool return_sequences = std::get<props::ReturnSequences>(gru_props);
+  float dropout_rate = std::get<props::DropOutRate>(gru_props);
 
   if (context.getNumInputs() != 1) {
     throw std::invalid_argument("GRU layer takes only one input");
@@ -132,80 +148,32 @@ void GRULayer::finalize(InitLayerContext &context) {
     h_dim, context.getName() + ":h_prev", Tensor::Initializer::NONE, false,
     FORWARD_FUNC_LIFESPAN);
 
-  if (hidden_state_activation_type == ActivationType::ACT_NONE) {
-    hidden_state_activation_type = ActivationType::ACT_TANH;
-    acti_func.setActiFunc(hidden_state_activation_type);
+  if (hidden_state_activation_type.get() == ActivationType::ACT_NONE) {
+    hidden_state_activation_type.set(ActivationType::ACT_TANH);
+    acti_func.setActiFunc(hidden_state_activation_type.get());
   }
 
-  if (recurrent_activation_type == ActivationType::ACT_NONE) {
-    recurrent_activation_type = ActivationType::ACT_SIGMOID;
-    recurrent_acti_func.setActiFunc(recurrent_activation_type);
+  if (recurrent_activation_type.get() == ActivationType::ACT_NONE) {
+    recurrent_activation_type.set(ActivationType::ACT_SIGMOID);
+    recurrent_acti_func.setActiFunc(recurrent_activation_type.get());
   }
 }
 
 void GRULayer::setProperty(const std::vector<std::string> &values) {
-  /// @todo: deprecate this in favor of loadProperties
-  auto remain_props = loadProperties(values, props);
-  for (unsigned int i = 0; i < remain_props.size(); ++i) {
-    std::string key;
-    std::string value;
-    std::stringstream ss;
-
-    if (getKeyValue(remain_props[i], key, value) != ML_ERROR_NONE) {
-      throw std::invalid_argument("Error parsing the property: " +
-                                  remain_props[i]);
-    }
-
-    if (value.empty()) {
-      ss << "value is empty: key: " << key << ", value: " << value;
-      throw std::invalid_argument(ss.str());
-    }
-
-    /// @note this calls derived setProperty if available
-    setProperty(key, value);
-  }
-}
-
-void GRULayer::setProperty(const std::string &type_str,
-                           const std::string &value) {
-  using PropertyType = nntrainer::Layer::PropertyType;
-  int status = ML_ERROR_NONE;
-  nntrainer::Layer::PropertyType type =
-    static_cast<nntrainer::Layer::PropertyType>(parseLayerProperty(type_str));
-
-  // TODO : Add return_state property & api to get the hidden input
-  switch (type) {
-  case PropertyType::hidden_state_activation: {
-    ActivationType acti_type = (ActivationType)parseType(value, TOKEN_ACTI);
-    hidden_state_activation_type = acti_type;
-    acti_func.setActiFunc(acti_type);
-  } break;
-  case PropertyType::recurrent_activation: {
-    ActivationType acti_type = (ActivationType)parseType(value, TOKEN_ACTI);
-    recurrent_activation_type = acti_type;
-    recurrent_acti_func.setActiFunc(acti_type);
-  } break;
-  case PropertyType::return_sequences: {
-    status = setBoolean(return_sequences, value);
-    throw_status(status);
-  } break;
-  case PropertyType::dropout: {
-    status = setFloat(dropout_rate, value);
-    throw_status(status);
-  } break;
-  default:
-    LayerImpl::setProperty(type_str, value);
-    break;
-  }
+  auto remain_props = loadProperties(values, gru_props);
+  LayerImpl::setProperty(remain_props);
 }
 
 void GRULayer::exportTo(Exporter &exporter, const ExportMethods &method) const {
   LayerImpl::exportTo(exporter, method);
-  exporter.saveResult(props, method, this);
+  exporter.saveResult(gru_props, method, this);
 }
 
 void GRULayer::forwarding(RunLayerContext &context, bool training) {
-  auto unit = std::get<props::Unit>(props).get();
+  auto unit = std::get<props::Unit>(gru_props).get();
+  bool return_sequences = std::get<props::ReturnSequences>(gru_props);
+  float dropout_rate = std::get<props::DropOutRate>(gru_props);
+
   Tensor &weight_xh = context.getWeight(wt_idx[GRUParams::weight_xh]);
   Tensor &weight_hh = context.getWeight(wt_idx[GRUParams::weight_hh]);
   Tensor &bias_h = context.getWeight(wt_idx[GRUParams::bias_h]);
@@ -319,7 +287,10 @@ void GRULayer::calcDerivative(RunLayerContext &context) {
 }
 
 void GRULayer::calcGradient(RunLayerContext &context) {
-  auto unit = std::get<props::Unit>(props).get();
+  auto unit = std::get<props::Unit>(gru_props).get();
+  bool return_sequences = std::get<props::ReturnSequences>(gru_props);
+  float dropout_rate = std::get<props::DropOutRate>(gru_props);
+
   Tensor &djdw_x = context.getWeightGrad(wt_idx[GRUParams::weight_xh]);
   Tensor &djdw_h = context.getWeightGrad(wt_idx[GRUParams::weight_hh]);
   Tensor &djdb_h = context.getWeightGrad(wt_idx[GRUParams::bias_h]);
index 4394387..7fef28f 100644 (file)
@@ -30,20 +30,7 @@ public:
   /**
    * @brief     Constructor of GRULayer
    */
-  GRULayer(
-    ActivationType hidden_state_activation_type_ = ActivationType::ACT_NONE,
-    ActivationType recurrent_activation_type_ = ActivationType::ACT_NONE,
-    bool sequence = false, float dropout = 0.0) :
-    LayerImpl(),
-    props(props::Unit()),
-    wt_idx({0}),
-    hidden_state_activation_type(hidden_state_activation_type_),
-    acti_func(hidden_state_activation_type, true),
-    recurrent_activation_type(recurrent_activation_type_),
-    recurrent_acti_func(recurrent_activation_type, true),
-    return_sequences(sequence),
-    dropout_rate(dropout),
-    epsilon(1e-3) {}
+  GRULayer();
 
   /**
    * @brief     Destructor of GRULayer
@@ -106,14 +93,19 @@ public:
   inline static const std::string type = "gru";
 
 private:
-  std::tuple<props::Unit>
-    props; /**< lstm layer properties : unit - number of output neurons */
-  std::array<unsigned int, 7> wt_idx; /**< indices of the weights */
-
   /**
-   * @brief     activation type for hidden state : default is sigmoid
-   */
-  ActivationType hidden_state_activation_type;
+   * Unit: number of output neurons
+   * HiddenStateActivation: activation type for hidden state. default is tanh
+   * RecurrentActivation: activation type for recurrent. default is sigmoid
+   * ReturnSequences: whether to return the full output sequence
+   * DropOutRate: dropout rate
+   *
+   * */
+  std::tuple<props::Unit, props::HiddenStateActivation,
+             props::RecurrentActivation, props::ReturnSequences,
+             props::DropOutRate>
+    gru_props;
+  std::array<unsigned int, 7> wt_idx; /**< indices of the weights */
 
   /**
   * @brief     activation function for h_t : default is tanh
@@ -121,39 +113,14 @@ private:
   ActiFunc acti_func;
 
   /**
-   * @brief     activation type for recurrent : default is tanh
-   */
-  ActivationType recurrent_activation_type;
-
-  /**
   * @brief     activation function for recurrent : default is sigmoid
    */
   ActiFunc recurrent_acti_func;
 
   /**
-   * @brief     variable to set return sequences
-   */
-  bool return_sequences;
-
-  /**
-   * @brief     drop out rate
-   */
-  float dropout_rate;
-
-  /**
   * @brief     to protect against overflow
    */
   float epsilon;
-
-  /**
-   * @brief setProperty by type and value separated
-   * @param[in] type property type to be passed
-   * @param[in] value value to be passed
-   * @exception exception::not_supported     when property type is not valid for
-   * the particular layer
-   * @exception std::invalid_argument invalid argument
-   */
-  void setProperty(const std::string &type, const std::string &value);
 };
 } // namespace nntrainer
 
index 0eb455b..1243251 100644 (file)
@@ -36,6 +36,16 @@ enum LSTMParams {
 
 #define NUM_GATE 4
 
+LSTMLayer::LSTMLayer() :
+  LayerImpl(),
+  lstm_props(props::Unit(), props::HiddenStateActivation(),
+             props::RecurrentActivation(), props::ReturnSequences(),
+             props::DropOutRate()),
+  wt_idx({0}),
+  acti_func(ActivationType::ACT_NONE, true),
+  recurrent_acti_func(ActivationType::ACT_NONE, true),
+  epsilon(1e-3) {}
+
 // - weight_xh ( input to hidden )
 //  : [1, 1, input_size, unit (hidden_size) x NUM_GATE] -> f, g, i, o
 // - weight_hh ( hidden to hidden )
@@ -43,7 +53,13 @@ enum LSTMParams {
 // - bias_h ( hidden bias )
 //  : [1, 1, 1, unit (hidden_size) x NUM_GATE] -> f, g, i, o
 void LSTMLayer::finalize(InitLayerContext &context) {
-  auto unit = std::get<props::Unit>(props).get();
+  auto unit = std::get<props::Unit>(lstm_props).get();
+  auto &hidden_state_activation_type =
+    std::get<props::HiddenStateActivation>(lstm_props);
+  auto &recurrent_activation_type =
+    std::get<props::RecurrentActivation>(lstm_props);
+  bool return_sequences = std::get<props::ReturnSequences>(lstm_props);
+  float dropout_rate = std::get<props::DropOutRate>(lstm_props);
 
   if (context.getNumInputs() != 1) {
     throw std::invalid_argument("LSTM layer takes only one input");
@@ -113,81 +129,33 @@ void LSTMLayer::finalize(InitLayerContext &context) {
     context.requestTensor(d, context.getName() + ":fgio",
                           Tensor::Initializer::NONE, true, ITERATION_LIFESPAN);
 
-  if (hidden_state_activation_type == ActivationType::ACT_NONE) {
-    hidden_state_activation_type = ActivationType::ACT_TANH;
-    acti_func.setActiFunc(hidden_state_activation_type);
+  if (hidden_state_activation_type.get() == ActivationType::ACT_NONE) {
+    hidden_state_activation_type.set(ActivationType::ACT_TANH);
+    acti_func.setActiFunc(hidden_state_activation_type.get());
   }
 
-  if (recurrent_activation_type == ActivationType::ACT_NONE) {
-    recurrent_activation_type = ActivationType::ACT_SIGMOID;
-    recurrent_acti_func.setActiFunc(recurrent_activation_type);
+  if (recurrent_activation_type.get() == ActivationType::ACT_NONE) {
+    recurrent_activation_type.set(ActivationType::ACT_SIGMOID);
+    recurrent_acti_func.setActiFunc(recurrent_activation_type.get());
   }
 }
 
 void LSTMLayer::setProperty(const std::vector<std::string> &values) {
-  /// @todo: deprecate this in favor of loadProperties
-  auto remain_props = loadProperties(values, props);
-  for (unsigned int i = 0; i < remain_props.size(); ++i) {
-    std::string key;
-    std::string value;
-    std::stringstream ss;
-
-    if (getKeyValue(remain_props[i], key, value) != ML_ERROR_NONE) {
-      throw std::invalid_argument("Error parsing the property: " +
-                                  remain_props[i]);
-    }
-
-    if (value.empty()) {
-      ss << "value is empty: key: " << key << ", value: " << value;
-      throw std::invalid_argument(ss.str());
-    }
-
-    /// @note this calls derived setProperty if available
-    setProperty(key, value);
-  }
-}
-
-void LSTMLayer::setProperty(const std::string &type_str,
-                            const std::string &value) {
-  using PropertyType = nntrainer::Layer::PropertyType;
-  int status = ML_ERROR_NONE;
-  nntrainer::Layer::PropertyType type =
-    static_cast<nntrainer::Layer::PropertyType>(parseLayerProperty(type_str));
-
-  // TODO : Add return_state property & api to get the hidden input
-  switch (type) {
-  case PropertyType::hidden_state_activation: {
-    ActivationType acti_type = (ActivationType)parseType(value, TOKEN_ACTI);
-    hidden_state_activation_type = acti_type;
-    acti_func.setActiFunc(acti_type);
-  } break;
-  case PropertyType::recurrent_activation: {
-    ActivationType acti_type = (ActivationType)parseType(value, TOKEN_ACTI);
-    recurrent_activation_type = acti_type;
-    recurrent_acti_func.setActiFunc(acti_type);
-  } break;
-  case PropertyType::return_sequences: {
-    status = setBoolean(return_sequences, value);
-    throw_status(status);
-  } break;
-  case PropertyType::dropout:
-    status = setFloat(dropout_rate, value);
-    throw_status(status);
-    break;
-  default:
-    LayerImpl::setProperty(type_str, value);
-    break;
-  }
+  auto remain_props = loadProperties(values, lstm_props);
+  LayerImpl::setProperty(remain_props);
 }
 
 void LSTMLayer::exportTo(Exporter &exporter,
                          const ExportMethods &method) const {
   LayerImpl::exportTo(exporter, method);
-  exporter.saveResult(props, method, this);
+  exporter.saveResult(lstm_props, method, this);
 }
 
 void LSTMLayer::forwarding(RunLayerContext &context, bool training) {
-  auto unit = std::get<props::Unit>(props).get();
+  auto unit = std::get<props::Unit>(lstm_props).get();
+  bool return_sequences = std::get<props::ReturnSequences>(lstm_props);
+  float dropout_rate = std::get<props::DropOutRate>(lstm_props);
+
   Tensor &weight_xh = context.getWeight(wt_idx[LSTMParams::weight_xh]);
   Tensor &weight_hh = context.getWeight(wt_idx[LSTMParams::weight_hh]);
   Tensor &bias_h = context.getWeight(wt_idx[LSTMParams::bias_h]);
@@ -282,7 +250,10 @@ void LSTMLayer::calcDerivative(RunLayerContext &context) {
 }
 
 void LSTMLayer::calcGradient(RunLayerContext &context) {
-  auto unit = std::get<props::Unit>(props).get();
+  auto unit = std::get<props::Unit>(lstm_props).get();
+  bool return_sequences = std::get<props::ReturnSequences>(lstm_props);
+  float dropout_rate = std::get<props::DropOutRate>(lstm_props);
+
   Tensor &djdw_x = context.getWeightGrad(wt_idx[LSTMParams::weight_xh]);
   Tensor &djdw_h = context.getWeightGrad(wt_idx[LSTMParams::weight_hh]);
   Tensor &djdb_h = context.getWeightGrad(wt_idx[LSTMParams::bias_h]);
index 20638f1..b034b6b 100644 (file)
@@ -30,20 +30,7 @@ public:
   /**
    * @brief     Constructor of LSTMLayer
    */
-  LSTMLayer(
-    ActivationType hidden_state_activation_type_ = ActivationType::ACT_NONE,
-    ActivationType recurrent_activation_type_ = ActivationType::ACT_NONE,
-    bool sequence = false, float dropout = 0.0) :
-    LayerImpl(),
-    props(props::Unit()),
-    wt_idx({0}),
-    hidden_state_activation_type(hidden_state_activation_type_),
-    acti_func(hidden_state_activation_type, true),
-    recurrent_activation_type(recurrent_activation_type_),
-    recurrent_acti_func(recurrent_activation_type, true),
-    return_sequences(sequence),
-    dropout_rate(dropout),
-    epsilon(1e-3) {}
+  LSTMLayer();
 
   /**
    * @brief     Destructor of LSTMLayer
@@ -106,14 +93,19 @@ public:
   inline static const std::string type = "lstm";
 
 private:
-  std::tuple<props::Unit>
-    props; /**< lstm layer properties : unit - number of output neurons */
-  std::array<unsigned int, 7> wt_idx; /**< indices of the weights */
-
   /**
-   * @brief     activation type for recurrent : default is tanh
-   */
-  ActivationType hidden_state_activation_type;
+   * Unit: number of output neurons
+   * HiddenStateActivation: activation type for hidden state. default is tanh
+   * RecurrentActivation: activation type for recurrent. default is sigmoid
+   * ReturnSequences: whether to return the full output sequence
+   * DropOutRate: dropout rate
+   *
+   * */
+  std::tuple<props::Unit, props::HiddenStateActivation,
+             props::RecurrentActivation, props::ReturnSequences,
+             props::DropOutRate>
+    lstm_props;
+  std::array<unsigned int, 7> wt_idx; /**< indices of the weights */
 
   /**
    * @brief     activation function for h_t : default is tanh
@@ -121,39 +113,14 @@ private:
   ActiFunc acti_func;
 
   /**
-   * @brief     activation type for recurrent : default is sigmoid
-   */
-  ActivationType recurrent_activation_type;
-
-  /**
    * @brief     activation function for recurrent : default is sigmoid
    */
   ActiFunc recurrent_acti_func;
 
   /**
-   * @brief     variable to set return sequences
-   */
-  bool return_sequences;
-
-  /**
-   * @brief     drop out rate
-   */
-  float dropout_rate;
-
-  /**
   * @brief     to protect against overflow
    */
   float epsilon;
-
-  /**
-   * @brief setProperty by type and value separated
-   * @param[in] type property type to be passed
-   * @param[in] value value to be passed
-   * @exception exception::not_supported     when property type is not valid for
-   * the particular layer
-   * @exception std::invalid_argument invalid argument
-   */
-  void setProperty(const std::string &type, const std::string &value);
 };
 } // namespace nntrainer
 
index 5c204da..e7d73d3 100644 (file)
@@ -32,8 +32,20 @@ static constexpr size_t SINGLE_INOUT_IDX = 0;
 //  : [1, 1, 1, unit (hidden_size)]
 enum RNNParams { weight_xh, weight_hh, bias_h, hidden_state, dropout_mask };
 
+RNNLayer::RNNLayer() :
+  LayerImpl(),
+  rnn_props(props::Unit(), props::HiddenStateActivation(),
+            props::ReturnSequences(), props::DropOutRate()),
+  wt_idx({0}),
+  acti_func(ActivationType::ACT_NONE, true),
+  epsilon(1e-3) {}
+
 void RNNLayer::finalize(InitLayerContext &context) {
-  auto unit = std::get<props::Unit>(props).get();
+  auto unit = std::get<props::Unit>(rnn_props).get();
+  auto &hidden_state_activation_type =
+    std::get<props::HiddenStateActivation>(rnn_props);
+  bool return_sequences = std::get<props::ReturnSequences>(rnn_props);
+  float dropout_rate = std::get<props::DropOutRate>(rnn_props);
 
   if (context.getNumInputs() != 1) {
     throw std::invalid_argument("RNN layer takes only one input");
@@ -93,9 +105,9 @@ void RNNLayer::finalize(InitLayerContext &context) {
     context.requestTensor(d, context.getName() + ":hidden_state",
                           Tensor::Initializer::NONE, true, ITERATION_LIFESPAN);
 
-  if (hidden_state_activation_type == ActivationType::ACT_NONE) {
-    hidden_state_activation_type = ActivationType::ACT_TANH;
-    acti_func.setActiFunc(hidden_state_activation_type);
+  if (hidden_state_activation_type.get() == ActivationType::ACT_NONE) {
+    hidden_state_activation_type.set(ActivationType::ACT_TANH);
+    acti_func.setActiFunc(hidden_state_activation_type.get());
   }
 
   if (!acti_func.supportInPlace())
@@ -104,62 +116,19 @@ void RNNLayer::finalize(InitLayerContext &context) {
 }
 
 void RNNLayer::setProperty(const std::vector<std::string> &values) {
-  /// @todo: deprecate this in favor of loadProperties
-  auto remain_props = loadProperties(values, props);
-  for (unsigned int i = 0; i < remain_props.size(); ++i) {
-    std::string key;
-    std::string value;
-    std::stringstream ss;
-
-    if (getKeyValue(remain_props[i], key, value) != ML_ERROR_NONE) {
-      throw std::invalid_argument("Error parsing the property: " +
-                                  remain_props[i]);
-    }
-
-    if (value.empty()) {
-      ss << "value is empty: key: " << key << ", value: " << value;
-      throw std::invalid_argument(ss.str());
-    }
-
-    /// @note this calls derived setProperty if available
-    setProperty(key, value);
-  }
-}
-
-void RNNLayer::setProperty(const std::string &type_str,
-                           const std::string &value) {
-  using PropertyType = nntrainer::Layer::PropertyType;
-  int status = ML_ERROR_NONE;
-  nntrainer::Layer::PropertyType type =
-    static_cast<nntrainer::Layer::PropertyType>(parseLayerProperty(type_str));
-
-  // TODO : Add return_state property & api to get the hidden input
-  switch (type) {
-  case PropertyType::hidden_state_activation: {
-    ActivationType acti_type = (ActivationType)parseType(value, TOKEN_ACTI);
-    hidden_state_activation_type = acti_type;
-    acti_func.setActiFunc(acti_type);
-  } break;
-  case PropertyType::return_sequences: {
-    status = setBoolean(return_sequences, value);
-    throw_status(status);
-  } break;
-  case PropertyType::dropout:
-    status = setFloat(dropout_rate, value);
-    throw_status(status);
-    break;
-  default:
-    LayerImpl::setProperty(type_str, value);
-    break;
-  }
+  auto remain_props = loadProperties(values, rnn_props);
+  LayerImpl::setProperty(remain_props);
 }
 
 void RNNLayer::exportTo(Exporter &exporter, const ExportMethods &method) const {
   LayerImpl::exportTo(exporter, method);
-  exporter.saveResult(props, method, this);
+  exporter.saveResult(rnn_props, method, this);
 }
 
 void RNNLayer::forwarding(RunLayerContext &context, bool training) {
+  bool return_sequences = std::get<props::ReturnSequences>(rnn_props);
+  float dropout_rate = std::get<props::DropOutRate>(rnn_props);
+
   Tensor &weight_xh = context.getWeight(wt_idx[RNNParams::weight_xh]);
   Tensor &weight_hh = context.getWeight(wt_idx[RNNParams::weight_hh]);
   Tensor &bias_h = context.getWeight(wt_idx[RNNParams::bias_h]);
@@ -225,6 +194,9 @@ void RNNLayer::calcDerivative(RunLayerContext &context) {
 }
 
 void RNNLayer::calcGradient(RunLayerContext &context) {
+  bool return_sequences = std::get<props::ReturnSequences>(rnn_props);
+  float dropout_rate = std::get<props::DropOutRate>(rnn_props);
+
   Tensor &djdw_x = context.getWeightGrad(wt_idx[RNNParams::weight_xh]);
   Tensor &djdw_h = context.getWeightGrad(wt_idx[RNNParams::weight_hh]);
   Tensor &djdb_h = context.getWeightGrad(wt_idx[RNNParams::bias_h]);
index d30005f..2fbf521 100644 (file)
@@ -30,17 +30,7 @@ public:
   /**
    * @brief     Constructor of RNNLayer
    */
-  RNNLayer(
-    ActivationType hidden_state_activation_type_ = ActivationType::ACT_NONE,
-    bool ret_sequence = false, float dropout = 0.0) :
-    LayerImpl(),
-    props(props::Unit()),
-    wt_idx({0}),
-    hidden_state_activation_type(hidden_state_activation_type_),
-    acti_func(hidden_state_activation_type, true),
-    return_sequences(ret_sequence),
-    dropout_rate(dropout),
-    epsilon(1e-3) {}
+  RNNLayer();
 
   /**
    * @brief     Destructor of RNNLayer
@@ -103,14 +93,17 @@ public:
   inline static const std::string type = "rnn";
 
 private:
-  std::tuple<props::Unit>
-    props; /**< rnn layer properties : unit - number of output neurons */
-  std::array<unsigned int, 5> wt_idx; /**< indices of the weights */
-
   /**
-   * @brief     activation type for recurrent : default is tanh
-   */
-  ActivationType hidden_state_activation_type;
+   * Unit: number of output neurons
+   * HiddenStateActivation: activation type for hidden state. default is tanh
+   * ReturnSequences: whether to return the full output sequence
+   * DropOutRate: dropout rate
+   *
+   * */
+  std::tuple<props::Unit, props::HiddenStateActivation, props::ReturnSequences,
+             props::DropOutRate>
+    rnn_props;
+  std::array<unsigned int, 5> wt_idx; /**< indices of the weights */
 
   /**
    * @brief     activation function for h_t : default is tanh
@@ -118,29 +111,9 @@ private:
   ActiFunc acti_func;
 
   /**
-   * @brief     opiont for return sequence
-   */
-  bool return_sequences;
-
-  /**
-   * @brief     drop out rate
-   */
-  float dropout_rate;
-
-  /**
   * @brief     to protect against overflow
    */
   float epsilon;
-
-  /**
-   * @brief setProperty by type and value separated
-   * @param[in] type property type to be passed
-   * @param[in] value value to be passed
-   * @exception exception::not_supported     when property type is not valid for
-   * the particular layer
-   * @exception std::invalid_argument invalid argument
-   */
-  void setProperty(const std::string &type, const std::string &value);
 };
 } // namespace nntrainer