[rnn lstm gru layer] Maintain rnn, lstm, gru layer properties with props
author hyeonseok lee <hs89.lee@samsung.com>
Thu, 9 Sep 2021 14:11:06 +0000 (23:11 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Mon, 13 Sep 2021 11:59:57 +0000 (20:59 +0900)
 - All rnn, lstm, gru layer properties are now maintained with props (see the usage sketch below)
 - Allow the dropout rate to be zero
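
The sketch below is not part of the commit; it only illustrates how an RNN-family layer is now configured purely through string properties that loadProperties() routes into the props tuple. LSTMLayer and its setProperty(const std::vector<std::string> &) overload come from the diff; the include path and the helper function name are assumptions for illustration.

    // Usage sketch, assuming lstm.h is reachable on the include path.
    #include <lstm.h>

    void configure(nntrainer::LSTMLayer &lstm) {
      // Every key below is parsed by loadProperties() into lstm_props;
      // anything unrecognized falls through to LayerImpl::setProperty().
      lstm.setProperty({"unit=32",
                        "hidden_state_activation=tanh",
                        "recurrent_activation=sigmoid",
                        "return_sequences=true",
                        "dropout=0.0"}); // 0.0 is now a valid dropout rate
    }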

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: hyeonseok lee <hs89.lee@samsung.com>
nntrainer/layers/common_properties.cpp
nntrainer/layers/common_properties.h
nntrainer/layers/dropout.cpp
nntrainer/layers/dropout.h
nntrainer/layers/gru.cpp
nntrainer/layers/gru.h
nntrainer/layers/lstm.cpp
nntrainer/layers/lstm.h
nntrainer/layers/rnn.cpp
nntrainer/layers/rnn.h

index 62eb92c..4ce910c 100644 (file)
@@ -47,8 +47,8 @@ Normalization::Normalization(bool value) { set(value); }
 
 Standardization::Standardization(bool value) { set(value); }
 
-bool DropOutSpec::isValid(const float &v) const {
-  if (v <= 0.0)
+bool DropOutRate::isValid(const float &v) const {
+  if (v < 0.0)
     return false;
   else
     return true;
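
With the check relaxed from v <= 0.0 to v < 0.0, a dropout rate of exactly 0.0 now passes validation while negative rates are still rejected. A minimal behavior sketch, assuming (as elsewhere in nntrainer's property framework) that Property::set() refuses values for which isValid() returns false:

    // Sketch only: illustrates the relaxed DropOutRate validator.
    nntrainer::props::DropOutRate rate;  // default-constructed as 0.0
    rate.set(0.0f);    // accepted after this change: dropout disabled
    rate.set(0.5f);    // accepted: drop half of the activations
    // rate.set(-0.1f); // still rejected: negative rates are invalid
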
@@ -71,6 +71,8 @@ void FilePath::set(const std::string &v) {
 
 std::ifstream::pos_type FilePath::file_size() { return cached_pos_size; }
 
+ReturnSequences::ReturnSequences(bool value) { set(value); }
+
 bool NumClass::isValid(const unsigned int &v) const { return v > 0; }
 
 ConnectionSpec::ConnectionSpec(const std::vector<props::Name> &layer_ids_,
@@ -194,6 +196,14 @@ std::array<unsigned int, 4> Padding2D::compute(const TensorDim &input,
 
 std::string ConnectionSpec::NoneType = "";
 
+HiddenStateActivation::HiddenStateActivation(ActivationTypeInfo::Enum value) {
+  set(value);
+};
+
+RecurrentActivation::RecurrentActivation(ActivationTypeInfo::Enum value) {
+  set(value);
+};
+
 } // namespace props
 
 static const std::vector<std::pair<char, std::string>>
index 9d99bd0..1352849 100644 (file)
@@ -379,22 +379,22 @@ public:
 };
 
 /**
- * @brief DropOutSpec property, this defines drop out specification of layer
+ * @brief DropOutRate property, this defines the drop out rate of a layer
  *
  */
-class DropOutSpec : public nntrainer::Property<float> {
+class DropOutRate : public nntrainer::Property<float> {
 
 public:
   /**
-   * @brief Construct a new DropOut object with a default value 0.0
+   * @brief Construct a new DropOutRate object with a default value 0.0
    *
    */
-  DropOutSpec(float value = 0.0) : nntrainer::Property<float>(value) {}
+  DropOutRate(float value = 0.0) : nntrainer::Property<float>(value) {}
   static constexpr const char *key = "dropout"; /**< unique key to access */
   using prop_tag = float_prop_tag;              /**< property type */
 
   /**
-   * @brief DropOutSpec validator
+   * @brief DropOutRate validator
    *
    * @param v float to validate
   * @retval true if it is greater than or equal to 0.0
@@ -470,6 +470,22 @@ private:
 };
 
 /**
+ * @brief ReturnSequences property, indicates whether to return the full
+ * output sequence. If false, only the last output is returned.
+ *
+ */
+class ReturnSequences : public nntrainer::Property<bool> {
+public:
+  /**
+   * @brief Construct a new ReturnSequences object
+   *
+   */
+  ReturnSequences(bool value = false);
+  static constexpr const char *key = "return_sequences";
+  using prop_tag = bool_prop_tag;
+};
+
+/**
  * @brief Number of class
  * @todo deprecate this
  */
@@ -509,6 +525,40 @@ public:
 };
 
 /**
+ * @brief HiddenStateActivation Enumeration Information
+ *
+ */
+class HiddenStateActivation final : public EnumProperty<ActivationTypeInfo> {
+public:
+  /**
+   * @brief Construct a new HiddenStateActivation object with default value
+   * ActivationTypeInfo::Enum::ACT_NONE
+   *
+   */
+  HiddenStateActivation(
+    ActivationTypeInfo::Enum value = ActivationTypeInfo::Enum::ACT_NONE);
+  using prop_tag = enum_class_prop_tag;
+  static constexpr const char *key = "hidden_state_activation";
+};
+
+/**
+ * @brief RecurrentActivation Enumeration Information
+ *
+ */
+class RecurrentActivation final : public EnumProperty<ActivationTypeInfo> {
+public:
+  /**
+   * @brief Construct a new RecurrentActivation object with default value
+   * ActivationTypeInfo::Enum::ACT_NONE
+   *
+   */
+  RecurrentActivation(
+    ActivationTypeInfo::Enum value = ActivationTypeInfo::Enum::ACT_NONE);
+  using prop_tag = enum_class_prop_tag;
+  static constexpr const char *key = "recurrent_activation";
+};
+
+/**
  * @brief     Enumeration of pooling type
  */
 struct PoolingTypeInfo {
index 02dba0b..789a0db 100644 (file)
@@ -34,7 +34,7 @@ void DropOutLayer::finalize(InitLayerContext &context) {
 }
 
 void DropOutLayer::forwarding(RunLayerContext &context, bool training) {
-  auto &rate_ = std::get<props::DropOutSpec>(dropout_rate).get();
+  auto &rate_ = std::get<props::DropOutRate>(dropout_rate).get();
 
   // Assume it is in-place calculation. It means input and output share mem
   // buffer. So if the training is false, the output is the same with input. In
@@ -57,7 +57,7 @@ void DropOutLayer::forwarding(RunLayerContext &context, bool training) {
 
 void DropOutLayer::calcDerivative(RunLayerContext &context) {
   // Assume it is in-place calculation
-  auto &rate_ = std::get<props::DropOutSpec>(dropout_rate).get();
+  auto &rate_ = std::get<props::DropOutRate>(dropout_rate).get();
   if (rate_ > epsilon) {
     for (unsigned int i = 0; i < context.getNumInputs(); ++i) {
       Tensor &derivative_ = context.getIncomingDerivative(i);
index b7615f5..544bdaa 100644 (file)
@@ -32,7 +32,7 @@ public:
    */
   DropOutLayer(float dropout = 0.0) :
     Layer(),
-    dropout_rate(props::DropOutSpec(dropout)),
+    dropout_rate(props::DropOutRate(dropout)),
     epsilon(1e-3) {}
 
   /**
@@ -99,7 +99,7 @@ public:
   inline static const std::string type = "dropout";
 
 private:
-  std::tuple<props::DropOutSpec> dropout_rate;
+  std::tuple<props::DropOutRate> dropout_rate;
   std::vector<unsigned int> mask_idx;
   float epsilon;
 };
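
As a consequence of the relaxed validator, a DropOutLayer with rate 0.0 is now a valid configuration; since rate_ > epsilon is then false, both forwarding() and calcDerivative() skip the masking branch. A brief sketch, assuming only the constructor and setProperty overload shown in this commit:

    #include <dropout.h>

    // Sketch: with a rate of exactly 0.0 (now valid), rate_ > epsilon is
    // false, so forwarding()/calcDerivative() skip masking entirely and
    // the layer acts as an in-place identity.
    void make_identity_dropout() {
      nntrainer::DropOutLayer no_drop(0.0f);  // constructor shown above
      no_drop.setProperty({"dropout=0.0"});   // property path, also valid
    }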
index 07e9481..1131f4e 100644 (file)
@@ -51,6 +51,16 @@ enum GRUParams {
 
 #define NUM_GATE 3
 
+GRULayer::GRULayer() :
+  LayerImpl(),
+  gru_props(props::Unit(), props::HiddenStateActivation(),
+            props::RecurrentActivation(), props::ReturnSequences(),
+            props::DropOutRate()),
+  wt_idx({0}),
+  acti_func(ActivationType::ACT_NONE, true),
+  recurrent_acti_func(ActivationType::ACT_NONE, true),
+  epsilon(1e-3) {}
+
 // - weight_xh ( input to hidden )
 //  : [1, 1, input_size, unit (hidden_size) x NUM_GATE] -> f, g, i, o
 // - weight_hh ( hidden to hidden )
@@ -58,7 +68,13 @@ enum GRUParams {
 // - bias_h ( hidden bias )
 //  : [1, 1, 1, unit (hidden_size) x NUM_GATE] -> f, g, i, o
 void GRULayer::finalize(InitLayerContext &context) {
-  auto unit = std::get<props::Unit>(props).get();
+  auto unit = std::get<props::Unit>(gru_props).get();
+  auto &hidden_state_activation_type =
+    std::get<props::HiddenStateActivation>(gru_props);
+  auto &recurrent_activation_type =
+    std::get<props::RecurrentActivation>(gru_props);
+  bool return_sequences = std::get<props::ReturnSequences>(gru_props);
+  float dropout_rate = std::get<props::DropOutRate>(gru_props);
 
   if (context.getNumInputs() != 1) {
     throw std::invalid_argument("GRU layer takes only one input");
@@ -132,80 +148,32 @@ void GRULayer::finalize(InitLayerContext &context) {
     h_dim, context.getName() + ":h_prev", Tensor::Initializer::NONE, false,
     FORWARD_FUNC_LIFESPAN);
 
-  if (hidden_state_activation_type == ActivationType::ACT_NONE) {
-    hidden_state_activation_type = ActivationType::ACT_TANH;
-    acti_func.setActiFunc(hidden_state_activation_type);
+  if (hidden_state_activation_type.get() == ActivationType::ACT_NONE) {
+    hidden_state_activation_type.set(ActivationType::ACT_TANH);
+    acti_func.setActiFunc(hidden_state_activation_type.get());
   }
 
-  if (recurrent_activation_type == ActivationType::ACT_NONE) {
-    recurrent_activation_type = ActivationType::ACT_SIGMOID;
-    recurrent_acti_func.setActiFunc(recurrent_activation_type);
+  if (recurrent_activation_type.get() == ActivationType::ACT_NONE) {
+    recurrent_activation_type.set(ActivationType::ACT_SIGMOID);
+    recurrent_acti_func.setActiFunc(recurrent_activation_type.get());
   }
 }
 
 void GRULayer::setProperty(const std::vector<std::string> &values) {
-  /// @todo: deprecate this in favor of loadProperties
-  auto remain_props = loadProperties(values, props);
-  for (unsigned int i = 0; i < remain_props.size(); ++i) {
-    std::string key;
-    std::string value;
-    std::stringstream ss;
-
-    if (getKeyValue(remain_props[i], key, value) != ML_ERROR_NONE) {
-      throw std::invalid_argument("Error parsing the property: " +
-                                  remain_props[i]);
-    }
-
-    if (value.empty()) {
-      ss << "value is empty: key: " << key << ", value: " << value;
-      throw std::invalid_argument(ss.str());
-    }
-
-    /// @note this calls derived setProperty if available
-    setProperty(key, value);
-  }
-}
-
-void GRULayer::setProperty(const std::string &type_str,
-                           const std::string &value) {
-  using PropertyType = nntrainer::Layer::PropertyType;
-  int status = ML_ERROR_NONE;
-  nntrainer::Layer::PropertyType type =
-    static_cast<nntrainer::Layer::PropertyType>(parseLayerProperty(type_str));
-
-  // TODO : Add return_state property & api to get the hidden input
-  switch (type) {
-  case PropertyType::hidden_state_activation: {
-    ActivationType acti_type = (ActivationType)parseType(value, TOKEN_ACTI);
-    hidden_state_activation_type = acti_type;
-    acti_func.setActiFunc(acti_type);
-  } break;
-  case PropertyType::recurrent_activation: {
-    ActivationType acti_type = (ActivationType)parseType(value, TOKEN_ACTI);
-    recurrent_activation_type = acti_type;
-    recurrent_acti_func.setActiFunc(acti_type);
-  } break;
-  case PropertyType::return_sequences: {
-    status = setBoolean(return_sequences, value);
-    throw_status(status);
-  } break;
-  case PropertyType::dropout: {
-    status = setFloat(dropout_rate, value);
-    throw_status(status);
-  } break;
-  default:
-    LayerImpl::setProperty(type_str, value);
-    break;
-  }
+  auto remain_props = loadProperties(values, gru_props);
+  LayerImpl::setProperty(remain_props);
 }
 
 void GRULayer::exportTo(Exporter &exporter, const ExportMethods &method) const {
   LayerImpl::exportTo(exporter, method);
-  exporter.saveResult(props, method, this);
+  exporter.saveResult(gru_props, method, this);
 }
 
 void GRULayer::forwarding(RunLayerContext &context, bool training) {
-  auto unit = std::get<props::Unit>(props).get();
+  auto unit = std::get<props::Unit>(gru_props).get();
+  bool return_sequences = std::get<props::ReturnSequences>(gru_props);
+  float dropout_rate = std::get<props::DropOutRate>(gru_props);
+
   Tensor &weight_xh = context.getWeight(wt_idx[GRUParams::weight_xh]);
   Tensor &weight_hh = context.getWeight(wt_idx[GRUParams::weight_hh]);
   Tensor &bias_h = context.getWeight(wt_idx[GRUParams::bias_h]);
@@ -319,7 +287,10 @@ void GRULayer::calcDerivative(RunLayerContext &context) {
 }
 
 void GRULayer::calcGradient(RunLayerContext &context) {
-  auto unit = std::get<props::Unit>(props).get();
+  auto unit = std::get<props::Unit>(gru_props).get();
+  bool return_sequences = std::get<props::ReturnSequences>(gru_props);
+  float dropout_rate = std::get<props::DropOutRate>(gru_props);
+
   Tensor &djdw_x = context.getWeightGrad(wt_idx[GRUParams::weight_xh]);
   Tensor &djdw_h = context.getWeightGrad(wt_idx[GRUParams::weight_hh]);
   Tensor &djdb_h = context.getWeightGrad(wt_idx[GRUParams::bias_h]);
index 4394387..7fef28f 100644 (file)
@@ -30,20 +30,7 @@ public:
   /**
    * @brief     Constructor of GRULayer
    */
-  GRULayer(
-    ActivationType hidden_state_activation_type_ = ActivationType::ACT_NONE,
-    ActivationType recurrent_activation_type_ = ActivationType::ACT_NONE,
-    bool sequence = false, float dropout = 0.0) :
-    LayerImpl(),
-    props(props::Unit()),
-    wt_idx({0}),
-    hidden_state_activation_type(hidden_state_activation_type_),
-    acti_func(hidden_state_activation_type, true),
-    recurrent_activation_type(recurrent_activation_type_),
-    recurrent_acti_func(recurrent_activation_type, true),
-    return_sequences(sequence),
-    dropout_rate(dropout),
-    epsilon(1e-3) {}
+  GRULayer();
 
   /**
    * @brief     Destructor of GRULayer
@@ -106,14 +93,19 @@ public:
   inline static const std::string type = "gru";
 
 private:
-  std::tuple<props::Unit>
-    props; /**< lstm layer properties : unit - number of output neurons */
-  std::array<unsigned int, 7> wt_idx; /**< indices of the weights */
-
   /**
-   * @brief     activation type for hidden state : default is sigmoid
-   */
-  ActivationType hidden_state_activation_type;
+   * Unit: number of output neurons
+   * HiddenStateActivation: activation type for hidden state. default is tanh
+   * RecurrentActivation: activation type for recurrent. default is sigmoid
+   * ReturnSequences: whether to return the full output sequence
+   * DropOutRate: dropout rate
+   *
+   * */
+  std::tuple<props::Unit, props::HiddenStateActivation,
+             props::RecurrentActivation, props::ReturnSequences,
+             props::DropOutRate>
+    gru_props;
+  std::array<unsigned int, 7> wt_idx; /**< indices of the weights */
 
   /**
   * @brief     activation function for h_t : default is tanh
@@ -121,39 +113,14 @@ private:
   ActiFunc acti_func;
 
   /**
-   * @brief     activation type for recurrent : default is tanh
-   */
-  ActivationType recurrent_activation_type;
-
-  /**
   * @brief     activation function for recurrent : default is sigmoid
    */
   ActiFunc recurrent_acti_func;
 
   /**
-   * @brief     variable to set return sequences
-   */
-  bool return_sequences;
-
-  /**
-   * @brief     drop out rate
-   */
-  float dropout_rate;
-
-  /**
   * @brief     to protect against overflow
    */
   float epsilon;
-
-  /**
-   * @brief setProperty by type and value separated
-   * @param[in] type property type to be passed
-   * @param[in] value value to be passed
-   * @exception exception::not_supported     when property type is not valid for
-   * the particular layer
-   * @exception std::invalid_argument invalid argument
-   */
-  void setProperty(const std::string &type, const std::string &value);
 };
 } // namespace nntrainer
 
index 0eb455b..1243251 100644 (file)
@@ -36,6 +36,16 @@ enum LSTMParams {
 
 #define NUM_GATE 4
 
+LSTMLayer::LSTMLayer() :
+  LayerImpl(),
+  lstm_props(props::Unit(), props::HiddenStateActivation(),
+             props::RecurrentActivation(), props::ReturnSequences(),
+             props::DropOutRate()),
+  wt_idx({0}),
+  acti_func(ActivationType::ACT_NONE, true),
+  recurrent_acti_func(ActivationType::ACT_NONE, true),
+  epsilon(1e-3) {}
+
 // - weight_xh ( input to hidden )
 //  : [1, 1, input_size, unit (hidden_size) x NUM_GATE] -> f, g, i, o
 // - weight_hh ( hidden to hidden )
@@ -43,7 +53,13 @@ enum LSTMParams {
 // - bias_h ( hidden bias )
 //  : [1, 1, 1, unit (hidden_size) x NUM_GATE] -> f, g, i, o
 void LSTMLayer::finalize(InitLayerContext &context) {
-  auto unit = std::get<props::Unit>(props).get();
+  auto unit = std::get<props::Unit>(lstm_props).get();
+  auto &hidden_state_activation_type =
+    std::get<props::HiddenStateActivation>(lstm_props);
+  auto &recurrent_activation_type =
+    std::get<props::RecurrentActivation>(lstm_props);
+  bool return_sequences = std::get<props::ReturnSequences>(lstm_props);
+  float dropout_rate = std::get<props::DropOutRate>(lstm_props);
 
   if (context.getNumInputs() != 1) {
     throw std::invalid_argument("LSTM layer takes only one input");
@@ -113,81 +129,33 @@ void LSTMLayer::finalize(InitLayerContext &context) {
     context.requestTensor(d, context.getName() + ":fgio",
                           Tensor::Initializer::NONE, true, ITERATION_LIFESPAN);
 
-  if (hidden_state_activation_type == ActivationType::ACT_NONE) {
-    hidden_state_activation_type = ActivationType::ACT_TANH;
-    acti_func.setActiFunc(hidden_state_activation_type);
+  if (hidden_state_activation_type.get() == ActivationType::ACT_NONE) {
+    hidden_state_activation_type.set(ActivationType::ACT_TANH);
+    acti_func.setActiFunc(hidden_state_activation_type.get());
   }
 
-  if (recurrent_activation_type == ActivationType::ACT_NONE) {
-    recurrent_activation_type = ActivationType::ACT_SIGMOID;
-    recurrent_acti_func.setActiFunc(recurrent_activation_type);
+  if (recurrent_activation_type.get() == ActivationType::ACT_NONE) {
+    recurrent_activation_type.set(ActivationType::ACT_SIGMOID);
+    recurrent_acti_func.setActiFunc(recurrent_activation_type.get());
   }
 }
 
 void LSTMLayer::setProperty(const std::vector<std::string> &values) {
-  /// @todo: deprecate this in favor of loadProperties
-  auto remain_props = loadProperties(values, props);
-  for (unsigned int i = 0; i < remain_props.size(); ++i) {
-    std::string key;
-    std::string value;
-    std::stringstream ss;
-
-    if (getKeyValue(remain_props[i], key, value) != ML_ERROR_NONE) {
-      throw std::invalid_argument("Error parsing the property: " +
-                                  remain_props[i]);
-    }
-
-    if (value.empty()) {
-      ss << "value is empty: key: " << key << ", value: " << value;
-      throw std::invalid_argument(ss.str());
-    }
-
-    /// @note this calls derived setProperty if available
-    setProperty(key, value);
-  }
-}
-
-void LSTMLayer::setProperty(const std::string &type_str,
-                            const std::string &value) {
-  using PropertyType = nntrainer::Layer::PropertyType;
-  int status = ML_ERROR_NONE;
-  nntrainer::Layer::PropertyType type =
-    static_cast<nntrainer::Layer::PropertyType>(parseLayerProperty(type_str));
-
-  // TODO : Add return_state property & api to get the hidden input
-  switch (type) {
-  case PropertyType::hidden_state_activation: {
-    ActivationType acti_type = (ActivationType)parseType(value, TOKEN_ACTI);
-    hidden_state_activation_type = acti_type;
-    acti_func.setActiFunc(acti_type);
-  } break;
-  case PropertyType::recurrent_activation: {
-    ActivationType acti_type = (ActivationType)parseType(value, TOKEN_ACTI);
-    recurrent_activation_type = acti_type;
-    recurrent_acti_func.setActiFunc(acti_type);
-  } break;
-  case PropertyType::return_sequences: {
-    status = setBoolean(return_sequences, value);
-    throw_status(status);
-  } break;
-  case PropertyType::dropout:
-    status = setFloat(dropout_rate, value);
-    throw_status(status);
-    break;
-  default:
-    LayerImpl::setProperty(type_str, value);
-    break;
-  }
+  auto remain_props = loadProperties(values, lstm_props);
+  LayerImpl::setProperty(remain_props);
 }
 
 void LSTMLayer::exportTo(Exporter &exporter,
                          const ExportMethods &method) const {
   LayerImpl::exportTo(exporter, method);
-  exporter.saveResult(props, method, this);
+  exporter.saveResult(lstm_props, method, this);
 }
 
 void LSTMLayer::forwarding(RunLayerContext &context, bool training) {
-  auto unit = std::get<props::Unit>(props).get();
+  auto unit = std::get<props::Unit>(lstm_props).get();
+  bool return_sequences = std::get<props::ReturnSequences>(lstm_props);
+  float dropout_rate = std::get<props::DropOutRate>(lstm_props);
+
   Tensor &weight_xh = context.getWeight(wt_idx[LSTMParams::weight_xh]);
   Tensor &weight_hh = context.getWeight(wt_idx[LSTMParams::weight_hh]);
   Tensor &bias_h = context.getWeight(wt_idx[LSTMParams::bias_h]);
@@ -282,7 +250,10 @@ void LSTMLayer::calcDerivative(RunLayerContext &context) {
 }
 
 void LSTMLayer::calcGradient(RunLayerContext &context) {
-  auto unit = std::get<props::Unit>(props).get();
+  auto unit = std::get<props::Unit>(lstm_props).get();
+  bool return_sequences = std::get<props::ReturnSequences>(lstm_props);
+  float dropout_rate = std::get<props::DropOutRate>(lstm_props);
+
   Tensor &djdw_x = context.getWeightGrad(wt_idx[LSTMParams::weight_xh]);
   Tensor &djdw_h = context.getWeightGrad(wt_idx[LSTMParams::weight_hh]);
   Tensor &djdb_h = context.getWeightGrad(wt_idx[LSTMParams::bias_h]);
index 20638f1..b034b6b 100644 (file)
@@ -30,20 +30,7 @@ public:
   /**
    * @brief     Constructor of LSTMLayer
    */
-  LSTMLayer(
-    ActivationType hidden_state_activation_type_ = ActivationType::ACT_NONE,
-    ActivationType recurrent_activation_type_ = ActivationType::ACT_NONE,
-    bool sequence = false, float dropout = 0.0) :
-    LayerImpl(),
-    props(props::Unit()),
-    wt_idx({0}),
-    hidden_state_activation_type(hidden_state_activation_type_),
-    acti_func(hidden_state_activation_type, true),
-    recurrent_activation_type(recurrent_activation_type_),
-    recurrent_acti_func(recurrent_activation_type, true),
-    return_sequences(sequence),
-    dropout_rate(dropout),
-    epsilon(1e-3) {}
+  LSTMLayer();
 
   /**
    * @brief     Destructor of LSTMLayer
@@ -106,14 +93,19 @@ public:
   inline static const std::string type = "lstm";
 
 private:
-  std::tuple<props::Unit>
-    props; /**< lstm layer properties : unit - number of output neurons */
-  std::array<unsigned int, 7> wt_idx; /**< indices of the weights */
-
   /**
-   * @brief     activation type for recurrent : default is tanh
-   */
-  ActivationType hidden_state_activation_type;
+   * Unit: number of output neurons
+   * HiddenStateActivation: activation type for hidden state. default is tanh
+   * RecurrentActivation: activation type for recurrent. default is sigmoid
+   * ReturnSequences: whether to return the full output sequence
+   * DropOutRate: dropout rate
+   *
+   * */
+  std::tuple<props::Unit, props::HiddenStateActivation,
+             props::RecurrentActivation, props::ReturnSequences,
+             props::DropOutRate>
+    lstm_props;
+  std::array<unsigned int, 7> wt_idx; /**< indices of the weights */
 
   /**
    * @brief     activation function for h_t : default is tanh
@@ -121,39 +113,14 @@ private:
   ActiFunc acti_func;
 
   /**
-   * @brief     activation type for recurrent : default is sigmoid
-   */
-  ActivationType recurrent_activation_type;
-
-  /**
    * @brief     activation function for recurrent : default is sigmoid
    */
   ActiFunc recurrent_acti_func;
 
   /**
-   * @brief     variable to set return sequences
-   */
-  bool return_sequences;
-
-  /**
-   * @brief     drop out rate
-   */
-  float dropout_rate;
-
-  /**
   * @brief     to protect against overflow
    */
   float epsilon;
-
-  /**
-   * @brief setProperty by type and value separated
-   * @param[in] type property type to be passed
-   * @param[in] value value to be passed
-   * @exception exception::not_supported     when property type is not valid for
-   * the particular layer
-   * @exception std::invalid_argument invalid argument
-   */
-  void setProperty(const std::string &type, const std::string &value);
 };
 } // namespace nntrainer
 
index 5c204da..e7d73d3 100644 (file)
@@ -32,8 +32,20 @@ static constexpr size_t SINGLE_INOUT_IDX = 0;
 //  : [1, 1, 1, unit (hidden_size)]
 enum RNNParams { weight_xh, weight_hh, bias_h, hidden_state, dropout_mask };
 
+RNNLayer::RNNLayer() :
+  LayerImpl(),
+  rnn_props(props::Unit(), props::HiddenStateActivation(),
+            props::ReturnSequences(), props::DropOutRate()),
+  wt_idx({0}),
+  acti_func(ActivationType::ACT_NONE, true),
+  epsilon(1e-3) {}
+
 void RNNLayer::finalize(InitLayerContext &context) {
-  auto unit = std::get<props::Unit>(props).get();
+  auto unit = std::get<props::Unit>(rnn_props).get();
+  auto &hidden_state_activation_type =
+    std::get<props::HiddenStateActivation>(rnn_props);
+  bool return_sequences = std::get<props::ReturnSequences>(rnn_props);
+  float dropout_rate = std::get<props::DropOutRate>(rnn_props);
 
   if (context.getNumInputs() != 1) {
     throw std::invalid_argument("RNN layer takes only one input");
@@ -93,9 +105,9 @@ void RNNLayer::finalize(InitLayerContext &context) {
     context.requestTensor(d, context.getName() + ":hidden_state",
                           Tensor::Initializer::NONE, true, ITERATION_LIFESPAN);
 
-  if (hidden_state_activation_type == ActivationType::ACT_NONE) {
-    hidden_state_activation_type = ActivationType::ACT_TANH;
-    acti_func.setActiFunc(hidden_state_activation_type);
+  if (hidden_state_activation_type.get() == ActivationType::ACT_NONE) {
+    hidden_state_activation_type.set(ActivationType::ACT_TANH);
+    acti_func.setActiFunc(hidden_state_activation_type.get());
   }
 
   if (!acti_func.supportInPlace())
@@ -104,62 +116,19 @@ void RNNLayer::finalize(InitLayerContext &context) {
 }
 
 void RNNLayer::setProperty(const std::vector<std::string> &values) {
-  /// @todo: deprecate this in favor of loadProperties
-  auto remain_props = loadProperties(values, props);
-  for (unsigned int i = 0; i < remain_props.size(); ++i) {
-    std::string key;
-    std::string value;
-    std::stringstream ss;
-
-    if (getKeyValue(remain_props[i], key, value) != ML_ERROR_NONE) {
-      throw std::invalid_argument("Error parsing the property: " +
-                                  remain_props[i]);
-    }
-
-    if (value.empty()) {
-      ss << "value is empty: key: " << key << ", value: " << value;
-      throw std::invalid_argument(ss.str());
-    }
-
-    /// @note this calls derived setProperty if available
-    setProperty(key, value);
-  }
-}
-
-void RNNLayer::setProperty(const std::string &type_str,
-                           const std::string &value) {
-  using PropertyType = nntrainer::Layer::PropertyType;
-  int status = ML_ERROR_NONE;
-  nntrainer::Layer::PropertyType type =
-    static_cast<nntrainer::Layer::PropertyType>(parseLayerProperty(type_str));
-
-  // TODO : Add return_state property & api to get the hidden input
-  switch (type) {
-  case PropertyType::hidden_state_activation: {
-    ActivationType acti_type = (ActivationType)parseType(value, TOKEN_ACTI);
-    hidden_state_activation_type = acti_type;
-    acti_func.setActiFunc(acti_type);
-  } break;
-  case PropertyType::return_sequences: {
-    status = setBoolean(return_sequences, value);
-    throw_status(status);
-  } break;
-  case PropertyType::dropout:
-    status = setFloat(dropout_rate, value);
-    throw_status(status);
-    break;
-  default:
-    LayerImpl::setProperty(type_str, value);
-    break;
-  }
+  auto remain_props = loadProperties(values, rnn_props);
+  LayerImpl::setProperty(remain_props);
 }
 
 void RNNLayer::exportTo(Exporter &exporter, const ExportMethods &method) const {
   LayerImpl::exportTo(exporter, method);
-  exporter.saveResult(props, method, this);
+  exporter.saveResult(rnn_props, method, this);
 }
 
 void RNNLayer::forwarding(RunLayerContext &context, bool training) {
+  bool return_sequences = std::get<props::ReturnSequences>(rnn_props);
+  float dropout_rate = std::get<props::DropOutRate>(rnn_props);
+
   Tensor &weight_xh = context.getWeight(wt_idx[RNNParams::weight_xh]);
   Tensor &weight_hh = context.getWeight(wt_idx[RNNParams::weight_hh]);
   Tensor &bias_h = context.getWeight(wt_idx[RNNParams::bias_h]);
@@ -225,6 +194,9 @@ void RNNLayer::calcDerivative(RunLayerContext &context) {
 }
 
 void RNNLayer::calcGradient(RunLayerContext &context) {
+  bool return_sequences = std::get<props::ReturnSequences>(rnn_props);
+  float dropout_rate = std::get<props::DropOutRate>(rnn_props);
+
   Tensor &djdw_x = context.getWeightGrad(wt_idx[RNNParams::weight_xh]);
   Tensor &djdw_h = context.getWeightGrad(wt_idx[RNNParams::weight_hh]);
   Tensor &djdb_h = context.getWeightGrad(wt_idx[RNNParams::bias_h]);
index d30005f..2fbf521 100644 (file)
@@ -30,17 +30,7 @@ public:
   /**
    * @brief     Constructor of RNNLayer
    */
-  RNNLayer(
-    ActivationType hidden_state_activation_type_ = ActivationType::ACT_NONE,
-    bool ret_sequence = false, float dropout = 0.0) :
-    LayerImpl(),
-    props(props::Unit()),
-    wt_idx({0}),
-    hidden_state_activation_type(hidden_state_activation_type_),
-    acti_func(hidden_state_activation_type, true),
-    return_sequences(ret_sequence),
-    dropout_rate(dropout),
-    epsilon(1e-3) {}
+  RNNLayer();
 
   /**
    * @brief     Destructor of RNNLayer
@@ -103,14 +93,17 @@ public:
   inline static const std::string type = "rnn";
 
 private:
-  std::tuple<props::Unit>
-    props; /**< rnn layer properties : unit - number of output neurons */
-  std::array<unsigned int, 5> wt_idx; /**< indices of the weights */
-
   /**
-   * @brief     activation type for recurrent : default is tanh
-   */
-  ActivationType hidden_state_activation_type;
+   * Unit: number of output neurons
+   * HiddenStateActivation: activation type for hidden state. default is tanh
+   * ReturnSequences: whether to return the full output sequence
+   * DropOutRate: dropout rate
+   *
+   * */
+  std::tuple<props::Unit, props::HiddenStateActivation, props::ReturnSequences,
+             props::DropOutRate>
+    rnn_props;
+  std::array<unsigned int, 5> wt_idx; /**< indices of the weights */
 
   /**
    * @brief     activation function for h_t : default is tanh
@@ -118,29 +111,9 @@ private:
   ActiFunc acti_func;
 
   /**
-   * @brief     opiont for return sequence
-   */
-  bool return_sequences;
-
-  /**
-   * @brief     drop out rate
-   */
-  float dropout_rate;
-
-  /**
   * @brief     to protect against overflow
    */
   float epsilon;
-
-  /**
-   * @brief setProperty by type and value separated
-   * @param[in] type property type to be passed
-   * @param[in] value value to be passed
-   * @exception exception::not_supported     when property type is not valid for
-   * the particular layer
-   * @exception std::invalid_argument invalid argument
-   */
-  void setProperty(const std::string &type, const std::string &value);
 };
 } // namespace nntrainer