- Make lstm_core a class so that lstm/lstmcell/zoneout_lstmcell can inherit from it.
close #1997
Signed-off-by: hyeonseok lee <hs89.lee@samsung.com>
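The resulting hierarchy, as a minimal sketch (member lists abridged to what
this patch touches; see the hunks below for the full signatures):

    // lstmcell_core.h: shared base for the three lstm variants.
    class LSTMCore : public LayerImpl {
    public:
      void forwardLSTM(/* ... */);        // was free function lstmcell_forwarding()
      void calcDerivativeLSTM(/* ... */); // was lstmcell_calcDerivative()
      void calcGradientLSTM(/* ... */);   // was lstmcell_calcGradient()

    protected:
      /** common properties shared by all three subclasses */
      std::tuple<props::Unit, props::IntegrateBias, props::HiddenStateActivation,
                 props::RecurrentActivation>
        lstmcore_props;
      ActiFunc acti_func;           // default tanh
      ActiFunc recurrent_acti_func; // default sigmoid
      float epsilon;                // overflow guard
    };

    class LSTMLayer : public LSTMCore {
      /* ReturnSequences, Bidirectional, DropOutRate, MaxTimestep */
    };
    class LSTMCellLayer : public LSTMCore {
      /* DropOutRate */
    };
    class ZoneoutLSTMCellLayer : public LSTMCore {
      /* HiddenStateZoneOutRate, CellStateZoneOutRate, Test, MaxTimestep, Timestep */
    };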
#include <layer_context.h>
#include <lstm.h>
-#include <lstmcell_core.h>
#include <nntr_threads.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
dropout_mask
};
-/**
- * @brief run lstm fowarding for batch_first input
- *
- * @param NUM_GATE Number of gate which is 4 for lstm
- * @param batch_size batch size
- * @param feature_size feature size
- * @param disable_bias whether to disable bias or not
- * @param unit number of output neurons
- * @param integrate_bias integrate bias_ih, bias_hh to bias_h
- * @param acti_func activation function for memory cell, cell state
- * @param recurrent_acti_func activation function for input/output/forget
- * gate
- * @param enable_dropout whether to apply dropout
- * @param dropout_rate dropout rate
- * @param max_timestep maximum timestep for lstm
- * @param reverse indicate forward/backward direction for input in bidirectional
- * lstm
- * @param input_ input
- * @param weight_ih weight for input to hidden
- * @param weight_hh weight for hidden to hidden
- * @param bias_h bias for input and hidden.
- * @param bias_ih bias for input
- * @param bias_hh bias for hidden
- * @param hidden_state_ hidden state
- * @param cell_state_ cell state
- * @param ifgo_ input gate, forget gate, memory cell, output gate
- * @param mask_ dropout mask
- */
-static void batch_first_forwarding(
+void LSTMLayer::forwardingBatchFirstLSTM(
unsigned int NUM_GATE, const unsigned int batch_size,
const unsigned int feature_size, const bool disable_bias,
const unsigned int unit, const bool integrate_bias, ActiFunc &acti_func,
{NUM_GATE * unit},
(reverse ? max_timestep - 1 - t : t) * NUM_GATE * unit);
- lstmcell_forwarding(1, unit, disable_bias, integrate_bias, acti_func,
- recurrent_acti_func, input, prev_hidden_state,
- prev_cell_state, hidden_state, cell_state, weight_ih,
- weight_hh, bias_h, bias_ih, bias_hh, ifgo);
+ forwardLSTM(1, unit, disable_bias, integrate_bias, acti_func,
+ recurrent_acti_func, input, prev_hidden_state,
+ prev_cell_state, hidden_state, cell_state, weight_ih,
+ weight_hh, bias_h, bias_ih, bias_hh, ifgo);
if (enable_dropout) {
Tensor mask_sample = mask_.getBatchSlice(batch, 1);
}
}
-/**
- * @brief calculate lstm gradient for batch_first input
- *
- * @param NUM_GATE Number of gate which is 4 for lstm
- * @param batch_size batch size
- * @param feature_size feature size
- * @param disable_bias whether to disable bias or not
- * @param unit number of output neurons
- * @param integrate_bias integrate bias_ih, bias_hh to bias_h
- * @param acti_func activation function for memory cell, cell state
- * @param recurrent_acti_func activation function for input/output/forget
- * gate
- * @param return_sequences return sequeces
- * @param bidirectional bidirectional lstm
- * @param enable_dropout whether to apply dropout
- * @param dropout_rate dropout rate
- * @param max_timestep maximum timestep for lstm
- * @param reverse indicate forward/backward direction for input in bidirectional
- * lstm
- * @param input_ input
- * @param incoming_derivative derivative for output which is incoming derivative
- * @param d_weight_ih weight_ih(weight for input to hidden) gradient
- * @param weight_hh weight for hidden to hidden
- * @param d_weight_hh weight_hh(weight for hidden to hidden) gradient
- * @param d_bias_h bias_h(bias for input and hidden) gradient
- * @param d_bias_ih bias_ih(bias for input) gradient
- * @param d_bias_hh bias_hh(bias for hidden) gradient
- * @param hidden_state_ hidden state
- * @param d_hidden_state_ hidden state gradient
- * @param cell_state_ cell state
- * @param d_cell_state_ cell state gradient
- * @param ifgo_ input gate, forget gate, memory cell, output gate
- * @param d_ifgo_ gradient for input gate, forget gate, memory cell, output gate
- * @param mask_ dropout mask
- */
-void batch_first_calcGradient(
+void LSTMLayer::calcGradientBatchFirstLSTM(
unsigned int NUM_GATE, const unsigned int batch_size,
const unsigned int feature_size, const bool disable_bias,
const unsigned int unit, const bool integrate_bias, ActiFunc &acti_func,
// already have precalculated values from incoming derivatives
Tensor d_prev_hidden_state_temp;
- lstmcell_calcGradient(
+ calcGradientLSTM(
1, unit, disable_bias, integrate_bias, acti_func,
recurrent_acti_func, input, prev_hidden_state,
d_prev_hidden_state_temp, prev_cell_state, d_prev_cell_state,
// already have precalculated values from incoming derivatives
Tensor d_prev_hidden_state_temp;
- lstmcell_calcGradient(1, unit, disable_bias, integrate_bias, acti_func,
- recurrent_acti_func, input, prev_hidden_state,
- d_prev_hidden_state_temp, prev_cell_state,
- d_prev_cell_state, d_hidden_state, cell_state,
- d_cell_state, d_weight_ih, weight_hh, d_weight_hh,
- d_bias_h, d_bias_ih, d_bias_hh, ifgo, d_ifgo);
+ calcGradientLSTM(1, unit, disable_bias, integrate_bias, acti_func,
+ recurrent_acti_func, input, prev_hidden_state,
+ d_prev_hidden_state_temp, prev_cell_state,
+ d_prev_cell_state, d_hidden_state, cell_state,
+ d_cell_state, d_weight_ih, weight_hh, d_weight_hh,
+ d_bias_h, d_bias_ih, d_bias_hh, ifgo, d_ifgo);
d_prev_hidden_state.add_i(d_prev_hidden_state_temp);
}
}
}
LSTMLayer::LSTMLayer() :
- LayerImpl(),
- lstm_props(props::Unit(), props::IntegrateBias(),
- props::HiddenStateActivation() = ActivationType::ACT_TANH,
- props::RecurrentActivation() = ActivationType::ACT_SIGMOID,
- props::ReturnSequences(), props::Bidirectional(),
- props::DropOutRate(), props::MaxTimestep()),
- acti_func(ActivationType::ACT_NONE, true),
- recurrent_acti_func(ActivationType::ACT_NONE, true),
- epsilon(1e-3) {
+ LSTMCore(),
+ lstm_props(props::ReturnSequences(), props::Bidirectional(),
+ props::DropOutRate(), props::MaxTimestep()) {
wt_idx.fill(std::numeric_limits<unsigned>::max());
}
const bool disable_bias =
std::get<props::DisableBias>(*layer_impl_props).get();
- NNTR_THROW_IF(std::get<props::Unit>(lstm_props).empty(),
+ NNTR_THROW_IF(std::get<props::Unit>(lstmcore_props).empty(),
std::invalid_argument)
<< "unit property missing for lstm layer";
- const unsigned int unit = std::get<props::Unit>(lstm_props).get();
- const bool integrate_bias = std::get<props::IntegrateBias>(lstm_props).get();
+ const unsigned int unit = std::get<props::Unit>(lstmcore_props).get();
+ const bool integrate_bias =
+ std::get<props::IntegrateBias>(lstmcore_props).get();
const ActivationType hidden_state_activation_type =
- std::get<props::HiddenStateActivation>(lstm_props).get();
+ std::get<props::HiddenStateActivation>(lstmcore_props).get();
const ActivationType recurrent_activation_type =
- std::get<props::RecurrentActivation>(lstm_props).get();
+ std::get<props::RecurrentActivation>(lstmcore_props).get();
+
const bool return_sequences =
std::get<props::ReturnSequences>(lstm_props).get();
const bool bidirectional = std::get<props::Bidirectional>(lstm_props).get();
void LSTMLayer::setProperty(const std::vector<std::string> &values) {
const std::vector<std::string> &remain_props =
loadProperties(values, lstm_props);
- LayerImpl::setProperty(remain_props);
+ LSTMCore::setProperty(remain_props);
}
void LSTMLayer::exportTo(Exporter &exporter,
const ml::train::ExportMethods &method) const {
- LayerImpl::exportTo(exporter, method);
+ LSTMCore::exportTo(exporter, method);
exporter.saveResult(lstm_props, method, this);
}
const bool disable_bias =
std::get<props::DisableBias>(*layer_impl_props).get();
- const unsigned int unit = std::get<props::Unit>(lstm_props).get();
- const bool integrate_bias = std::get<props::IntegrateBias>(lstm_props).get();
+ const unsigned int unit = std::get<props::Unit>(lstmcore_props).get();
+ const bool integrate_bias =
+ std::get<props::IntegrateBias>(lstmcore_props).get();
+
const bool return_sequences =
std::get<props::ReturnSequences>(lstm_props).get();
const bool bidirectional = std::get<props::Bidirectional>(lstm_props).get();
? context.getTensor(wt_idx[LSTMParams::dropout_mask])
: empty;
- batch_first_forwarding(NUM_GATE, batch_size, feature_size, disable_bias, unit,
- integrate_bias, acti_func, recurrent_acti_func,
- enable_dropout, dropout_rate, max_timestep, false,
- input, weight_ih, weight_hh, bias_h, bias_ih, bias_hh,
- hidden_state, cell_state, ifgo, mask);
+ forwardingBatchFirstLSTM(NUM_GATE, batch_size, feature_size, disable_bias,
+ unit, integrate_bias, acti_func, recurrent_acti_func,
+ enable_dropout, dropout_rate, max_timestep, false,
+ input, weight_ih, weight_hh, bias_h, bias_ih,
+ bias_hh, hidden_state, cell_state, ifgo, mask);
if (bidirectional) {
const Tensor &reverse_weight_ih =
context.getTensor(wt_idx[LSTMParams::reverse_cell_state]);
Tensor &reverse_ifgo = context.getTensor(wt_idx[LSTMParams::reverse_ifgo]);
- batch_first_forwarding(
+ forwardingBatchFirstLSTM(
NUM_GATE, batch_size, feature_size, disable_bias, unit, integrate_bias,
acti_func, recurrent_acti_func, enable_dropout, dropout_rate,
max_timestep, true, input, reverse_weight_ih, reverse_weight_hh,
const Tensor &weight_ih = context.getWeight(wt_idx[LSTMParams::weight_ih]);
const Tensor &d_ifgos = context.getTensorGrad(wt_idx[LSTMParams::ifgo]);
- lstmcell_calcDerivative(outgoing_derivative, weight_ih, d_ifgos);
+ calcDerivativeLSTM(outgoing_derivative, weight_ih, d_ifgos);
if (bidirectional) {
const Tensor &reverse_weight_ih =
const Tensor &reverse_d_ifgos =
context.getTensorGrad(wt_idx[LSTMParams::reverse_ifgo]);
- lstmcell_calcDerivative(outgoing_derivative, reverse_weight_ih,
- reverse_d_ifgos, 1.0f);
+ calcDerivativeLSTM(outgoing_derivative, reverse_weight_ih, reverse_d_ifgos,
+ 1.0f);
}
}
const bool disable_bias =
std::get<props::DisableBias>(*layer_impl_props).get();
- const unsigned int unit = std::get<props::Unit>(lstm_props).get();
- const bool integrate_bias = std::get<props::IntegrateBias>(lstm_props).get();
+ const unsigned int unit = std::get<props::Unit>(lstmcore_props).get();
+ const bool integrate_bias =
+ std::get<props::IntegrateBias>(lstmcore_props).get();
+
const bool return_sequences =
std::get<props::ReturnSequences>(lstm_props).get();
const bool bidirectional = std::get<props::Bidirectional>(lstm_props).get();
? context.getTensor(wt_idx[LSTMParams::dropout_mask])
: empty;
- batch_first_calcGradient(
+ calcGradientBatchFirstLSTM(
NUM_GATE, batch_size, feature_size, disable_bias, unit, integrate_bias,
acti_func, recurrent_acti_func, return_sequences, bidirectional,
enable_dropout, dropout_rate, max_timestep, false, input,
Tensor &reverse_d_ifgo =
context.getTensorGrad(wt_idx[LSTMParams::reverse_ifgo]);
- batch_first_calcGradient(
+ calcGradientBatchFirstLSTM(
NUM_GATE, batch_size, feature_size, disable_bias, unit, integrate_bias,
acti_func, recurrent_acti_func, return_sequences, bidirectional,
enable_dropout, dropout_rate, max_timestep, true, input,
#include <acti_func.h>
#include <common_properties.h>
-#include <layer_impl.h>
+#include <lstmcell_core.h>
namespace nntrainer {
* @class LSTMLayer
* @brief LSTMLayer
*/
-class LSTMLayer : public LayerImpl {
+class LSTMLayer : public LSTMCore {
public:
/**
* @brief Constructor of LSTMLayer
private:
static constexpr unsigned int NUM_GATE = 4;
+ /** common properties like Unit, IntegrateBias, HiddenStateActivation and
+ * RecurrentActivation are in lstmcore_props */
+
/**
- * Unit: number of output neurons
- * IntegrateBias: integrate bias_ih, bias_hh to bias_h
- * HiddenStateActivation: activation type for hidden state. default is tanh
- * RecurrentActivation: activation type for recurrent. default is sigmoid
* ReturnSequence: option for return sequence
* Bidirectional: option for bidirectional
* DropOutRate: dropout rate
* MaxTimestep: maximum timestep for lstm
*
* */
- std::tuple<props::Unit, props::IntegrateBias, props::HiddenStateActivation,
- props::RecurrentActivation, props::ReturnSequences,
- props::Bidirectional, props::DropOutRate, props::MaxTimestep>
+ std::tuple<props::ReturnSequences, props::Bidirectional, props::DropOutRate,
+ props::MaxTimestep>
lstm_props;
std::array<unsigned int, 17> wt_idx; /**< indices of the weights */
/**
- * @brief activation function for h_t : default is tanh
- */
- ActiFunc acti_func;
-
- /**
- * @brief activation function for recurrent : default is sigmoid
- */
- ActiFunc recurrent_acti_func;
-
- /**
- * @brief to protect overflow
- */
- float epsilon;
+ * @brief run lstm forwarding for batch_first input
+ *
+ * @param NUM_GATE Number of gates, which is 4 for lstm
+ * @param batch_size batch size
+ * @param feature_size feature size
+ * @param disable_bias whether to disable bias or not
+ * @param unit number of output neurons
+ * @param integrate_bias integrate bias_ih, bias_hh to bias_h
+ * @param acti_func activation function for memory cell, cell state
+ * @param recurrent_acti_func activation function for input/output/forget
+ * gate
+ * @param enable_dropout whether to apply dropout
+ * @param dropout_rate dropout rate
+ * @param max_timestep maximum timestep for lstm
+ * @param reverse indicates forward/backward direction for input in
+ * bidirectional lstm
+ * @param input_ input
+ * @param weight_ih weight for input to hidden
+ * @param weight_hh weight for hidden to hidden
+ * @param bias_h bias for input and hidden.
+ * @param bias_ih bias for input
+ * @param bias_hh bias for hidden
+ * @param hidden_state_ hidden state
+ * @param cell_state_ cell state
+ * @param ifgo_ input gate, forget gate, memory cell, output gate
+ * @param mask_ dropout mask
+ */
+ void forwardingBatchFirstLSTM(
+ unsigned int NUM_GATE, const unsigned int batch_size,
+ const unsigned int feature_size, const bool disable_bias,
+ const unsigned int unit, const bool integrate_bias, ActiFunc &acti_func,
+ ActiFunc &recurrent_acti_func, const bool enable_dropout,
+ const float dropout_rate, const unsigned int max_timestep,
+ const bool reverse, const Tensor &input_, const Tensor &weight_ih,
+ const Tensor &weight_hh, const Tensor &bias_h, const Tensor &bias_ih,
+ const Tensor &bias_hh, Tensor &hidden_state_, Tensor &cell_state_,
+ Tensor &ifgo_, const Tensor &mask_);
+
+ /**
+ * @brief calculate lstm gradient for batch_first input
+ *
+ * @param NUM_GATE Number of gates, which is 4 for lstm
+ * @param batch_size batch size
+ * @param feature_size feature size
+ * @param disable_bias whether to disable bias or not
+ * @param unit number of output neurons
+ * @param integrate_bias integrate bias_ih, bias_hh to bias_h
+ * @param acti_func activation function for memory cell, cell state
+ * @param recurrent_acti_func activation function for input/output/forget
+ * gate
+ * @param return_sequences return sequences
+ * @param bidirectional bidirectional lstm
+ * @param enable_dropout whether to apply dropout
+ * @param dropout_rate dropout rate
+ * @param max_timestep maximum timestep for lstm
+ * @param reverse indicates forward/backward direction for input in
+ * bidirectional lstm
+ * @param input_ input
+ * @param incoming_derivative derivative incoming from the output
+ * @param d_weight_ih weight_ih(weight for input to hidden) gradient
+ * @param weight_hh weight for hidden to hidden
+ * @param d_weight_hh weight_hh(weight for hidden to hidden) gradient
+ * @param d_bias_h bias_h(bias for input and hidden) gradient
+ * @param d_bias_ih bias_ih(bias for input) gradient
+ * @param d_bias_hh bias_hh(bias for hidden) gradient
+ * @param hidden_state_ hidden state
+ * @param d_hidden_state_ hidden state gradient
+ * @param cell_state_ cell state
+ * @param d_cell_state_ cell state gradient
+ * @param ifgo_ input gate, forget gate, memory cell, output gate
+ * @param d_ifgo_ gradient for input gate, forget gate, memory cell, output
+ * gate
+ * @param mask_ dropout mask
+ */
+ void calcGradientBatchFirstLSTM(
+ unsigned int NUM_GATE, const unsigned int batch_size,
+ const unsigned int feature_size, const bool disable_bias,
+ const unsigned int unit, const bool integrate_bias, ActiFunc &acti_func,
+ ActiFunc &recurrent_acti_func, const bool return_sequences,
+ const bool bidirectional, const bool enable_dropout,
+ const float dropout_rate, const unsigned int max_timestep,
+ const bool reverse, const Tensor &input_, const Tensor &incoming_derivative,
+ Tensor &d_weight_ih, const Tensor &weight_hh, Tensor &d_weight_hh,
+ Tensor &d_bias_h, Tensor &d_bias_ih, Tensor &d_bias_hh,
+ const Tensor &hidden_state_, Tensor &d_hidden_state_,
+ const Tensor &cell_state_, Tensor &d_cell_state_, const Tensor &ifgo_,
+ Tensor &d_ifgo_, const Tensor &mask_);
};
} // namespace nntrainer
#include <layer_context.h>
#include <lstmcell.h>
-#include <lstmcell_core.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <node_exporter.h>
dropout_mask
};
-LSTMCellLayer::LSTMCellLayer() :
- LayerImpl(),
- lstmcell_props(props::Unit(), props::IntegrateBias(),
- props::HiddenStateActivation() = ActivationType::ACT_TANH,
- props::RecurrentActivation() = ActivationType::ACT_SIGMOID,
- props::DropOutRate()),
- acti_func(ActivationType::ACT_NONE, true),
- recurrent_acti_func(ActivationType::ACT_NONE, true),
- epsilon(1e-3) {
+LSTMCellLayer::LSTMCellLayer() : lstmcell_props(props::DropOutRate()) {
wt_idx.fill(std::numeric_limits<unsigned>::max());
}
const bool disable_bias =
std::get<props::DisableBias>(*layer_impl_props).get();
- NNTR_THROW_IF(std::get<props::Unit>(lstmcell_props).empty(),
+ NNTR_THROW_IF(std::get<props::Unit>(lstmcore_props).empty(),
std::invalid_argument)
<< "unit property missing for lstmcell layer";
- const unsigned int unit = std::get<props::Unit>(lstmcell_props).get();
+ const unsigned int unit = std::get<props::Unit>(lstmcore_props).get();
const bool integrate_bias =
- std::get<props::IntegrateBias>(lstmcell_props).get();
+ std::get<props::IntegrateBias>(lstmcore_props).get();
const ActivationType hidden_state_activation_type =
- std::get<props::HiddenStateActivation>(lstmcell_props).get();
+ std::get<props::HiddenStateActivation>(lstmcore_props).get();
const ActivationType recurrent_activation_type =
- std::get<props::RecurrentActivation>(lstmcell_props).get();
+ std::get<props::RecurrentActivation>(lstmcore_props).get();
+
const float dropout_rate = std::get<props::DropOutRate>(lstmcell_props).get();
NNTR_THROW_IF(context.getNumInputs() != 3, std::invalid_argument)
void LSTMCellLayer::setProperty(const std::vector<std::string> &values) {
const std::vector<std::string> &remain_props =
loadProperties(values, lstmcell_props);
- LayerImpl::setProperty(remain_props);
+ LSTMCore::setProperty(remain_props);
}
void LSTMCellLayer::exportTo(Exporter &exporter,
const ml::train::ExportMethods &method) const {
- LayerImpl::exportTo(exporter, method);
+ LSTMCore::exportTo(exporter, method);
exporter.saveResult(lstmcell_props, method, this);
}
const bool disable_bias =
std::get<props::DisableBias>(*layer_impl_props).get();
- const unsigned int unit = std::get<props::Unit>(lstmcell_props).get();
- const float dropout_rate = std::get<props::DropOutRate>(lstmcell_props).get();
+ const unsigned int unit = std::get<props::Unit>(lstmcore_props).get();
const bool integrate_bias =
- std::get<props::IntegrateBias>(lstmcell_props).get();
+ std::get<props::IntegrateBias>(lstmcore_props).get();
+
+ const float dropout_rate = std::get<props::DropOutRate>(lstmcell_props).get();
const Tensor &input = context.getInput(INOUT_INDEX::INPUT);
const Tensor &prev_hidden_state =
Tensor &ifgo = context.getTensor(wt_idx[LSTMCellParams::ifgo]);
- lstmcell_forwarding(batch_size, unit, disable_bias, integrate_bias, acti_func,
- recurrent_acti_func, input, prev_hidden_state,
- prev_cell_state, hidden_state, cell_state, weight_ih,
- weight_hh, bias_h, bias_ih, bias_hh, ifgo);
+ forwardLSTM(batch_size, unit, disable_bias, integrate_bias, acti_func,
+ recurrent_acti_func, input, prev_hidden_state, prev_cell_state,
+ hidden_state, cell_state, weight_ih, weight_hh, bias_h, bias_ih,
+ bias_hh, ifgo);
if (dropout_rate > epsilon && training) {
Tensor &dropout_mask =
Tensor &outgoing_derivative =
context.getOutgoingDerivative(INOUT_INDEX::INPUT);
- lstmcell_calcDerivative(outgoing_derivative, weight_ih, d_ifgo);
+ calcDerivativeLSTM(outgoing_derivative, weight_ih, d_ifgo);
}
void LSTMCellLayer::calcGradient(RunLayerContext &context) {
const bool disable_bias =
std::get<props::DisableBias>(*layer_impl_props).get();
- const unsigned int unit = std::get<props::Unit>(lstmcell_props).get();
+ const unsigned int unit = std::get<props::Unit>(lstmcore_props).get();
const bool integrate_bias =
- std::get<props::IntegrateBias>(lstmcell_props).get();
+ std::get<props::IntegrateBias>(lstmcore_props).get();
+
const float dropout_rate = std::get<props::DropOutRate>(lstmcell_props);
const Tensor &input = context.getInput(INOUT_INDEX::INPUT);
d_hidden_state.multiply(dropout_mask, d_hidden_state_masked);
}
- lstmcell_calcGradient(
- batch_size, unit, disable_bias, integrate_bias, acti_func,
- recurrent_acti_func, input, prev_hidden_state, d_prev_hidden_state,
- prev_cell_state, d_prev_cell_state,
- dropout_rate > epsilon ? d_hidden_state_masked : d_hidden_state, cell_state,
- d_cell_state, d_weight_ih, weight_hh, d_weight_hh, d_bias_h, d_bias_ih,
- d_bias_hh, ifgo, d_ifgo);
+ calcGradientLSTM(batch_size, unit, disable_bias, integrate_bias, acti_func,
+ recurrent_acti_func, input, prev_hidden_state,
+ d_prev_hidden_state, prev_cell_state, d_prev_cell_state,
+ dropout_rate > epsilon ? d_hidden_state_masked
+ : d_hidden_state,
+ cell_state, d_cell_state, d_weight_ih, weight_hh,
+ d_weight_hh, d_bias_h, d_bias_ih, d_bias_hh, ifgo, d_ifgo);
}
void LSTMCellLayer::setBatch(RunLayerContext &context, unsigned int batch) {
#include <acti_func.h>
#include <common_properties.h>
-#include <layer_impl.h>
+#include <lstmcell_core.h>
namespace nntrainer {
* @class LSTMCellLayer
* @brief LSTMCellLayer
*/
-class LSTMCellLayer : public LayerImpl {
+class LSTMCellLayer : public LSTMCore {
public:
/**
* @brief Constructor of LSTMCellLayer
OUTPUT_CELL_STATE = 1
};
+ /** common properties like Unit, IntegrateBias, HiddenStateActivation and
+ * RecurrentActivation are in lstmcore_props */
+
/**
- * Unit: number of output neurons
- * IntegrateBias: integrate bias_ih, bias_hh to bias_h
- * HiddenStateActivation: activation type for hidden state. default is tanh
- * RecurrentActivation: activation type for recurrent. default is sigmoid
* DropOutRate: dropout rate
*
* */
- std::tuple<props::Unit, props::IntegrateBias, props::HiddenStateActivation,
- props::RecurrentActivation, props::DropOutRate>
- lstmcell_props;
- std::array<unsigned int, 7> wt_idx; /**< indices of the weights */
-
- /**
- * @brief activation function for h_t : default is tanh
- */
- ActiFunc acti_func;
+ std::tuple<props::DropOutRate> lstmcell_props;
- /**
- * @brief activation function for recurrent : default is sigmoid
- */
- ActiFunc recurrent_acti_func;
-
- /**
- * @brief to protect overflow
- */
- float epsilon;
+ std::array<unsigned int, 7> wt_idx; /**< indices of the weights */
};
} // namespace nntrainer
*
* @file lstmcell_core.cpp
* @date 25 November 2021
- * @brief These are lstm core functions.
+ * @brief This is the lstm core class.
* @see https://github.com/nnstreamer/nntrainer
* @author hyeonseok lee <hs89.lee@samsung.com>
* @bug No known bugs except for NYI items
namespace nntrainer {
-void lstmcell_forwarding(const unsigned int batch_size, const unsigned int unit,
- const bool disable_bias, const bool integrate_bias,
- ActiFunc &acti_func, ActiFunc &recurrent_acti_func,
- const Tensor &input, const Tensor &prev_hidden_state,
- const Tensor &prev_cell_state, Tensor &hidden_state,
- Tensor &cell_state, const Tensor &weight_ih,
- const Tensor &weight_hh, const Tensor &bias_h,
- const Tensor &bias_ih, const Tensor &bias_hh,
- Tensor &ifgo) {
+LSTMCore::LSTMCore() :
+ LayerImpl(),
+ lstmcore_props(props::Unit(), props::IntegrateBias(),
+ props::HiddenStateActivation() = ActivationType::ACT_TANH,
+ props::RecurrentActivation() = ActivationType::ACT_SIGMOID),
+ acti_func(ActivationType::ACT_NONE, true),
+ recurrent_acti_func(ActivationType::ACT_NONE, true),
+ epsilon(1e-3) {}
+
+void LSTMCore::forwardLSTM(const unsigned int batch_size,
+ const unsigned int unit, const bool disable_bias,
+ const bool integrate_bias, ActiFunc &acti_func,
+ ActiFunc &recurrent_acti_func, const Tensor &input,
+ const Tensor &prev_hidden_state,
+ const Tensor &prev_cell_state, Tensor &hidden_state,
+ Tensor &cell_state, const Tensor &weight_ih,
+ const Tensor &weight_hh, const Tensor &bias_h,
+ const Tensor &bias_ih, const Tensor &bias_hh,
+ Tensor &ifgo) {
input.dot(weight_ih, ifgo);
prev_hidden_state.dot(weight_hh, ifgo, false, false, 1.0);
if (!disable_bias) {
hidden_state.multiply_i_strided(output_gate);
}
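For reference, forwardLSTM above implements the standard LSTM cell update.
With the default activations (recurrent_acti_func = sigmoid on the gates,
acti_func = tanh on the memory cell and cell state) and ifgo laid out as
[input | forget | memory cell | output], the two dot() calls plus the bias
add amount to:

    \mathrm{ifgo} = x_t W_{ih} + h_{t-1} W_{hh} + b
    i_t = \sigma(\mathrm{ifgo}_i), \quad f_t = \sigma(\mathrm{ifgo}_f), \quad
    g_t = \tanh(\mathrm{ifgo}_g), \quad o_t = \sigma(\mathrm{ifgo}_o)
    c_t = f_t \odot c_{t-1} + i_t \odot g_t
    h_t = o_t \odot \tanh(c_t)

where b is bias_h when integrate_bias is set and bias_ih + bias_hh otherwise.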
-void lstmcell_calcDerivative(Tensor &outgoing_derivative,
- const Tensor &weight_ih, const Tensor &d_ifgo,
- const float alpha) {
+void LSTMCore::calcDerivativeLSTM(Tensor &outgoing_derivative,
+ const Tensor &weight_ih, const Tensor &d_ifgo,
+ const float alpha) {
d_ifgo.dot(weight_ih, outgoing_derivative, false, true, alpha);
}
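calcDerivativeLSTM is a single GEMM. Reading dot()'s trailing argument as a
scale on the existing output (an assumption consistent with the bidirectional
path below, which passes 1.0f to accumulate the reverse direction into the
same tensor):

    \frac{\partial \mathcal{L}}{\partial x} \leftarrow
      \frac{\partial \mathcal{L}}{\partial \mathrm{ifgo}} \, W_{ih}^{\top}
      + \alpha \cdot \frac{\partial \mathcal{L}}{\partial x}

so the default alpha = 0.0f overwrites outgoing_derivative while alpha = 1.0f
adds to it.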
-void lstmcell_calcGradient(
+void LSTMCore::calcGradientLSTM(
const unsigned int batch_size, const unsigned int unit,
const bool disable_bias, const bool integrate_bias, ActiFunc &acti_func,
ActiFunc &recurrent_acti_func, const Tensor &input,
d_ifgo.dot(weight_hh, d_prev_hidden_state, false, true);
}
+void LSTMCore::setProperty(const std::vector<std::string> &values) {
+ const std::vector<std::string> &remain_props =
+ loadProperties(values, lstmcore_props);
+ LayerImpl::setProperty(remain_props);
+}
+
+void LSTMCore::exportTo(Exporter &exporter,
+ const ml::train::ExportMethods &method) const {
+ LayerImpl::exportTo(exporter, method);
+ exporter.saveResult(lstmcore_props, method, this);
+}
+
} // namespace nntrainer
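A hypothetical usage sketch of the setProperty chain defined above: each level
consumes the keys it owns via loadProperties() and forwards the remainder, so
the common keys now land in lstmcore_props no matter which subclass receives
them (the property values here are illustrative only):

    // e.g. on an LSTMLayer instance:
    layer.setProperty({"unit=10", "return_sequences=true"});
    // "return_sequences" is consumed by LSTMLayer's lstm_props;
    // "unit" falls through to LSTMCore's lstmcore_props;
    // anything still unconsumed reaches LayerImpl::setProperty().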
*
* @file lstmcell_core.h
* @date 25 November 2021
- * @brief These are lstm core functions.
+ * @brief This is the lstm core class.
* @see https://github.com/nnstreamer/nntrainer
* @author hyeonseok lee <hs89.lee@samsung.com>
* @bug No known bugs except for NYI items
#ifdef __cplusplus
#include <acti_func.h>
+#include <common.h>
+#include <layer_impl.h>
+#include <node_exporter.h>
namespace nntrainer {
/**
- * @brief lstm cell forwarding implementation
- *
- * @param batch_size batch size
- * @param unit number of output neurons
- * @param disable_bias whether to disable bias or not
- * @param integrate_bias integrate bias_ih, bias_hh to bias_h
- * @param acti_func activation function for memory cell, cell state
- * @param recurrent_acti_func activation function for input/output/forget
- * gate
- * @param input input
- * @param prev_hidden_state previous hidden state
- * @param prev_cell_state previous cell state
- * @param hidden_state hidden state
- * @param cell_state cell state
- * @param weight_ih weight for input to hidden
- * @param weight_hh weight for hidden to hidden
- * @param bias_h bias for input and hidden.
- * @param bias_ih bias for input
- * @param bias_hh bias for hidden
- * @param ifgo input gate, forget gate, memory cell, output gate
+ * @class LSTMCore
+ * @brief LSTMCore
*/
-void lstmcell_forwarding(const unsigned int batch_size, const unsigned int unit,
- const bool disable_bias, const bool integrate_bias,
- ActiFunc &acti_func, ActiFunc &recurrent_acti_func,
- const Tensor &input, const Tensor &prev_hidden_state,
- const Tensor &prev_cell_state, Tensor &hidden_state,
- Tensor &cell_state, const Tensor &weight_ih,
- const Tensor &weight_hh, const Tensor &bias_h,
- const Tensor &bias_ih, const Tensor &bias_hh,
- Tensor &ifgo);
+class LSTMCore : public LayerImpl {
+public:
+ /**
+ * @brief Constructor of LSTMCore
+ */
+ LSTMCore();
-/**
- * @brief lstm cell calculate derivative implementation
- *
- * @param outgoing_derivative derivative for input
- * @param weight_ih weight for input to hidden
- * @param d_ifgo gradient for input gate, forget gate, memory cell, output gate
- * @param alpha value to be scale outgoing_derivative
- */
-void lstmcell_calcDerivative(Tensor &outgoing_derivative,
- const Tensor &weight_ih, const Tensor &d_ifgo,
- const float alpha = 0.0f);
+ /**
+ * @brief Destructor of LSTMCore
+ */
+ ~LSTMCore() = default;
-/**
- * @brief lstm cell calculate gradient implementation
- *
- * @param batch_size batch size
- * @param unit number of output neurons
- * @param disable_bias whether to disable bias or not
- * @param integrate_bias integrate bias_ih, bias_hh to bias_h
- * @param acti_func activation function for memory cell, cell state
- * @param recurrent_acti_func activation function for input/output/forget
- * gate
- * @param input input
- * @param prev_hidden_state previous hidden state
- * @param d_prev_hidden_state previous hidden state gradient
- * @param prev_cell_state previous cell state
- * @param d_prev_cell_state previous cell state gradient
- * @param d_hidden_state hidden state gradient
- * @param cell_state cell state
- * @param d_cell_state cell state gradient
- * @param d_weight_ih weight_ih(weight for input to hidden) gradient
- * @param weight_hh weight for hidden to hidden
- * @param d_weight_hh weight_hh(weight for hidden to hidden) gradient
- * @param d_bias_h bias_h(bias for input and hidden) gradient
- * @param d_bias_ih bias_ih(bias for input) gradient
- * @param d_bias_hh bias_hh(bias for hidden) gradient
- * @param ifgo input gate, forget gate, memory cell, output gate
- * @param d_ifgo gradient for input gate, forget gate, memory cell, output gate
- */
-void lstmcell_calcGradient(
- const unsigned int batch_size, const unsigned int unit,
- const bool disable_bias, const bool integrate_bias, ActiFunc &acti_func,
- ActiFunc &recurrent_acti_func, const Tensor &input,
- const Tensor &prev_hidden_state, Tensor &d_prev_hidden_state,
- const Tensor &prev_cell_state, Tensor &d_prev_cell_state,
- const Tensor &d_hidden_state, const Tensor &cell_state,
- const Tensor &d_cell_state, Tensor &d_weight_ih, const Tensor &weight_hh,
- Tensor &d_weight_hh, Tensor &d_bias_h, Tensor &d_bias_ih, Tensor &d_bias_hh,
- const Tensor &ifgo, Tensor &d_ifgo);
+ /**
+ * @brief lstm cell forwarding implementation
+ *
+ * @param batch_size batch size
+ * @param unit number of output neurons
+ * @param disable_bias whether to disable bias or not
+ * @param integrate_bias integrate bias_ih, bias_hh to bias_h
+ * @param acti_func activation function for memory cell, cell state
+ * @param recurrent_acti_func activation function for input/output/forget
+ * gate
+ * @param input input
+ * @param prev_hidden_state previous hidden state
+ * @param prev_cell_state previous cell state
+ * @param hidden_state hidden state
+ * @param cell_state cell state
+ * @param weight_ih weight for input to hidden
+ * @param weight_hh weight for hidden to hidden
+ * @param bias_h bias for input and hidden.
+ * @param bias_ih bias for input
+ * @param bias_hh bias for hidden
+ * @param ifgo input gate, forget gate, memory cell, output gate
+ */
+ void forwardLSTM(const unsigned int batch_size, const unsigned int unit,
+ const bool disable_bias, const bool integrate_bias,
+ ActiFunc &acti_func, ActiFunc &recurrent_acti_func,
+ const Tensor &input, const Tensor &prev_hidden_state,
+ const Tensor &prev_cell_state, Tensor &hidden_state,
+ Tensor &cell_state, const Tensor &weight_ih,
+ const Tensor &weight_hh, const Tensor &bias_h,
+ const Tensor &bias_ih, const Tensor &bias_hh, Tensor &ifgo);
+
+ /**
+ * @brief lstm cell calculate derivative implementation
+ *
+ * @param outgoing_derivative derivative for input
+ * @param weight_ih weight for input to hidden
+ * @param d_ifgo gradient for input gate, forget gate, memory cell, output
+ * gate
+ * @param alpha value by which to scale outgoing_derivative
+ */
+ void calcDerivativeLSTM(Tensor &outgoing_derivative, const Tensor &weight_ih,
+ const Tensor &d_ifgo, const float alpha = 0.0f);
+
+ /**
+ * @brief lstm cell calculate gradient implementation
+ *
+ * @param batch_size batch size
+ * @param unit number of output neurons
+ * @param disable_bias whether to disable bias or not
+ * @param integrate_bias integrate bias_ih, bias_hh to bias_h
+ * @param acti_func activation function for memory cell, cell state
+ * @param recurrent_acti_func activation function for input/output/forget
+ * gate
+ * @param input input
+ * @param prev_hidden_state previous hidden state
+ * @param d_prev_hidden_state previous hidden state gradient
+ * @param prev_cell_state previous cell state
+ * @param d_prev_cell_state previous cell state gradient
+ * @param d_hidden_state hidden state gradient
+ * @param cell_state cell state
+ * @param d_cell_state cell state gradient
+ * @param d_weight_ih weight_ih(weight for input to hidden) gradient
+ * @param weight_hh weight for hidden to hidden
+ * @param d_weight_hh weight_hh(weight for hidden to hidden) gradient
+ * @param d_bias_h bias_h(bias for input and hidden) gradient
+ * @param d_bias_ih bias_ih(bias for input) gradient
+ * @param d_bias_hh bias_hh(bias for hidden) gradient
+ * @param ifgo input gate, forget gate, memory cell, output gate
+ * @param d_ifgo gradient for input gate, forget gate, memory cell, output
+ * gate
+ */
+ void calcGradientLSTM(const unsigned int batch_size, const unsigned int unit,
+ const bool disable_bias, const bool integrate_bias,
+ ActiFunc &acti_func, ActiFunc &recurrent_acti_func,
+ const Tensor &input, const Tensor &prev_hidden_state,
+ Tensor &d_prev_hidden_state,
+ const Tensor &prev_cell_state,
+ Tensor &d_prev_cell_state, const Tensor &d_hidden_state,
+ const Tensor &cell_state, const Tensor &d_cell_state,
+ Tensor &d_weight_ih, const Tensor &weight_hh,
+ Tensor &d_weight_hh, Tensor &d_bias_h,
+ Tensor &d_bias_ih, Tensor &d_bias_hh,
+ const Tensor &ifgo, Tensor &d_ifgo);
+
+ /**
+ * @copydoc Layer::setProperty(const std::vector<std::string> &values)
+ */
+ void setProperty(const std::vector<std::string> &values) override;
+
+ /**
+ * @copydoc Layer::exportTo(Exporter &exporter, const
+ * ml::train::ExportMethods &method)
+ */
+ void exportTo(Exporter &exporter,
+ const ml::train::ExportMethods &method) const override;
+
+protected:
+ /**
+ * Unit: number of output neurons
+ * IntegrateBias: integrate bias_ih, bias_hh to bias_h
+ * HiddenStateActivation: activation type for hidden state. default is tanh
+ * RecurrentActivation: activation type for recurrent. default is sigmoid
+ *
+ * */
+ std::tuple<props::Unit, props::IntegrateBias, props::HiddenStateActivation,
+ props::RecurrentActivation>
+ lstmcore_props;
+
+ /**
+ * @brief activation function: default is tanh
+ */
+ ActiFunc acti_func;
+
+ /**
+ * @brief activation function for recurrent: default is sigmoid
+ */
+ ActiFunc recurrent_acti_func;
+
+ /**
+ * @brief to protect overflow
+ */
+ float epsilon;
+};
} // namespace nntrainer
#endif /* __cplusplus */
};
ZoneoutLSTMCellLayer::ZoneoutLSTMCellLayer() :
- LayerImpl(),
- zoneout_lstmcell_props(
- props::Unit(), props::IntegrateBias(),
- props::HiddenStateActivation() = ActivationType::ACT_TANH,
- props::RecurrentActivation() = ActivationType::ACT_SIGMOID,
- HiddenStateZoneOutRate(), CellStateZoneOutRate(), Test(),
- props::MaxTimestep(), props::Timestep()),
- acti_func(ActivationType::ACT_NONE, true),
- recurrent_acti_func(ActivationType::ACT_NONE, true),
- epsilon(1e-3) {
+ zoneout_lstmcell_props(HiddenStateZoneOutRate(), CellStateZoneOutRate(),
+ Test(), props::MaxTimestep(), props::Timestep()) {
wt_idx.fill(std::numeric_limits<unsigned>::max());
}
const bool disable_bias =
std::get<props::DisableBias>(*layer_impl_props).get();
- NNTR_THROW_IF(std::get<props::Unit>(zoneout_lstmcell_props).empty(),
+ NNTR_THROW_IF(std::get<props::Unit>(lstmcore_props).empty(),
std::invalid_argument)
<< "unit property missing for zoneout_lstmcell layer";
- const unsigned int unit = std::get<props::Unit>(zoneout_lstmcell_props).get();
+ const unsigned int unit = std::get<props::Unit>(lstmcore_props).get();
const bool integrate_bias =
- std::get<props::IntegrateBias>(zoneout_lstmcell_props).get();
+ std::get<props::IntegrateBias>(lstmcore_props).get();
const ActivationType hidden_state_activation_type =
- std::get<props::HiddenStateActivation>(zoneout_lstmcell_props).get();
+ std::get<props::HiddenStateActivation>(lstmcore_props).get();
const ActivationType recurrent_activation_type =
- std::get<props::RecurrentActivation>(zoneout_lstmcell_props).get();
+ std::get<props::RecurrentActivation>(lstmcore_props).get();
+
const bool test = std::get<Test>(zoneout_lstmcell_props).get();
const unsigned int max_timestep =
std::get<props::MaxTimestep>(zoneout_lstmcell_props).get();
void ZoneoutLSTMCellLayer::setProperty(const std::vector<std::string> &values) {
const std::vector<std::string> &remain_props =
loadProperties(values, zoneout_lstmcell_props);
- LayerImpl::setProperty(remain_props);
+ LSTMCore::setProperty(remain_props);
}
void ZoneoutLSTMCellLayer::exportTo(
Exporter &exporter, const ml::train::ExportMethods &method) const {
- LayerImpl::exportTo(exporter, method);
+ LSTMCore::exportTo(exporter, method);
exporter.saveResult(zoneout_lstmcell_props, method, this);
}
const bool disable_bias =
std::get<props::DisableBias>(*layer_impl_props).get();
- const unsigned int unit = std::get<props::Unit>(zoneout_lstmcell_props).get();
+ const unsigned int unit = std::get<props::Unit>(lstmcore_props).get();
const bool integrate_bias =
- std::get<props::IntegrateBias>(zoneout_lstmcell_props).get();
+ std::get<props::IntegrateBias>(lstmcore_props).get();
+
const float hidden_state_zoneout_rate =
std::get<HiddenStateZoneOutRate>(zoneout_lstmcell_props).get();
const float cell_state_zoneout_rate =
Tensor &lstm_cell_state =
context.getTensor(wt_idx[ZoneoutLSTMParams::lstm_cell_state]);
- lstmcell_forwarding(batch_size, unit, disable_bias, integrate_bias, acti_func,
- recurrent_acti_func, input, prev_hidden_state,
- prev_cell_state, hidden_state, lstm_cell_state, weight_ih,
- weight_hh, bias_h, bias_ih, bias_hh, ifgo);
+ forwardLSTM(batch_size, unit, disable_bias, integrate_bias, acti_func,
+ recurrent_acti_func, input, prev_hidden_state, prev_cell_state,
+ hidden_state, lstm_cell_state, weight_ih, weight_hh, bias_h,
+ bias_ih, bias_hh, ifgo);
if (training) {
Tensor &hs_zoneout_mask =
context.getWeight(wt_idx[ZoneoutLSTMParams::weight_ih]);
const Tensor &d_ifgo = context.getTensorGrad(wt_idx[ZoneoutLSTMParams::ifgo]);
- lstmcell_calcDerivative(outgoing_derivative, weight_ih, d_ifgo);
+ calcDerivativeLSTM(outgoing_derivative, weight_ih, d_ifgo);
}
void ZoneoutLSTMCellLayer::calcGradient(RunLayerContext &context) {
const bool disable_bias =
std::get<props::DisableBias>(*layer_impl_props).get();
- const unsigned int unit = std::get<props::Unit>(zoneout_lstmcell_props).get();
+ const unsigned int unit = std::get<props::Unit>(lstmcore_props).get();
const bool integrate_bias =
- std::get<props::IntegrateBias>(zoneout_lstmcell_props).get();
+ std::get<props::IntegrateBias>(lstmcore_props).get();
+
const bool test = std::get<Test>(zoneout_lstmcell_props).get();
const unsigned int max_timestep =
std::get<props::MaxTimestep>(zoneout_lstmcell_props).get();
d_prev_cell_state_residual);
d_cell_state.multiply(cell_state_zoneout_mask, d_lstm_cell_state);
- lstmcell_calcGradient(
- batch_size, unit, disable_bias, integrate_bias, acti_func,
- recurrent_acti_func, input, prev_hidden_state, d_prev_hidden_state,
- prev_cell_state, d_prev_cell_state, d_hidden_state_masked, lstm_cell_state,
- d_lstm_cell_state, d_weight_ih, weight_hh, d_weight_hh, d_bias_h, d_bias_ih,
- d_bias_hh, ifgo, d_ifgo);
+ calcGradientLSTM(batch_size, unit, disable_bias, integrate_bias, acti_func,
+ recurrent_acti_func, input, prev_hidden_state,
+ d_prev_hidden_state, prev_cell_state, d_prev_cell_state,
+ d_hidden_state_masked, lstm_cell_state, d_lstm_cell_state,
+ d_weight_ih, weight_hh, d_weight_hh, d_bias_h, d_bias_ih,
+ d_bias_hh, ifgo, d_ifgo);
d_prev_hidden_state.add_i(d_prev_hidden_state_residual);
d_prev_cell_state.add_i(d_prev_cell_state_residual);
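For context, the residual terms above follow the standard zoneout formulation
(Krueger et al.), which this layer appears to implement: each state unit keeps
its previous value where the mask is zero, so with hidden/cell zoneout masks
m_h and m_c,

    h_t = m_h \odot h_t^{\mathrm{lstm}} + (1 - m_h) \odot h_{t-1}, \qquad
    c_t = m_c \odot c_t^{\mathrm{lstm}} + (1 - m_c) \odot c_{t-1}

and the backward pass splits each incoming derivative accordingly: a masked
part routed through calcGradientLSTM and a (1 - m) residual added directly to
the previous state's gradient.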
#include <acti_func.h>
#include <common_properties.h>
-#include <layer_impl.h>
#include <lstmcell_core.h>
namespace nntrainer {
* @class ZoneoutLSTMCellLayer
* @brief ZoneoutLSTMCellLayer
*/
-class ZoneoutLSTMCellLayer : public LayerImpl {
+class ZoneoutLSTMCellLayer : public LSTMCore {
public:
/**
* @brief HiddenStateZoneOutRate property, this defines zone out rate for
OUTPUT_CELL_STATE = 1
};
+ /** common properties like Unit, IntegrateBias, HiddenStateActivation and
+ * RecurrentActivation are in lstmcore_props */
+
/**
- * Unit: number of output neurons
- * IntegrateBias: integrate bias_ih, bias_hh to bias_h
- * HiddenStateActivation: activation type for hidden state. default is tanh
- * RecurrentActivation: activation type for recurrent. default is sigmoid
* HiddenStateZoneOutRate: zoneout rate for hidden_state
* CellStateZoneOutRate: zoneout rate for cell_state
* Test: property for test mode
* TimeStep: timestep for which lstm should operate
*
* */
- std::tuple<props::Unit, props::IntegrateBias, props::HiddenStateActivation,
- props::RecurrentActivation, HiddenStateZoneOutRate,
- CellStateZoneOutRate, Test, props::MaxTimestep, props::Timestep>
+ std::tuple<HiddenStateZoneOutRate, CellStateZoneOutRate, Test,
+ props::MaxTimestep, props::Timestep>
zoneout_lstmcell_props;
std::array<unsigned int, 9> wt_idx; /**< indices of the weights */
-
- /**
- * @brief activation function for h_t : default is tanh
- */
- ActiFunc acti_func;
-
- /**
- * @brief activation function for recurrent : default is sigmoid
- */
- ActiFunc recurrent_acti_func;
-
- /**
- * @brief Protect overflow
- */
- float epsilon;
};
} // namespace nntrainer