[ LSTM ] Add Skeleton LSTM Layer Class
author     jijoong.moon <jijoong.moon@samsung.com>
Wed, 31 Mar 2021 11:33:52 +0000 (20:33 +0900)
committer  Jijoong Moon <jijoong.moon@samsung.com>
Thu, 1 Apr 2021 07:50:52 +0000 (16:50 +0900)
This PR includes skeleton code of lstm layer

Resolves:

**Self evaluation:**
1. Build test:  [X]Passed [ ]Failed [ ]Skipped
2. Run test:  [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon <jijoong.moon@samsung.com>
api/ccapi/include/layer.h
jni/Android.mk
nntrainer/layers/lstm.cpp [new file with mode: 0644]
nntrainer/layers/lstm.h [new file with mode: 0644]
nntrainer/layers/meson.build
nntrainer/utils/parse_util.cpp

index c079178..42ada33 100644 (file)
@@ -107,6 +107,7 @@ public:
    *            31. in_dim : int ( input dimension for embedding layer )
    *            32. out_dim : int ( output dimesion for embedding layer )
    *            33. in_length : int ( input length for embedding layer )
+   *            34. recurrent_activation : string ( recurrent activation type for lstm layer )
    */
   enum class PropertyType {
     input_shape = 0,
@@ -143,6 +144,7 @@ public:
     in_dim = 31,
     out_dim = 32,
     in_length = 33,
+    recurrent_activation = 34,
     unknown
   };
 
index 5917189..686bb73 100644 (file)
@@ -115,6 +115,7 @@ NNTRAINER_SRCS := $(NNTRAINER_ROOT)/nntrainer/models/neuralnet.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/preprocess_translate_layer.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/embedding.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/rnn.cpp \
+                  $(NNTRAINER_ROOT)/nntrainer/layers/lstm.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/acti_func.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/graph/network_graph.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/optimizers/optimizer_devel.cpp \
diff --git a/nntrainer/layers/lstm.cpp b/nntrainer/layers/lstm.cpp
new file mode 100644 (file)
index 0000000..1e649b3
--- /dev/null
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2021 Jijoong Moon <jijoong.moon@samsung.com>
+ *
+ * @file       lstm.cpp
+ * @date       17 March 2021
+ * @brief      This is Long Short-Term Memory Layer Class of Neural Network
+ * @see                https://github.com/nnstreamer/nntrainer
+ * @author     Jijoong Moon <jijoong.moon@samsung.com>
+ * @bug                No known bugs except for NYI items
+ *
+ */
+
+#include <cmath>
+#include <layer_internal.h>
+#include <lazy_tensor.h>
+#include <lstm.h>
+#include <nntrainer_error.h>
+#include <nntrainer_log.h>
+#include <parse_util.h>
+#include <util_func.h>
+
+namespace nntrainer {
+
+const std::string LSTMLayer::type = "lstm";
+
+enum LSTMParams { weight_xh, weight_hh, bias_h }; // weight slots: presumably input-to-hidden, hidden-to-hidden, bias — confirm once initialize() allocates them
+
+int LSTMLayer::initialize(Manager &manager) { // skeleton: manager is not used yet
+  int status = ML_ERROR_NONE;
+  if (getNumInputs() != 1) { // LSTM consumes exactly one input tensor
+    throw std::invalid_argument("LSTM layer takes only one input");
+  }
+
+  return status; // NYI: weights (weight_xh / weight_hh / bias_h) not yet requested from manager
+}
+
+/**
+ * Set a single layer property; unknown properties are delegated to Layer.
+ * Fix: the original opened a stray brace after `case PropertyType::unit:`
+ * that wrapped every case (closing only at the end of the switch), and the
+ * recurrent_activation case had no `break;`, so it fell through into
+ * `default:` and re-sent the already-handled property to Layer::setProperty,
+ * where it is unknown. Each case now ends with its own `break;`.
+ */
+void LSTMLayer::setProperty(const PropertyType type, const std::string &value) {
+  int status = ML_ERROR_NONE;
+  // TODO : Add return_state property & api to get the hidden input
+  switch (type) {
+  case PropertyType::unit:
+    if (!value.empty()) {
+      status = setUint(unit, value);
+      throw_status(status);
+      output_dim[0].width(unit);
+    }
+    break;
+  case PropertyType::activation:
+    if (!value.empty()) {
+      ActivationType acti_type = (ActivationType)parseType(value, TOKEN_ACTI);
+      Layer::activation_type = acti_type;
+      acti_func.setActiFunc(acti_type);
+    }
+    break;
+  case PropertyType::recurrent_activation:
+    if (!value.empty()) {
+      ActivationType acti_type = (ActivationType)parseType(value, TOKEN_ACTI);
+      recurrent_activation_type = acti_type;
+      recurrent_acti_func.setActiFunc(acti_type);
+    }
+    break; // was missing: fall-through sent this property to Layer::setProperty
+  default:
+    Layer::setProperty(type, value);
+    break;
+  }
+}
+
+void LSTMLayer::setRecurrentActivation(ActivationType activation) {
+  if (activation == ActivationType::ACT_UNKNOWN) { // reject unspecified type up front
+    throw std::invalid_argument("Error: have to specify activation function");
+  }
+  recurrent_activation_type = activation; // note: only the type is stored; recurrent_acti_func is not updated here
+}
+
+void LSTMLayer::forwarding(bool training) {
+  // NYI: gate computations and cell/hidden state updates not implemented yet
+}
+
+void LSTMLayer::copy(std::shared_ptr<Layer> l) {
+  Layer::copy(l); // copy base-class state first
+
+  std::shared_ptr<LSTMLayer> from = std::static_pointer_cast<LSTMLayer>(l); // unchecked cast: caller must pass an LSTMLayer
+  this->unit = from->unit;
+  this->acti_func = from->acti_func;
+  this->recurrent_activation_type = from->recurrent_activation_type;
+  this->recurrent_acti_func = from->recurrent_acti_func;
+}
+
+void LSTMLayer::calcDerivative() {
+  // NYI: backpropagation to the input is not implemented yet
+}
+
+void LSTMLayer::calcGradient() {
+  // NYI: weight gradient computation is not implemented yet
+}
+
+} // namespace nntrainer
diff --git a/nntrainer/layers/lstm.h b/nntrainer/layers/lstm.h
new file mode 100644 (file)
index 0000000..e8c2126
--- /dev/null
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2021 Jijoong Moon <jijoong.moon@samsung.com>
+ *
+ * @file       lstm.h
+ * @date       31 March 2021
+ * @brief      This is Long Short-Term Memory Layer Class of Neural Network
+ * @see                https://github.com/nnstreamer/nntrainer
+ * @author     Jijoong Moon <jijoong.moon@samsung.com>
+ * @bug                No known bugs except for NYI items
+ *
+ */
+
+#ifndef __LSTM_H__
+#define __LSTM_H__
+#ifdef __cplusplus
+
+#include <layer_internal.h>
+#include <tensor.h>
+
+namespace nntrainer {
+
+/**
+ * @class   LSTMLayer
+ * @brief   LSTMLayer
+ */
+class LSTMLayer : public Layer {
+public:
+  /**
+   * @brief     Constructor of LSTMLayer
+   */
+  template <typename... Args>
+  LSTMLayer(unsigned int unit_ = 0, Args... args) :
+    Layer(args...),
+    unit(unit_) {
+    /* Default Activation Type is tanh */
+    if (getActivationType() == ActivationType::ACT_NONE)
+      setActivation(ActivationType::ACT_TANH);
+    if (getRecurrentActivationType() == ActivationType::ACT_NONE)
+      setRecurrentActivation(ActivationType::ACT_SIGMOID);
+  }
+
+  /**
+   * @brief     Destructor of LSTMLayer
+   */
+  ~LSTMLayer(){};
+
+  /**
+   *  @brief  Move constructor.
+   *  @param[in] LSTMLayer &&
+   */
+  LSTMLayer(LSTMLayer &&rhs) noexcept = default;
+
+  /**
+   * @brief  Move assignment operator.
+   * @param[in] rhs LSTMLayer to be moved.
+   */
+  LSTMLayer &operator=(LSTMLayer &&rhs) = default;
+
+  /**
+   * @copydoc Layer::forwarding(bool training)
+   */
+  void forwarding(bool training = true) override;
+
+  /**
+   * @copydoc Layer::calcDerivative()
+   */
+  void calcDerivative() override;
+
+  /**
+   * @copydoc Layer::calcGradient()
+   */
+  void calcGradient() override;
+
+  /**
+   * @brief     Recurrent activation type getter
+   * @retval    Recurrent activation type.
+   */
+  ActivationType getRecurrentActivationType() {
+    return this->recurrent_activation_type;
+  }
+
+  /**
+   * @brief     copy layer
+   * @param[in] l layer to copy
+   */
+  void copy(std::shared_ptr<Layer> l) override;
+
+  /**
+   * @brief     initialize layer
+   * @retval #ML_ERROR_NONE Successful.
+   * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+   */
+  int initialize(Manager &manager) override;
+
+  /**
+   * @copydoc Layer::getType()
+   */
+  const std::string getType() const override { return LSTMLayer::type; };
+
+  /**
+   * @brief     Recurrent activation setter
+   * @param[in] activation recurrent activation type
+   * @throw std::invalid_argument when ActivationType is unknown
+   */
+  void setRecurrentActivation(ActivationType activation);
+
+  using Layer::setProperty;
+
+  /**
+   * @copydoc Layer::setProperty(const PropertyType type, const std::string
+   * &value)
+   */
+  void setProperty(const PropertyType type,
+                   const std::string &value = "") override;
+
+  static const std::string type;
+
+private:
+  /**
+   * @brief     hidden state size
+   */
+  unsigned int unit;
+
+  /**
+   * @brief     activation function for h_t : default is tanh
+   */
+  ActiFunc acti_func;
+
+  /**
+   * @brief     activation type for recurrent : default is sigmoid
+   */
+  ActivationType recurrent_activation_type;
+
+  /**
+   * @brief     activation function for recurrent : default is sigmoid
+   */
+  ActiFunc recurrent_acti_func;
+};
+} // namespace nntrainer
+
+#endif /* __cplusplus */
+#endif /* __LSTM_H__ */
index 1705efb..09a667f 100644 (file)
@@ -17,7 +17,8 @@ layer_sources = [
   'preprocess_translate_layer.cpp',
   'embedding.cpp',
   'rnn.cpp',
-  'acti_func.cpp'
+  'acti_func.cpp',
+  'lstm.cpp'
 ]
 
 layer_headers = [
index e909cfd..f7e495c 100644 (file)
@@ -245,6 +245,10 @@ unsigned int parseType(std::string ll, InputType t) {
  * trainable = 28
  * flip_direction = 29
  * random_tranlate = 30
+ * in_dim = 31
+ * out_dim = 32
+ * in_length = 33
+ * recurrent_activation = 34
  *
  * InputLayer has 0, 1, 2, 3 properties.
  * FullyConnectedLayer has 1, 4, 6, 7, 8, 9 properties.
@@ -252,7 +256,7 @@ unsigned int parseType(std::string ll, InputType t) {
  * Pooling2DLayer has 12, 13, 14, 15 properties.
  * BatchNormalizationLayer has 0, 1, 5, 6, 7 properties.
  */
-static std::array<std::string, 35> property_string = {
+static std::array<std::string, 36> property_string = {
   "input_shape",
   "normalization",
   "standardization",
@@ -287,6 +291,7 @@ static std::array<std::string, 35> property_string = {
   "in_dim",
   "out_dim",
   "in_length",
+  "recurrent_activation",
   "unknown"};
 
 unsigned int parseLayerProperty(std::string property) {