[ GRU ] Skeleton Code for GRU
author     jijoong.moon <jijoong.moon@samsung.com>
           Fri, 28 May 2021 03:45:12 +0000 (12:45 +0900)
committer  Jijoong Moon <jijoong.moon@samsung.com>
           Thu, 17 Jun 2021 03:40:20 +0000 (12:40 +0900)
This commit includes:
 . Skeleton Code for GRU Layer

**Self evaluation:**
1. Build test:  [X]Passed [ ]Failed [ ]Skipped
2. Run test:  [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon <jijoong.moon@samsung.com>
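
A minimal usage sketch of the new creator (assuming the ccapi helper lands in
the ml::train::layer namespace alongside the existing LSTM helper; the property
keys are the ones handled in gru.cpp, and forwarding is still NYI, so this only
constructs the layer):

    #include <layer.h>

    // build a GRU layer through the new ccapi helper (sketch)
    auto gru = ml::train::layer::GRU({"unit=32",
                                      "activation=tanh",
                                      "recurrent_activation=sigmoid",
                                      "return_sequences=true"});
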
api/ccapi/include/layer.h
jni/Android.mk
nntrainer/app_context.cpp
nntrainer/layers/gru.cpp [new file with mode: 0644]
nntrainer/layers/gru.h [new file with mode: 0644]
nntrainer/layers/meson.build

diff --git a/api/ccapi/include/layer.h b/api/ccapi/include/layer.h
index 90c4798..f0c87c1 100644
@@ -53,6 +53,7 @@ enum LayerType {
   LAYER_RNN,                  /** RNN Layer type */
   LAYER_LSTM,                 /** LSTM Layer type */
   LAYER_SPLIT,                /** Splite Layer type */
+  LAYER_GRU,                  /** GRU Layer type */
   LAYER_TIME_DIST,            /** Time Distributed Layer type */
   LAYER_PERMUTE,              /** Permute layer */
   LAYER_UNKNOWN = ML_TRAIN_LAYER_TYPE_UNKNOWN /** Unknown */
@@ -285,6 +286,14 @@ LSTM(const std::vector<std::string> &properties = {}) {
 }
 
 /**
+ * @brief Helper function to create GRU layer
+ */
+inline std::unique_ptr<Layer>
+GRU(const std::vector<std::string> &properties = {}) {
+  return createLayer(LayerType::LAYER_GRU, properties);
+}
+
+/**
  * @brief Helper function to create Time Distributed layer
  */
 inline std::unique_ptr<Layer>
diff --git a/jni/Android.mk b/jni/Android.mk
index b0d4be7..3495174 100644
@@ -147,6 +147,7 @@ NNTRAINER_SRCS := $(NNTRAINER_ROOT)/nntrainer/models/neuralnet.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/embedding.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/rnn.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/lstm.cpp \
+                  $(NNTRAINER_ROOT)/nntrainer/layers/gru.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/time_dist.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/permute_layer.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/acti_func.cpp \
diff --git a/nntrainer/app_context.cpp b/nntrainer/app_context.cpp
index 5ec100e..96ec1c3 100644
@@ -36,6 +36,7 @@
 #include <embedding.h>
 #include <fc_layer.h>
 #include <flatten_layer.h>
+#include <gru.h>
 #include <input_layer.h>
 #include <loss_layer.h>
 #include <lstm.h>
@@ -245,6 +246,8 @@ static void add_default_object(AppContext &ac) {
                      LayerType::LAYER_RNN);
   ac.registerFactory(nntrainer::createLayer<LSTMLayer>, LSTMLayer::type,
                      LayerType::LAYER_LSTM);
+  ac.registerFactory(nntrainer::createLayer<GRULayer>, GRULayer::type,
+                     LayerType::LAYER_GRU);
   ac.registerFactory(nntrainer::createLayer<TimeDistLayer>, TimeDistLayer::type,
                      LayerType::LAYER_TIME_DIST);
   ac.registerFactory(nntrainer::createLayer<SplitLayer>, SplitLayer::type,
diff --git a/nntrainer/layers/gru.cpp b/nntrainer/layers/gru.cpp
new file mode 100644
index 0000000..1608c31
--- /dev/null
+++ b/nntrainer/layers/gru.cpp
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2021 Jijoong Moon <jijoong.moon@samsung.com>
+ *
+ * @file   gru.cpp
+ * @date   17 March 2021
+ * @brief  This is Gated Recurrent Unit Layer Class of Neural Network
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Jijoong Moon <jijoong.moon@samsung.com>
+ * @bug    No known bugs except for NYI items
+ *
+ */
+
+#include <cmath>
+#include <gru.h>
+#include <layer_internal.h>
+#include <lazy_tensor.h>
+#include <nntrainer_error.h>
+#include <nntrainer_log.h>
+#include <parse_util.h>
+#include <util_func.h>
+
+namespace nntrainer {
+
+const std::string GRULayer::type = "gru";
+
+enum GRUParams { weight_xh, weight_hh, bias_h };
+
+#define NUM_GATE 3
+
+// - weight_xh ( input to hidden )
+//  : [1, 1, input_size, unit (hidden_size) x NUM_GATE] -> z, r, g
+// - weight_hh ( hidden to hidden )
+//  : [1, 1, unit (hidden_size), unit (hidden_size) x NUM_GATE] -> z, r, g
+// - bias_h ( hidden bias )
+//  : [1, 1, 1, unit (hidden_size) x NUM_GATE] -> z, r, g
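+//
+// For reference, one common GRU formulation (Keras-style convention; this
+// skeleton does not implement forwarding yet, and the z, r, g packing order is
+// an assumption based on the zrg member naming):
+//   z_t = recurrent_act(x_t . W_xz + h_{t-1} . W_hz + b_z)    (update gate)
+//   r_t = recurrent_act(x_t . W_xr + h_{t-1} . W_hr + b_r)    (reset gate)
+//   g_t = act(x_t . W_xg + (r_t (*) h_{t-1}) . W_hg + b_g)    (candidate)
+//   h_t = z_t (*) h_{t-1} + (1 - z_t) (*) g_t
+// where (*) is element-wise multiplication; recurrent_act defaults to sigmoid
+// and act defaults to tanh (see initialize() below).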
+int GRULayer::initialize(Manager &manager) {
+  int status = ML_ERROR_NONE;
+  if (getNumInputs() != 1) {
+    throw std::invalid_argument("GRU layer takes only one input");
+  }
+
+  // input_dim = [ batch, 1, time_iteration, feature_size ]
+  // if return_sequences == False :
+  //      output_dim = [ batch, 1, 1, hidden_size (unit)]
+  // else:
+  //      output_dim = [ batch, 1, time_iteration, hidden_size ( unit ) ]
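+  // e.g. input [batch=4, 1, time=7, feature=10] with unit=5 yields
+  //      [4, 1, 7, 5] when return_sequences is set, otherwise [4, 1, 1, 5]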
+  output_dim[0] = input_dim[0];
+  output_dim[0].width(unit);
+
+  if (!return_sequences) {
+    output_dim[0].height(1);
+  }
+
+  TensorDim bias_dim = TensorDim();
+  bias_dim.setTensorDim(3, unit * NUM_GATE);
+
+  TensorDim dim_xh = output_dim[0];
+  dim_xh.height(input_dim[0].width());
+  dim_xh.width(unit * NUM_GATE);
+  dim_xh.batch(1);
+
+  TensorDim dim_hh = output_dim[0];
+  dim_hh.height(unit);
+  dim_hh.width(unit * NUM_GATE);
+  dim_hh.batch(1);
+
+  if (weights.empty()) {
+    weights.reserve(3);
+    // The weight_xh and weight_hh initializers could be set separately
+    // (kernel_initializer and recurrent_initializer in keras); for now both
+    // use the same weight_initializer.
+    weights.emplace_back(dim_xh, weight_initializer, weight_regularizer,
+                         weight_regularizer_constant, true, "GRU:weight_xh");
+    weights.emplace_back(dim_hh, weight_initializer, weight_regularizer,
+                         weight_regularizer_constant, true, "GRU:weight_hh");
+    weights.emplace_back(bias_dim, bias_initializer, WeightRegularizer::NONE,
+                         1.0f, true, "GRU:bias_h");
+    manager.trackWeights(weights);
+  } else {
+    weights[GRUParams::weight_xh].reset(dim_xh, weight_initializer,
+                                        weight_regularizer,
+                                        weight_regularizer_constant, true);
+    weights[GRUParams::weight_hh].reset(dim_hh, weight_initializer,
+                                        weight_regularizer,
+                                        weight_regularizer_constant, true);
+    weights[GRUParams::bias_h].reset(bias_dim, bias_initializer,
+                                     WeightRegularizer::NONE, 1.0f, true);
+  }
+
+  TensorDim d = input_dim[0];
+  d.width(unit);
+
+  hidden = std::make_shared<Var_Grad>(d, true, true, "GRU:temp_hidden");
+  d.width(unit * NUM_GATE);
+
+  TensorDim h_dim = TensorDim();
+  h_dim.setTensorDim(3, unit);
+  h_dim.batch(input_dim[0].batch());
+
+  h_prev = Tensor(h_dim);
+
+  if (LayerV1::activation_type == ActivationType::ACT_NONE) {
+    LayerV1::activation_type = ActivationType::ACT_TANH;
+    acti_func.setActiFunc(activation_type);
+  }
+
+  if (recurrent_activation_type == ActivationType::ACT_NONE) {
+    recurrent_activation_type = ActivationType::ACT_SIGMOID;
+    recurrent_acti_func.setActiFunc(recurrent_activation_type);
+  }
+
+  return status;
+}
+
+void GRULayer::setProperty(const PropertyType type, const std::string &value) {
+  int status = ML_ERROR_NONE;
+  // TODO : Add return_state property & api to get the hidden input
+  switch (type) {
+  case PropertyType::unit: {
+    if (!value.empty()) {
+      status = setUint(unit, value);
+      throw_status(status);
+      output_dim[0].width(unit);
+    }
+    break;
+  }
+  case PropertyType::activation:
+    if (!value.empty()) {
+      ActivationType acti_type = (ActivationType)parseType(value, TOKEN_ACTI);
+      LayerV1::activation_type = acti_type;
+      acti_func.setActiFunc(acti_type);
+    }
+    break;
+  case PropertyType::recurrent_activation:
+    if (!value.empty()) {
+      ActivationType acti_type = (ActivationType)parseType(value, TOKEN_ACTI);
+      recurrent_activation_type = acti_type;
+      recurrent_acti_func.setActiFunc(acti_type);
+    }
+    break;
+  case PropertyType::return_sequences:
+    if (!value.empty()) {
+      status = setBoolean(return_sequences, value);
+      throw_status(status);
+    }
+    break;
+  default:
+    LayerV1::setProperty(type, value);
+    break;
+  }
+}
+
+void GRULayer::setRecurrentActivation(ActivationType activation) {
+  if (activation == ActivationType::ACT_UNKNOWN) {
+    throw std::invalid_argument("Error: have to specify activation function");
+  }
+  recurrent_activation_type = activation;
+}
+
+void GRULayer::forwarding(bool training) {
+  // NYI
+}
+
+void GRULayer::copy(std::shared_ptr<LayerV1> l) {
+  LayerV1::copy(l);
+
+  std::shared_ptr<GRULayer> from = std::static_pointer_cast<GRULayer>(l);
+  this->unit = from->unit;
+  this->acti_func = from->acti_func;
+  this->recurrent_activation_type = from->recurrent_activation_type;
+  this->recurrent_acti_func = from->recurrent_acti_func;
+  this->return_sequences = from->return_sequences;
+}
+
+void GRULayer::calcDerivative() {
+  // NYI
+}
+
+void GRULayer::calcGradient() {
+  // NYI
+}
+
+} // namespace nntrainer
diff --git a/nntrainer/layers/gru.h b/nntrainer/layers/gru.h
new file mode 100644
index 0000000..165a00d
--- /dev/null
+++ b/nntrainer/layers/gru.h
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2021 Jijoong Moon <jijoong.moon@samsung.com>
+ *
+ * @file   gru.h
+ * @date   31 March 2021
+ * @brief  This is Gated Recurrent Unit Layer Class of Neural Network
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Jijoong Moon <jijoong.moon@samsung.com>
+ * @bug    No known bugs except for NYI items
+ *
+ */
+
+#ifndef __GRU_H__
+#define __GRU_H__
+#ifdef __cplusplus
+
+#include <layer_internal.h>
+#include <tensor.h>
+
+namespace nntrainer {
+
+/**
+ * @class   GRULayer
+ * @brief   GRULayer
+ */
+class GRULayer : public LayerV1 {
+public:
+  /**
+   * @brief     Constructor of GRULayer
+   */
+  template <typename... Args>
+  GRULayer(unsigned int unit_ = 0,
+           ActivationType recurrent_activation_type_ = ActivationType::ACT_NONE,
+           bool sequence = false, Args... args) :
+    LayerV1(args...),
+    unit(unit_),
+    recurrent_activation_type(recurrent_activation_type_),
+    return_sequences(sequence){};
+
+  /**
+   * @brief     Destructor of GRULayer
+   */
+  ~GRULayer() = default;
+
+  /**
+   *  @brief  Move constructor.
+   *  @param[in] rhs GRULayer to be moved.
+   */
+  GRULayer(GRULayer &&rhs) noexcept = default;
+
+  /**
+   * @brief  Move assignment operator.
+   * @param[in] rhs GRULayer to be moved.
+   */
+  GRULayer &operator=(GRULayer &&rhs) = default;
+
+  /**
+   * @copydoc Layer::forwarding(bool training)
+   */
+  void forwarding(bool training = true) override;
+
+  /**
+   * @copydoc Layer::calcDerivative()
+   */
+  void calcDerivative() override;
+
+  /**
+   * @copydoc Layer::calcGradient()
+   */
+  void calcGradient() override;
+
+  /**
+   * @brief     Recurrent activation type getter
+   * @retval    recurrent ActivationType
+   */
+  ActivationType getRecurrentActivationType() {
+    return this->recurrent_activation_type;
+  }
+
+  /**
+   * @brief     copy layer
+   * @param[in] l layer to copy
+   */
+  void copy(std::shared_ptr<LayerV1> l) override;
+
+  /**
+   * @brief     initialize layer
+   * @retval #ML_ERROR_NONE Successful.
+   * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+   */
+  int initialize(Manager &manager) override;
+
+  /**
+   * @copydoc Layer::getType()
+   */
+  const std::string getType() const override { return GRULayer::type; };
+
+  /**
+   * @brief     Activation Setter
+   * @param[in] activation activation type
+   * @throw std::invalid_argument when ActivationType is unknown
+   */
+  void setRecurrentActivation(ActivationType activation);
+
+  using LayerV1::setProperty;
+
+  /**
+   * @copydoc Layer::setProperty(const PropertyType type, const std::string
+   * &value)
+   */
+  void setProperty(const PropertyType type,
+                   const std::string &value = "") override;
+
+  static const std::string type;
+
+private:
+  /**
+   * @brief     hidden state size
+   */
+  unsigned int unit;
+
+  /**
+   * @brief     activation function for h_t : default is tanh
+   */
+  ActiFunc acti_func;
+
+  /**
+   * @brief     activation type for recurrent : default is sigmoid
+   */
+  ActivationType recurrent_activation_type;
+
+  /**
+   * @brief     activation function for recurrent : default is sigmoid
+   */
+  ActiFunc recurrent_acti_func;
+
+  /**
+   * @brief     To save hidden state variable ( batch, 1, 1, unit )
+   */
+  Tensor h_prev;
+
+  /**
+   * @brief     To save intermediate gates
+   */
+  std::shared_ptr<Var_Grad> zrg;
+
+  /**
+   * @brief     hidden state
+   */
+  std::shared_ptr<Var_Grad> hidden;
+
+  /**
+   * @brief     variable to set return sequences
+   */
+  bool return_sequences;
+};
+} // namespace nntrainer
+
+#endif /* __cplusplus */
+#endif /* __GRU_H__ */
diff --git a/nntrainer/layers/meson.build b/nntrainer/layers/meson.build
index 39d0bdc..cba8865 100644
@@ -23,7 +23,8 @@ layer_sources = [
   'time_dist.cpp',
   'common_properties.cpp',
   'split_layer.cpp',
-  'permute_layer.cpp'
+  'permute_layer.cpp',
+  'gru.cpp'
 ]
 
 layer_headers = [