--- /dev/null
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2021 Jijoong Moon <jijoong.moon@samsung.com>
+ *
+ * @file gru.cpp
+ * @date 17 March 2021
+ * @brief This is Gated Recurrent Unit Layer Class of Neural Network
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author Jijoong Moon <jijoong.moon@samsung.com>
+ * @bug No known bugs except for NYI items
+ *
+ */
+
+#include <cmath>
+#include <gru.h>
+#include <layer_internal.h>
+#include <lazy_tensor.h>
+#include <nntrainer_error.h>
+#include <nntrainer_log.h>
+#include <parse_util.h>
+#include <util_func.h>
+
+namespace nntrainer {
+
+const std::string GRULayer::type = "gru";
+
+enum GRUParams { weight_xh, weight_hh, bias_h };
+
+#define NUM_GATE 3
+
+// - weight_xh ( input to hidden )
+// : [1, 1, input_size, unit (hidden_size) x NUM_GATE] -> z, r, g
+// - weight_hh ( hidden to hidden )
+// : [1, 1, unit (hidden_size) , unit (hidden_size) x NUM_GATE] -> z, r, g
+// - bias_h ( hidden bias )
+// : [1, 1, 1, unit (hidden_size) x NUM_GATE] -> z, r, g
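+//
+// For reference, z (update), r (reset), and g (candidate) above are assumed
+// to follow the standard GRU cell (Cho et al., 2014) in the Keras-style
+// convention; W_x* / W_h* denote the unit-wide slices of weight_xh and
+// weight_hh, and b_* the slices of bias_h:
+// z_t = recurrent_acti(x_t . W_xz + h_{t-1} . W_hz + b_z)
+// r_t = recurrent_acti(x_t . W_xr + h_{t-1} . W_hr + b_r)
+// g_t = acti(x_t . W_xg + (r_t * h_{t-1}) . W_hg + b_g)
+// h_t = z_t * h_{t-1} + (1 - z_t) * g_t
+// where recurrent_acti defaults to sigmoid and acti to tanh (see initialize).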
+int GRULayer::initialize(Manager &manager) {
+ int status = ML_ERROR_NONE;
+ if (getNumInputs() != 1) {
+ throw std::invalid_argument("GRU layer takes only one input");
+ }
+
+ // input_dim = [ batch, 1, time_iteration, feature_size ]
+ // if return_sequences == False :
+ // output_dim = [ batch, 1, 1, hidden_size (unit)]
+ // else:
+ // output_dim = [ batch, 1, time_iteration, hidden_size ( unit ) ]
+ output_dim[0] = input_dim[0];
+ output_dim[0].width(unit);
+
+ if (!return_sequences) {
+ output_dim[0].height(1);
+ }
+
+ TensorDim bias_dim = TensorDim();
+ bias_dim.setTensorDim(3, unit * NUM_GATE);
+
+ TensorDim dim_xh = output_dim[0];
+ dim_xh.height(input_dim[0].width());
+ dim_xh.width(unit * NUM_GATE);
+ dim_xh.batch(1);
+
+ TensorDim dim_hh = output_dim[0];
+ dim_hh.height(unit);
+ dim_hh.width(unit * NUM_GATE);
+ dim_hh.batch(1);
+
+ if (weights.empty()) {
+ weights.reserve(3);
+ // weight_initializer can be set separately: weight_xh and weight_hh
+ // correspond to kernel_initializer and recurrent_initializer in Keras.
+ // For now, both are initialized the same way.
+ weights.emplace_back(dim_xh, weight_initializer, weight_regularizer,
+ weight_regularizer_constant, true, "GRU:weight_xh");
+ weights.emplace_back(dim_hh, weight_initializer, weight_regularizer,
+ weight_regularizer_constant, true, "GRU:weight_hh");
+ weights.emplace_back(bias_dim, bias_initializer, WeightRegularizer::NONE,
+ 1.0f, true, "GRU:bias_h");
+ manager.trackWeights(weights);
+ } else {
+ weights[GRUParams::weight_xh].reset(dim_xh, weight_initializer,
+ weight_regularizer,
+ weight_regularizer_constant, true);
+ weights[GRUParams::weight_hh].reset(dim_hh, weight_initializer,
+ weight_regularizer,
+ weight_regularizer_constant, true);
+ weights[GRUParams::bias_h].reset(bias_dim, bias_initializer,
+ WeightRegularizer::NONE, 1.0f, true);
+ }
+
+ TensorDim d = input_dim[0];
+ d.width(unit);
+
+ hidden = std::make_shared<Var_Grad>(d, true, true, "GRU:temp_hidden");
+ d.width(unit * NUM_GATE);
+ // intermediate tensor holding the z, r, g gate outputs for each step
+ zrg = std::make_shared<Var_Grad>(d, true, true, "GRU:zrg");
+
+ TensorDim h_dim = TensorDim();
+ h_dim.setTensorDim(3, unit);
+ h_dim.batch(input_dim[0].batch());
+
+ h_prev = Tensor(h_dim);
+
+ if (LayerV1::activation_type == ActivationType::ACT_NONE) {
+ LayerV1::activation_type = ActivationType::ACT_TANH;
+ acti_func.setActiFunc(activation_type);
+ }
+
+ if (recurrent_activation_type == ActivationType::ACT_NONE) {
+ recurrent_activation_type = ActivationType::ACT_SIGMOID;
+ recurrent_acti_func.setActiFunc(recurrent_activation_type);
+ }
+
+ return status;
+}
+
+void GRULayer::setProperty(const PropertyType type, const std::string &value) {
+ int status = ML_ERROR_NONE;
+ // TODO : Add return_state property & api to get the hidden input
+ switch (type) {
+ case PropertyType::unit: {
+ if (!value.empty()) {
+ status = setUint(unit, value);
+ throw_status(status);
+ output_dim[0].width(unit);
+ }
+ break;
+ }
+ case PropertyType::activation:
+ if (!value.empty()) {
+ ActivationType acti_type = (ActivationType)parseType(value, TOKEN_ACTI);
+ LayerV1::activation_type = acti_type;
+ acti_func.setActiFunc(acti_type);
+ }
+ break;
+ case PropertyType::recurrent_activation:
+ if (!value.empty()) {
+ ActivationType acti_type = (ActivationType)parseType(value, TOKEN_ACTI);
+ recurrent_activation_type = acti_type;
+ recurrent_acti_func.setActiFunc(acti_type);
+ }
+ break;
+ case PropertyType::return_sequences:
+ if (!value.empty()) {
+ status = setBoolean(return_sequences, value);
+ throw_status(status);
+ }
+ break;
+ default:
+ LayerV1::setProperty(type, value);
+ break;
+ }
+}
+
+void GRULayer::setRecurrentActivation(ActivationType activation) {
+ if (activation == ActivationType::ACT_UNKNOWN) {
+ throw std::invalid_argument("Error: have to specify activation function");
+ }
+ recurrent_activation_type = activation;
+}
+
+void GRULayer::forwarding(bool training) {
+ // NYI
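+ //
+ // Sketch of the intended per-timestep computation, assuming the GRU
+ // formulation noted above the weight comments (not the implementation):
+ // for t in [0, time_iteration):
+ // zrg_t = x_t . weight_xh + h_prev . weight_hh + bias_h
+ // z_t, r_t = recurrent_acti_func over the first two unit-wide slices
+ // g_t = acti_func over the last slice, with the h_prev term gated by r_t
+ // h_prev = z_t * h_prev + (1 - z_t) * g_t
+ // hidden keeps h_prev per step; the output is every step's h_prev if
+ // return_sequences is set, otherwise only the last one.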
+}
+
+void GRULayer::copy(std::shared_ptr<LayerV1> l) {
+ LayerV1::copy(l);
+
+ std::shared_ptr<GRULayer> from = std::static_pointer_cast<GRULayer>(l);
+ this->unit = from->unit;
+ this->acti_func = from->acti_func;
+ this->recurrent_activation_type = from->recurrent_activation_type;
+ this->recurrent_acti_func = from->recurrent_acti_func;
+ this->return_sequences = from->return_sequences;
+}
+
+void GRULayer::calcDerivative() {
+ // NYI
+}
+
+void GRULayer::calcGradient() {
+ // NYI
+}
+
+} // namespace nntrainer
--- /dev/null
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2021 Jijoong Moon <jijoong.moon@samsung.com>
+ *
+ * @file gru.h
+ * @date 31 March 2021
+ * @brief This is Gated Recurrent Unit Layer Class of Neural Network
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author Jijoong Moon <jijoong.moon@samsung.com>
+ * @bug No known bugs except for NYI items
+ *
+ */
+
+#ifndef __GRU_H__
+#define __GRU_H__
+#ifdef __cplusplus
+
+#include <layer_internal.h>
+#include <tensor.h>
+
+namespace nntrainer {
+
+/**
+ * @class GRULayer
+ * @brief Gated Recurrent Unit (GRU) layer
+ */
+class GRULayer : public LayerV1 {
+public:
+ /**
+ * @brief Constructor of GRULayer
+ */
+ template <typename... Args>
+ GRULayer(unsigned int unit_ = 0,
+ ActivationType recurrent_activation_type_ = ActivationType::ACT_NONE,
+ bool sequence = false, Args... args) :
+ LayerV1(args...),
+ unit(unit_),
+ recurrent_activation_type(recurrent_activation_type_),
+ return_sequences(sequence){};
+
+ /**
+ * @brief Destructor of GRULayer
+ */
+ ~GRULayer() = default;
+
+ /**
+ * @brief Move constructor.
+ * @param[in] GRULayer &&
+ */
+ GRULayer(GRULayer &&rhs) noexcept = default;
+
+ /**
+ * @brief Move assignment operator.
+ * @param[in] rhs GRULayer to be moved.
+ */
+ GRULayer &operator=(GRULayer &&rhs) = default;
+
+ /**
+ * @copydoc Layer::forwarding(bool training)
+ */
+ void forwarding(bool training = true) override;
+
+ /**
+ * @copydoc Layer::calcDerivative()
+ */
+ void calcDerivative() override;
+
+ /**
+ * @copydoc Layer::calcGradient()
+ */
+ void calcGradient() override;
+
+ /**
+ * @brief Recurrent Activation Type Getter
+ * @retval Recurrent Activation Type.
+ */
+ ActivationType getRecurrentActivationType() {
+ return this->recurrent_activation_type;
+ }
+
+ /**
+ * @brief copy layer
+ * @param[in] l layer to copy
+ */
+ void copy(std::shared_ptr<LayerV1> l) override;
+
+ /**
+ * @brief initialize layer
+ * @retval #ML_ERROR_NONE Successful.
+ * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+ */
+ int initialize(Manager &manager) override;
+
+ /**
+ * @copydoc Layer::getType()
+ */
+ const std::string getType() const override { return GRULayer::type; };
+
+ /**
+ * @brief Recurrent Activation Setter
+ * @param[in] activation recurrent activation type
+ * @throw std::invalid_argument when ActivationType is unknown
+ */
+ void setRecurrentActivation(ActivationType activation);
+
+ using LayerV1::setProperty;
+
+ /**
+ * @copydoc Layer::setProperty(const PropertyType type, const std::string
+ * &value)
+ */
+ void setProperty(const PropertyType type,
+ const std::string &value = "") override;
+
+ static const std::string type;
+
+private:
+ /**
+ * @brief hidden state size
+ */
+ unsigned int unit;
+
+ /**
+ * @brief activation function for h_t : default is tanh
+ */
+ ActiFunc acti_func;
+
+ /**
+ * @brief activation type for recurrent : default is sigmoid
+ */
+ ActivationType recurrent_activation_type;
+
+ /**
+ * @brief activation function for recurrent : default is sigmoid
+ */
+ ActiFunc recurrent_acti_func;
+
+ /**
+ * @brief To save hidden state variable ( batch, 1, 1, unit )
+ */
+ Tensor h_prev;
+
+ /**
+ * @brief To save the intermediate z, r, g gate values
+ */
+ std::shared_ptr<Var_Grad> zrg;
+
+ /**
+ * @brief hidden state
+ */
+ std::shared_ptr<Var_Grad> hidden;
+
+ /**
+ * @brief variable to set return sequences
+ */
+ bool return_sequences;
+};
+} // namespace nntrainer
+
+#endif /* __cplusplus */
+#endif /* __GRU_H__ */