- Make lstm_core a class so that lstm/lstmcell/zoneout_lstmcell can inherit from it.
close #1997
Signed-off-by: hyeonseok lee <hs89.lee@samsung.com>
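The resulting hierarchy, as a minimal sketch (member lists abridged to what
this patch touches; see the hunks below for the full signatures):

    // lstmcell_core.h: shared base for the three lstm variants.
    class LSTMCore : public LayerImpl {
    public:
      void forwardLSTM(/* ... */);        // was free function lstmcell_forwarding()
      void calcDerivativeLSTM(/* ... */); // was lstmcell_calcDerivative()
      void calcGradientLSTM(/* ... */);   // was lstmcell_calcGradient()

    protected:
      /** common properties shared by all three subclasses */
      std::tuple<props::Unit, props::IntegrateBias, props::HiddenStateActivation,
                 props::RecurrentActivation>
        lstmcore_props;
      ActiFunc acti_func;           // default tanh
      ActiFunc recurrent_acti_func; // default sigmoid
      float epsilon;                // overflow guard
    };

    class LSTMLayer : public LSTMCore {
      /* ReturnSequences, Bidirectional, DropOutRate, MaxTimestep */
    };
    class LSTMCellLayer : public LSTMCore {
      /* DropOutRate */
    };
    class ZoneoutLSTMCellLayer : public LSTMCore {
      /* HiddenStateZoneOutRate, CellStateZoneOutRate, Test, MaxTimestep, Timestep */
    };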
#include <layer_context.h>
#include <lstm.h>
-#include <lstmcell_core.h>
#include <nntr_threads.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
dropout_mask
};
-/**
- * @brief run lstm fowarding for batch_first input
- *
- * @param NUM_GATE Number of gate which is 4 for lstm
- * @param batch_size batch size
- * @param feature_size feature size
- * @param disable_bias whether to disable bias or not
- * @param unit number of output neurons
- * @param integrate_bias integrate bias_ih, bias_hh to bias_h
- * @param acti_func activation function for memory cell, cell state
- * @param recurrent_acti_func activation function for input/output/forget
- * gate
- * @param enable_dropout whether to apply dropout
- * @param dropout_rate dropout rate
- * @param max_timestep maximum timestep for lstm
- * @param reverse indicate forward/backward direction for input in bidirectional
- * lstm
- * @param input_ input
- * @param weight_ih weight for input to hidden
- * @param weight_hh weight for hidden to hidden
- * @param bias_h bias for input and hidden.
- * @param bias_ih bias for input
- * @param bias_hh bias for hidden
- * @param hidden_state_ hidden state
- * @param cell_state_ cell state
- * @param ifgo_ input gate, forget gate, memory cell, output gate
- * @param mask_ dropout mask
- */
-static void batch_first_forwarding(
+void LSTMLayer::forwardingBatchFirstLSTM(
unsigned int NUM_GATE, const unsigned int batch_size,
const unsigned int feature_size, const bool disable_bias,
const unsigned int unit, const bool integrate_bias, ActiFunc &acti_func,
{NUM_GATE * unit},
(reverse ? max_timestep - 1 - t : t) * NUM_GATE * unit);
- lstmcell_forwarding(1, unit, disable_bias, integrate_bias, acti_func,
- recurrent_acti_func, input, prev_hidden_state,
- prev_cell_state, hidden_state, cell_state, weight_ih,
- weight_hh, bias_h, bias_ih, bias_hh, ifgo);
+ forwardLSTM(1, unit, disable_bias, integrate_bias, acti_func,
+ recurrent_acti_func, input, prev_hidden_state,
+ prev_cell_state, hidden_state, cell_state, weight_ih,
+ weight_hh, bias_h, bias_ih, bias_hh, ifgo);
if (enable_dropout) {
Tensor mask_sample = mask_.getBatchSlice(batch, 1);
}
}
-/**
- * @brief calculate lstm gradient for batch_first input
- *
- * @param NUM_GATE Number of gate which is 4 for lstm
- * @param batch_size batch size
- * @param feature_size feature size
- * @param disable_bias whether to disable bias or not
- * @param unit number of output neurons
- * @param integrate_bias integrate bias_ih, bias_hh to bias_h
- * @param acti_func activation function for memory cell, cell state
- * @param recurrent_acti_func activation function for input/output/forget
- * gate
- * @param return_sequences return sequeces
- * @param bidirectional bidirectional lstm
- * @param enable_dropout whether to apply dropout
- * @param dropout_rate dropout rate
- * @param max_timestep maximum timestep for lstm
- * @param reverse indicate forward/backward direction for input in bidirectional
- * lstm
- * @param input_ input
- * @param incoming_derivative derivative for output which is incoming derivative
- * @param d_weight_ih weight_ih(weight for input to hidden) gradient
- * @param weight_hh weight for hidden to hidden
- * @param d_weight_hh weight_hh(weight for hidden to hidden) gradient
- * @param d_bias_h bias_h(bias for input and hidden) gradient
- * @param d_bias_ih bias_ih(bias for input) gradient
- * @param d_bias_hh bias_hh(bias for hidden) gradient
- * @param hidden_state_ hidden state
- * @param d_hidden_state_ hidden state gradient
- * @param cell_state_ cell state
- * @param d_cell_state_ cell state gradient
- * @param ifgo_ input gate, forget gate, memory cell, output gate
- * @param d_ifgo_ gradient for input gate, forget gate, memory cell, output gate
- * @param mask_ dropout mask
- */
-void batch_first_calcGradient(
+void LSTMLayer::calcGradientBatchFirstLSTM(
unsigned int NUM_GATE, const unsigned int batch_size,
const unsigned int feature_size, const bool disable_bias,
const unsigned int unit, const bool integrate_bias, ActiFunc &acti_func,
// already have precalculated values from incoming derivatives
Tensor d_prev_hidden_state_temp;
- lstmcell_calcGradient(
+ calcGradientLSTM(
1, unit, disable_bias, integrate_bias, acti_func,
recurrent_acti_func, input, prev_hidden_state,
d_prev_hidden_state_temp, prev_cell_state, d_prev_cell_state,
// already have precalculated values from incoming derivatives
Tensor d_prev_hidden_state_temp;
- lstmcell_calcGradient(1, unit, disable_bias, integrate_bias, acti_func,
- recurrent_acti_func, input, prev_hidden_state,
- d_prev_hidden_state_temp, prev_cell_state,
- d_prev_cell_state, d_hidden_state, cell_state,
- d_cell_state, d_weight_ih, weight_hh, d_weight_hh,
- d_bias_h, d_bias_ih, d_bias_hh, ifgo, d_ifgo);
+ calcGradientLSTM(1, unit, disable_bias, integrate_bias, acti_func,
+ recurrent_acti_func, input, prev_hidden_state,
+ d_prev_hidden_state_temp, prev_cell_state,
+ d_prev_cell_state, d_hidden_state, cell_state,
+ d_cell_state, d_weight_ih, weight_hh, d_weight_hh,
+ d_bias_h, d_bias_ih, d_bias_hh, ifgo, d_ifgo);
d_prev_hidden_state.add_i(d_prev_hidden_state_temp);
}
}
}
LSTMLayer::LSTMLayer() :
- LayerImpl(),
- lstm_props(props::Unit(), props::IntegrateBias(),
- props::HiddenStateActivation() = ActivationType::ACT_TANH,
- props::RecurrentActivation() = ActivationType::ACT_SIGMOID,
- props::ReturnSequences(), props::Bidirectional(),
- props::DropOutRate(), props::MaxTimestep()),
- acti_func(ActivationType::ACT_NONE, true),
- recurrent_acti_func(ActivationType::ACT_NONE, true),
- epsilon(1e-3) {
+ LSTMCore(),
+ lstm_props(props::ReturnSequences(), props::Bidirectional(),
+ props::DropOutRate(), props::MaxTimestep()) {
wt_idx.fill(std::numeric_limits<unsigned>::max());
}
const bool disable_bias =
std::get<props::DisableBias>(*layer_impl_props).get();
- NNTR_THROW_IF(std::get<props::Unit>(lstm_props).empty(),
+ NNTR_THROW_IF(std::get<props::Unit>(lstmcore_props).empty(),
std::invalid_argument)
<< "unit property missing for lstm layer";
- const unsigned int unit = std::get<props::Unit>(lstm_props).get();
- const bool integrate_bias = std::get<props::IntegrateBias>(lstm_props).get();
+ const unsigned int unit = std::get<props::Unit>(lstmcore_props).get();
+ const bool integrate_bias =
+ std::get<props::IntegrateBias>(lstmcore_props).get();
const ActivationType hidden_state_activation_type =
- std::get<props::HiddenStateActivation>(lstm_props).get();
+ std::get<props::HiddenStateActivation>(lstmcore_props).get();
const ActivationType recurrent_activation_type =
- std::get<props::RecurrentActivation>(lstm_props).get();
+ std::get<props::RecurrentActivation>(lstmcore_props).get();
+
const bool return_sequences =
std::get<props::ReturnSequences>(lstm_props).get();
const bool bidirectional = std::get<props::Bidirectional>(lstm_props).get();
void LSTMLayer::setProperty(const std::vector<std::string> &values) {
const std::vector<std::string> &remain_props =
loadProperties(values, lstm_props);
- LayerImpl::setProperty(remain_props);
+ LSTMCore::setProperty(remain_props);
}
void LSTMLayer::exportTo(Exporter &exporter,
const ml::train::ExportMethods &method) const {
- LayerImpl::exportTo(exporter, method);
+ LSTMCore::exportTo(exporter, method);
exporter.saveResult(lstm_props, method, this);
}
const bool disable_bias =
std::get<props::DisableBias>(*layer_impl_props).get();
- const unsigned int unit = std::get<props::Unit>(lstm_props).get();
- const bool integrate_bias = std::get<props::IntegrateBias>(lstm_props).get();
+ const unsigned int unit = std::get<props::Unit>(lstmcore_props).get();
+ const bool integrate_bias =
+ std::get<props::IntegrateBias>(lstmcore_props).get();
+
const bool return_sequences =
std::get<props::ReturnSequences>(lstm_props).get();
const bool bidirectional = std::get<props::Bidirectional>(lstm_props).get();
? context.getTensor(wt_idx[LSTMParams::dropout_mask])
: empty;
- batch_first_forwarding(NUM_GATE, batch_size, feature_size, disable_bias, unit,
- integrate_bias, acti_func, recurrent_acti_func,
- enable_dropout, dropout_rate, max_timestep, false,
- input, weight_ih, weight_hh, bias_h, bias_ih, bias_hh,
- hidden_state, cell_state, ifgo, mask);
+ forwardingBatchFirstLSTM(NUM_GATE, batch_size, feature_size, disable_bias,
+ unit, integrate_bias, acti_func, recurrent_acti_func,
+ enable_dropout, dropout_rate, max_timestep, false,
+ input, weight_ih, weight_hh, bias_h, bias_ih,
+ bias_hh, hidden_state, cell_state, ifgo, mask);
if (bidirectional) {
const Tensor &reverse_weight_ih =
context.getTensor(wt_idx[LSTMParams::reverse_cell_state]);
Tensor &reverse_ifgo = context.getTensor(wt_idx[LSTMParams::reverse_ifgo]);
- batch_first_forwarding(
+ forwardingBatchFirstLSTM(
NUM_GATE, batch_size, feature_size, disable_bias, unit, integrate_bias,
acti_func, recurrent_acti_func, enable_dropout, dropout_rate,
max_timestep, true, input, reverse_weight_ih, reverse_weight_hh,
const Tensor &weight_ih = context.getWeight(wt_idx[LSTMParams::weight_ih]);
const Tensor &d_ifgos = context.getTensorGrad(wt_idx[LSTMParams::ifgo]);
- lstmcell_calcDerivative(outgoing_derivative, weight_ih, d_ifgos);
+ calcDerivativeLSTM(outgoing_derivative, weight_ih, d_ifgos);
if (bidirectional) {
const Tensor &reverse_weight_ih =
const Tensor &reverse_d_ifgos =
context.getTensorGrad(wt_idx[LSTMParams::reverse_ifgo]);
- lstmcell_calcDerivative(outgoing_derivative, reverse_weight_ih,
- reverse_d_ifgos, 1.0f);
+ calcDerivativeLSTM(outgoing_derivative, reverse_weight_ih, reverse_d_ifgos,
+ 1.0f);
}
}
const bool disable_bias =
std::get<props::DisableBias>(*layer_impl_props).get();
- const unsigned int unit = std::get<props::Unit>(lstm_props).get();
- const bool integrate_bias = std::get<props::IntegrateBias>(lstm_props).get();
+ const unsigned int unit = std::get<props::Unit>(lstmcore_props).get();
+ const bool integrate_bias =
+ std::get<props::IntegrateBias>(lstmcore_props).get();
+
const bool return_sequences =
std::get<props::ReturnSequences>(lstm_props).get();
const bool bidirectional = std::get<props::Bidirectional>(lstm_props).get();
? context.getTensor(wt_idx[LSTMParams::dropout_mask])
: empty;
- batch_first_calcGradient(
+ calcGradientBatchFirstLSTM(
NUM_GATE, batch_size, feature_size, disable_bias, unit, integrate_bias,
acti_func, recurrent_acti_func, return_sequences, bidirectional,
enable_dropout, dropout_rate, max_timestep, false, input,
Tensor &reverse_d_ifgo =
context.getTensorGrad(wt_idx[LSTMParams::reverse_ifgo]);
- batch_first_calcGradient(
+ calcGradientBatchFirstLSTM(
NUM_GATE, batch_size, feature_size, disable_bias, unit, integrate_bias,
acti_func, recurrent_acti_func, return_sequences, bidirectional,
enable_dropout, dropout_rate, max_timestep, true, input,
#include <acti_func.h>
#include <common_properties.h>
-#include <layer_impl.h>
+#include <lstmcell_core.h>
namespace nntrainer {
* @class LSTMLayer
* @brief LSTMLayer
*/
-class LSTMLayer : public LayerImpl {
+class LSTMLayer : public LSTMCore {
public:
/**
* @brief Constructor of LSTMLayer
private:
static constexpr unsigned int NUM_GATE = 4;
+ /** common properties like Unit, IntegrateBias, HiddenStateActivation and
+ * RecurrentActivation are in lstmcore_props */
+
/**
- * Unit: number of output neurons
- * IntegrateBias: integrate bias_ih, bias_hh to bias_h
- * HiddenStateActivation: activation type for hidden state. default is tanh
- * RecurrentActivation: activation type for recurrent. default is sigmoid
* ReturnSequence: option for return sequence
* Bidirectional: option for bidirectional
* DropOutRate: dropout rate
* MaxTimestep: maximum timestep for lstm
*
* */
- std::tuple<props::Unit, props::IntegrateBias, props::HiddenStateActivation,
- props::RecurrentActivation, props::ReturnSequences,
- props::Bidirectional, props::DropOutRate, props::MaxTimestep>
+ std::tuple<props::ReturnSequences, props::Bidirectional, props::DropOutRate,
+ props::MaxTimestep>
lstm_props;
std::array<unsigned int, 17> wt_idx; /**< indices of the weights */
/**
- * @brief activation function for h_t : default is tanh
- */
- ActiFunc acti_func;
-
- /**
- * @brief activation function for recurrent : default is sigmoid
- */
- ActiFunc recurrent_acti_func;
-
- /**
- * @brief to protect overflow
- */
- float epsilon;
+ * @brief run lstm forwarding for batch_first input
+ *
+ * @param NUM_GATE Number of gates, which is 4 for lstm
+ * @param batch_size batch size
+ * @param feature_size feature size
+ * @param disable_bias whether to disable bias or not
+ * @param unit number of output neurons
+ * @param integrate_bias integrate bias_ih, bias_hh to bias_h
+ * @param acti_func activation function for memory cell, cell state
+ * @param recurrent_acti_func activation function for input/output/forget
+ * gate
+ * @param enable_dropout whether to apply dropout
+ * @param dropout_rate dropout rate
+ * @param max_timestep maximum timestep for lstm
+ * @param reverse indicates forward/backward direction for input in
+ * bidirectional lstm
+ * @param input_ input
+ * @param weight_ih weight for input to hidden
+ * @param weight_hh weight for hidden to hidden
+ * @param bias_h bias for input and hidden.
+ * @param bias_ih bias for input
+ * @param bias_hh bias for hidden
+ * @param hidden_state_ hidden state
+ * @param cell_state_ cell state
+ * @param ifgo_ input gate, forget gate, memory cell, output gate
+ * @param mask_ dropout mask
+ */
+ void forwardingBatchFirstLSTM(
+ unsigned int NUM_GATE, const unsigned int batch_size,
+ const unsigned int feature_size, const bool disable_bias,
+ const unsigned int unit, const bool integrate_bias, ActiFunc &acti_func,
+ ActiFunc &recurrent_acti_func, const bool enable_dropout,
+ const float dropout_rate, const unsigned int max_timestep,
+ const bool reverse, const Tensor &input_, const Tensor &weight_ih,
+ const Tensor &weight_hh, const Tensor &bias_h, const Tensor &bias_ih,
+ const Tensor &bias_hh, Tensor &hidden_state_, Tensor &cell_state_,
+ Tensor &ifgo_, const Tensor &mask_);
+
+ /**
+ * @brief calculate lstm gradient for batch_first input
+ *
+ * @param NUM_GATE Number of gates, which is 4 for lstm
+ * @param batch_size batch size
+ * @param feature_size feature size
+ * @param disable_bias whether to disable bias or not
+ * @param unit number of output neurons
+ * @param integrate_bias integrate bias_ih, bias_hh to bias_h
+ * @param acti_func activation function for memory cell, cell state
+ * @param recurrent_acti_func activation function for input/output/forget
+ * gate
+ * @param return_sequences return sequences
+ * @param bidirectional bidirectional lstm
+ * @param enable_dropout whether to apply dropout
+ * @param dropout_rate dropout rate
+ * @param max_timestep maximum timestep for lstm
+ * @param reverse indicates forward/backward direction for input in
+ * bidirectional lstm
+ * @param input_ input
+ * @param incoming_derivative derivative incoming from the output
+ * @param d_weight_ih weight_ih(weight for input to hidden) gradient
+ * @param weight_hh weight for hidden to hidden
+ * @param d_weight_hh weight_hh(weight for hidden to hidden) gradient
+ * @param d_bias_h bias_h(bias for input and hidden) gradient
+ * @param d_bias_ih bias_ih(bias for input) gradient
+ * @param d_bias_hh bias_hh(bias for hidden) gradient
+ * @param hidden_state_ hidden state
+ * @param d_hidden_state_ hidden state gradient
+ * @param cell_state_ cell state
+ * @param d_cell_state_ cell state gradient
+ * @param ifgo_ input gate, forget gate, memory cell, output gate
+ * @param d_ifgo_ gradient for input gate, forget gate, memory cell, output
+ * gate
+ * @param mask_ dropout mask
+ */
+ void calcGradientBatchFirstLSTM(
+ unsigned int NUM_GATE, const unsigned int batch_size,
+ const unsigned int feature_size, const bool disable_bias,
+ const unsigned int unit, const bool integrate_bias, ActiFunc &acti_func,
+ ActiFunc &recurrent_acti_func, const bool return_sequences,
+ const bool bidirectional, const bool enable_dropout,
+ const float dropout_rate, const unsigned int max_timestep,
+ const bool reverse, const Tensor &input_, const Tensor &incoming_derivative,
+ Tensor &d_weight_ih, const Tensor &weight_hh, Tensor &d_weight_hh,
+ Tensor &d_bias_h, Tensor &d_bias_ih, Tensor &d_bias_hh,
+ const Tensor &hidden_state_, Tensor &d_hidden_state_,
+ const Tensor &cell_state_, Tensor &d_cell_state_, const Tensor &ifgo_,
+ Tensor &d_ifgo_, const Tensor &mask_);
};
} // namespace nntrainer
#include <layer_context.h>
#include <lstmcell.h>
-#include <lstmcell_core.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <node_exporter.h>
dropout_mask
};
-LSTMCellLayer::LSTMCellLayer() :
- LayerImpl(),
- lstmcell_props(props::Unit(), props::IntegrateBias(),
- props::HiddenStateActivation() = ActivationType::ACT_TANH,
- props::RecurrentActivation() = ActivationType::ACT_SIGMOID,
- props::DropOutRate()),
- acti_func(ActivationType::ACT_NONE, true),
- recurrent_acti_func(ActivationType::ACT_NONE, true),
- epsilon(1e-3) {
+LSTMCellLayer::LSTMCellLayer() : lstmcell_props(props::DropOutRate()) {
wt_idx.fill(std::numeric_limits<unsigned>::max());
}
const bool disable_bias =
std::get<props::DisableBias>(*layer_impl_props).get();
- NNTR_THROW_IF(std::get<props::Unit>(lstmcell_props).empty(),
+ NNTR_THROW_IF(std::get<props::Unit>(lstmcore_props).empty(),
std::invalid_argument)
<< "unit property missing for lstmcell layer";
- const unsigned int unit = std::get<props::Unit>(lstmcell_props).get();
+ const unsigned int unit = std::get<props::Unit>(lstmcore_props).get();
const bool integrate_bias =
- std::get<props::IntegrateBias>(lstmcell_props).get();
+ std::get<props::IntegrateBias>(lstmcore_props).get();
const ActivationType hidden_state_activation_type =
- std::get<props::HiddenStateActivation>(lstmcell_props).get();
+ std::get<props::HiddenStateActivation>(lstmcore_props).get();
const ActivationType recurrent_activation_type =
- std::get<props::RecurrentActivation>(lstmcell_props).get();
+ std::get<props::RecurrentActivation>(lstmcore_props).get();
+
const float dropout_rate = std::get<props::DropOutRate>(lstmcell_props).get();
NNTR_THROW_IF(context.getNumInputs() != 3, std::invalid_argument)
void LSTMCellLayer::setProperty(const std::vector<std::string> &values) {
const std::vector<std::string> &remain_props =
loadProperties(values, lstmcell_props);
- LayerImpl::setProperty(remain_props);
+ LSTMCore::setProperty(remain_props);
}
void LSTMCellLayer::exportTo(Exporter &exporter,
const ml::train::ExportMethods &method) const {
- LayerImpl::exportTo(exporter, method);
+ LSTMCore::exportTo(exporter, method);
exporter.saveResult(lstmcell_props, method, this);
}
const bool disable_bias =
std::get<props::DisableBias>(*layer_impl_props).get();
- const unsigned int unit = std::get<props::Unit>(lstmcell_props).get();
- const float dropout_rate = std::get<props::DropOutRate>(lstmcell_props).get();
+ const unsigned int unit = std::get<props::Unit>(lstmcore_props).get();
const bool integrate_bias =
- std::get<props::IntegrateBias>(lstmcell_props).get();
+ std::get<props::IntegrateBias>(lstmcore_props).get();
+
+ const float dropout_rate = std::get<props::DropOutRate>(lstmcell_props).get();
const Tensor &input = context.getInput(INOUT_INDEX::INPUT);
const Tensor &prev_hidden_state =
Tensor &ifgo = context.getTensor(wt_idx[LSTMCellParams::ifgo]);
- lstmcell_forwarding(batch_size, unit, disable_bias, integrate_bias, acti_func,
- recurrent_acti_func, input, prev_hidden_state,
- prev_cell_state, hidden_state, cell_state, weight_ih,
- weight_hh, bias_h, bias_ih, bias_hh, ifgo);
+ forwardLSTM(batch_size, unit, disable_bias, integrate_bias, acti_func,
+ recurrent_acti_func, input, prev_hidden_state, prev_cell_state,
+ hidden_state, cell_state, weight_ih, weight_hh, bias_h, bias_ih,
+ bias_hh, ifgo);
if (dropout_rate > epsilon && training) {
Tensor &dropout_mask =
Tensor &outgoing_derivative =
context.getOutgoingDerivative(INOUT_INDEX::INPUT);
- lstmcell_calcDerivative(outgoing_derivative, weight_ih, d_ifgo);
+ calcDerivativeLSTM(outgoing_derivative, weight_ih, d_ifgo);
}
void LSTMCellLayer::calcGradient(RunLayerContext &context) {
const bool disable_bias =
std::get<props::DisableBias>(*layer_impl_props).get();
- const unsigned int unit = std::get<props::Unit>(lstmcell_props).get();
+ const unsigned int unit = std::get<props::Unit>(lstmcore_props).get();
const bool integrate_bias =
- std::get<props::IntegrateBias>(lstmcell_props).get();
+ std::get<props::IntegrateBias>(lstmcore_props).get();
+
const float dropout_rate = std::get<props::DropOutRate>(lstmcell_props);
const Tensor &input = context.getInput(INOUT_INDEX::INPUT);
d_hidden_state.multiply(dropout_mask, d_hidden_state_masked);
}
- lstmcell_calcGradient(
- batch_size, unit, disable_bias, integrate_bias, acti_func,
- recurrent_acti_func, input, prev_hidden_state, d_prev_hidden_state,
- prev_cell_state, d_prev_cell_state,
- dropout_rate > epsilon ? d_hidden_state_masked : d_hidden_state, cell_state,
- d_cell_state, d_weight_ih, weight_hh, d_weight_hh, d_bias_h, d_bias_ih,
- d_bias_hh, ifgo, d_ifgo);
+ calcGradientLSTM(batch_size, unit, disable_bias, integrate_bias, acti_func,
+ recurrent_acti_func, input, prev_hidden_state,
+ d_prev_hidden_state, prev_cell_state, d_prev_cell_state,
+ dropout_rate > epsilon ? d_hidden_state_masked
+ : d_hidden_state,
+ cell_state, d_cell_state, d_weight_ih, weight_hh,
+ d_weight_hh, d_bias_h, d_bias_ih, d_bias_hh, ifgo, d_ifgo);
}
void LSTMCellLayer::setBatch(RunLayerContext &context, unsigned int batch) {
#include <acti_func.h>
#include <common_properties.h>
-#include <layer_impl.h>
+#include <lstmcell_core.h>
namespace nntrainer {
* @class LSTMCellLayer
* @brief LSTMCellLayer
*/
-class LSTMCellLayer : public LayerImpl {
+class LSTMCellLayer : public LSTMCore {
public:
/**
* @brief Constructor of LSTMCellLayer
OUTPUT_CELL_STATE = 1
};
+ /** common properties like Unit, IntegrateBias, HiddenStateActivation and
+ * RecurrentActivation are in lstmcore_props */
+
/**
- * Unit: number of output neurons
- * IntegrateBias: integrate bias_ih, bias_hh to bias_h
- * HiddenStateActivation: activation type for hidden state. default is tanh
- * RecurrentActivation: activation type for recurrent. default is sigmoid
* DropOutRate: dropout rate
*
* */
- std::tuple<props::Unit, props::IntegrateBias, props::HiddenStateActivation,
- props::RecurrentActivation, props::DropOutRate>
- lstmcell_props;
- std::array<unsigned int, 7> wt_idx; /**< indices of the weights */
-
- /**
- * @brief activation function for h_t : default is tanh
- */
- ActiFunc acti_func;
+ std::tuple<props::DropOutRate> lstmcell_props;
- /**
- * @brief activation function for recurrent : default is sigmoid
- */
- ActiFunc recurrent_acti_func;
-
- /**
- * @brief to protect overflow
- */
- float epsilon;
+ std::array<unsigned int, 7> wt_idx; /**< indices of the weights */
};
} // namespace nntrainer
*
* @file lstmcell_core.cpp
* @date 25 November 2021
- * @brief These are lstm core functions.
+ * @brief This is the lstm core class.
* @see https://github.com/nnstreamer/nntrainer
* @author hyeonseok lee <hs89.lee@samsung.com>
* @bug No known bugs except for NYI items
namespace nntrainer {
-void lstmcell_forwarding(const unsigned int batch_size, const unsigned int unit,
- const bool disable_bias, const bool integrate_bias,
- ActiFunc &acti_func, ActiFunc &recurrent_acti_func,
- const Tensor &input, const Tensor &prev_hidden_state,
- const Tensor &prev_cell_state, Tensor &hidden_state,
- Tensor &cell_state, const Tensor &weight_ih,
- const Tensor &weight_hh, const Tensor &bias_h,
- const Tensor &bias_ih, const Tensor &bias_hh,
- Tensor &ifgo) {
+LSTMCore::LSTMCore() :
+ LayerImpl(),
+ lstmcore_props(props::Unit(), props::IntegrateBias(),
+ props::HiddenStateActivation() = ActivationType::ACT_TANH,
+ props::RecurrentActivation() = ActivationType::ACT_SIGMOID),
+ acti_func(ActivationType::ACT_NONE, true),
+ recurrent_acti_func(ActivationType::ACT_NONE, true),
+ epsilon(1e-3) {}
+
+void LSTMCore::forwardLSTM(const unsigned int batch_size,
+ const unsigned int unit, const bool disable_bias,
+ const bool integrate_bias, ActiFunc &acti_func,
+ ActiFunc &recurrent_acti_func, const Tensor &input,
+ const Tensor &prev_hidden_state,
+ const Tensor &prev_cell_state, Tensor &hidden_state,
+ Tensor &cell_state, const Tensor &weight_ih,
+ const Tensor &weight_hh, const Tensor &bias_h,
+ const Tensor &bias_ih, const Tensor &bias_hh,
+ Tensor &ifgo) {
input.dot(weight_ih, ifgo);
prev_hidden_state.dot(weight_hh, ifgo, false, false, 1.0);
if (!disable_bias) {
hidden_state.multiply_i_strided(output_gate);
}
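For reference, forwardLSTM above implements the standard LSTM cell update.
With the default activations (recurrent_acti_func = sigmoid on the gates,
acti_func = tanh on the memory cell and cell state) and ifgo laid out as
[input | forget | memory cell | output], the two dot() calls plus the bias
add amount to:

    \mathrm{ifgo} = x_t W_{ih} + h_{t-1} W_{hh} + b
    i_t = \sigma(\mathrm{ifgo}_i), \quad f_t = \sigma(\mathrm{ifgo}_f), \quad
    g_t = \tanh(\mathrm{ifgo}_g), \quad o_t = \sigma(\mathrm{ifgo}_o)
    c_t = f_t \odot c_{t-1} + i_t \odot g_t
    h_t = o_t \odot \tanh(c_t)

where b is bias_h when integrate_bias is set and bias_ih + bias_hh otherwise.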
-void lstmcell_calcDerivative(Tensor &outgoing_derivative,
- const Tensor &weight_ih, const Tensor &d_ifgo,
- const float alpha) {
+void LSTMCore::calcDerivativeLSTM(Tensor &outgoing_derivative,
+ const Tensor &weight_ih, const Tensor &d_ifgo,
+ const float alpha) {
d_ifgo.dot(weight_ih, outgoing_derivative, false, true, alpha);
}
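calcDerivativeLSTM is a single GEMM. Reading dot()'s trailing argument as a
scale on the existing output (an assumption consistent with the bidirectional
path below, which passes 1.0f to accumulate the reverse direction into the
same tensor):

    \frac{\partial \mathcal{L}}{\partial x} \leftarrow
      \frac{\partial \mathcal{L}}{\partial \mathrm{ifgo}} \, W_{ih}^{\top}
      + \alpha \cdot \frac{\partial \mathcal{L}}{\partial x}

so the default alpha = 0.0f overwrites outgoing_derivative while alpha = 1.0f
adds to it.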
-void lstmcell_calcGradient(
+void LSTMCore::calcGradientLSTM(
const unsigned int batch_size, const unsigned int unit,
const bool disable_bias, const bool integrate_bias, ActiFunc &acti_func,
ActiFunc &recurrent_acti_func, const Tensor &input,
d_ifgo.dot(weight_hh, d_prev_hidden_state, false, true);
}
+void LSTMCore::setProperty(const std::vector<std::string> &values) {
+ const std::vector<std::string> &remain_props =
+ loadProperties(values, lstmcore_props);
+ LayerImpl::setProperty(remain_props);
+}
+
+void LSTMCore::exportTo(Exporter &exporter,
+ const ml::train::ExportMethods &method) const {
+ LayerImpl::exportTo(exporter, method);
+ exporter.saveResult(lstmcore_props, method, this);
+}
+
} // namespace nntrainer
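A hypothetical usage sketch of the setProperty chain defined above: each level
consumes the keys it owns via loadProperties() and forwards the remainder, so
the common keys now land in lstmcore_props no matter which subclass receives
them (the property values here are illustrative only):

    // e.g. on an LSTMLayer instance:
    layer.setProperty({"unit=10", "return_sequences=true"});
    // "return_sequences" is consumed by LSTMLayer's lstm_props;
    // "unit" falls through to LSTMCore's lstmcore_props;
    // anything still unconsumed reaches LayerImpl::setProperty().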
*
* @file lstmcell_core.h
* @date 25 November 2021
- * @brief These are lstm core functions.
+ * @brief This is the lstm core class.
* @see https://github.com/nnstreamer/nntrainer
* @author hyeonseok lee <hs89.lee@samsung.com>
* @bug No known bugs except for NYI items
#ifdef __cplusplus
#include <acti_func.h>
+#include <common.h>
+#include <layer_impl.h>
+#include <node_exporter.h>
namespace nntrainer {
/**
- * @brief lstm cell forwarding implementation
- *
- * @param batch_size batch size
- * @param unit number of output neurons
- * @param disable_bias whether to disable bias or not
- * @param integrate_bias integrate bias_ih, bias_hh to bias_h
- * @param acti_func activation function for memory cell, cell state
- * @param recurrent_acti_func activation function for input/output/forget
- * gate
- * @param input input
- * @param prev_hidden_state previous hidden state
- * @param prev_cell_state previous cell state
- * @param hidden_state hidden state
- * @param cell_state cell state
- * @param weight_ih weight for input to hidden
- * @param weight_hh weight for hidden to hidden
- * @param bias_h bias for input and hidden.
- * @param bias_ih bias for input
- * @param bias_hh bias for hidden
- * @param ifgo input gate, forget gate, memory cell, output gate
+ * @class LSTMCore
+ * @brief LSTMCore
*/
-void lstmcell_forwarding(const unsigned int batch_size, const unsigned int unit,
- const bool disable_bias, const bool integrate_bias,
- ActiFunc &acti_func, ActiFunc &recurrent_acti_func,
- const Tensor &input, const Tensor &prev_hidden_state,
- const Tensor &prev_cell_state, Tensor &hidden_state,
- Tensor &cell_state, const Tensor &weight_ih,
- const Tensor &weight_hh, const Tensor &bias_h,
- const Tensor &bias_ih, const Tensor &bias_hh,
- Tensor &ifgo);
+class LSTMCore : public LayerImpl {
+public:
+ /**
+ * @brief Constructor of LSTMCore
+ */
+ LSTMCore();
-/**
- * @brief lstm cell calculate derivative implementation
- *
- * @param outgoing_derivative derivative for input
- * @param weight_ih weight for input to hidden
- * @param d_ifgo gradient for input gate, forget gate, memory cell, output gate
- * @param alpha value to be scale outgoing_derivative
- */
-void lstmcell_calcDerivative(Tensor &outgoing_derivative,
- const Tensor &weight_ih, const Tensor &d_ifgo,
- const float alpha = 0.0f);
+ /**
+ * @brief Destructor of LSTMCore
+ */
+ ~LSTMCore() = default;
-/**
- * @brief lstm cell calculate gradient implementation
- *
- * @param batch_size batch size
- * @param unit number of output neurons
- * @param disable_bias whether to disable bias or not
- * @param integrate_bias integrate bias_ih, bias_hh to bias_h
- * @param acti_func activation function for memory cell, cell state
- * @param recurrent_acti_func activation function for input/output/forget
- * gate
- * @param input input
- * @param prev_hidden_state previous hidden state
- * @param d_prev_hidden_state previous hidden state gradient
- * @param prev_cell_state previous cell state
- * @param d_prev_cell_state previous cell state gradient
- * @param d_hidden_state hidden state gradient
- * @param cell_state cell state
- * @param d_cell_state cell state gradient
- * @param d_weight_ih weight_ih(weight for input to hidden) gradient
- * @param weight_hh weight for hidden to hidden
- * @param d_weight_hh weight_hh(weight for hidden to hidden) gradient
- * @param d_bias_h bias_h(bias for input and hidden) gradient
- * @param d_bias_ih bias_ih(bias for input) gradient
- * @param d_bias_hh bias_hh(bias for hidden) gradient
- * @param ifgo input gate, forget gate, memory cell, output gate
- * @param d_ifgo gradient for input gate, forget gate, memory cell, output gate
- */
-void lstmcell_calcGradient(
- const unsigned int batch_size, const unsigned int unit,
- const bool disable_bias, const bool integrate_bias, ActiFunc &acti_func,
- ActiFunc &recurrent_acti_func, const Tensor &input,
- const Tensor &prev_hidden_state, Tensor &d_prev_hidden_state,
- const Tensor &prev_cell_state, Tensor &d_prev_cell_state,
- const Tensor &d_hidden_state, const Tensor &cell_state,
- const Tensor &d_cell_state, Tensor &d_weight_ih, const Tensor &weight_hh,
- Tensor &d_weight_hh, Tensor &d_bias_h, Tensor &d_bias_ih, Tensor &d_bias_hh,
- const Tensor &ifgo, Tensor &d_ifgo);
+ /**
+ * @brief lstm cell forwarding implementation
+ *
+ * @param batch_size batch size
+ * @param unit number of output neurons
+ * @param disable_bias whether to disable bias or not
+ * @param integrate_bias integrate bias_ih, bias_hh to bias_h
+ * @param acti_func activation function for memory cell, cell state
+ * @param recurrent_acti_func activation function for input/output/forget
+ * gate
+ * @param input input
+ * @param prev_hidden_state previous hidden state
+ * @param prev_cell_state previous cell state
+ * @param hidden_state hidden state
+ * @param cell_state cell state
+ * @param weight_ih weight for input to hidden
+ * @param weight_hh weight for hidden to hidden
+ * @param bias_h bias for input and hidden.
+ * @param bias_ih bias for input
+ * @param bias_hh bias for hidden
+ * @param ifgo input gate, forget gate, memory cell, output gate
+ */
+ void forwardLSTM(const unsigned int batch_size, const unsigned int unit,
+ const bool disable_bias, const bool integrate_bias,
+ ActiFunc &acti_func, ActiFunc &recurrent_acti_func,
+ const Tensor &input, const Tensor &prev_hidden_state,
+ const Tensor &prev_cell_state, Tensor &hidden_state,
+ Tensor &cell_state, const Tensor &weight_ih,
+ const Tensor &weight_hh, const Tensor &bias_h,
+ const Tensor &bias_ih, const Tensor &bias_hh, Tensor &ifgo);
+
+ /**
+ * @brief lstm cell calculate derivative implementation
+ *
+ * @param outgoing_derivative derivative for input
+ * @param weight_ih weight for input to hidden
+ * @param d_ifgo gradient for input gate, forget gate, memory cell, output
+ * gate
+ * @param alpha value by which to scale outgoing_derivative
+ */
+ void calcDerivativeLSTM(Tensor &outgoing_derivative, const Tensor &weight_ih,
+ const Tensor &d_ifgo, const float alpha = 0.0f);
+
+ /**
+ * @brief lstm cell calculate gradient implementation
+ *
+ * @param batch_size batch size
+ * @param unit number of output neurons
+ * @param disable_bias whether to disable bias or not
+ * @param integrate_bias integrate bias_ih, bias_hh to bias_h
+ * @param acti_func activation function for memory cell, cell state
+ * @param recurrent_acti_func activation function for input/output/forget
+ * gate
+ * @param input input
+ * @param prev_hidden_state previous hidden state
+ * @param d_prev_hidden_state previous hidden state gradient
+ * @param prev_cell_state previous cell state
+ * @param d_prev_cell_state previous cell state gradient
+ * @param d_hidden_state hidden state gradient
+ * @param cell_state cell state
+ * @param d_cell_state cell state gradient
+ * @param d_weight_ih weight_ih(weight for input to hidden) gradient
+ * @param weight_hh weight for hidden to hidden
+ * @param d_weight_hh weight_hh(weight for hidden to hidden) gradient
+ * @param d_bias_h bias_h(bias for input and hidden) gradient
+ * @param d_bias_ih bias_ih(bias for input) gradient
+ * @param d_bias_hh bias_hh(bias for hidden) gradient
+ * @param ifgo input gate, forget gate, memory cell, output gate
+ * @param d_ifgo gradient for input gate, forget gate, memory cell, output
+ * gate
+ */
+ void calcGradientLSTM(const unsigned int batch_size, const unsigned int unit,
+ const bool disable_bias, const bool integrate_bias,
+ ActiFunc &acti_func, ActiFunc &recurrent_acti_func,
+ const Tensor &input, const Tensor &prev_hidden_state,
+ Tensor &d_prev_hidden_state,
+ const Tensor &prev_cell_state,
+ Tensor &d_prev_cell_state, const Tensor &d_hidden_state,
+ const Tensor &cell_state, const Tensor &d_cell_state,
+ Tensor &d_weight_ih, const Tensor &weight_hh,
+ Tensor &d_weight_hh, Tensor &d_bias_h,
+ Tensor &d_bias_ih, Tensor &d_bias_hh,
+ const Tensor &ifgo, Tensor &d_ifgo);
+
+ /**
+ * @copydoc Layer::setProperty(const std::vector<std::string> &values)
+ */
+ void setProperty(const std::vector<std::string> &values) override;
+
+ /**
+ * @copydoc Layer::exportTo(Exporter &exporter, const
+ * ml::train::ExportMethods &method)
+ */
+ void exportTo(Exporter &exporter,
+ const ml::train::ExportMethods &method) const override;
+
+protected:
+ /**
+ * Unit: number of output neurons
+ * IntegrateBias: integrate bias_ih, bias_hh to bias_h
+ * HiddenStateActivation: activation type for hidden state. default is tanh
+ * RecurrentActivation: activation type for recurrent. default is sigmoid
+ *
+ * */
+ std::tuple<props::Unit, props::IntegrateBias, props::HiddenStateActivation,
+ props::RecurrentActivation>
+ lstmcore_props;
+
+ /**
+ * @brief activation function: default is tanh
+ */
+ ActiFunc acti_func;
+
+ /**
+ * @brief activation function for recurrent: default is sigmoid
+ */
+ ActiFunc recurrent_acti_func;
+
+ /**
+ * @brief to protect overflow
+ */
+ float epsilon;
+};
} // namespace nntrainer
#endif /* __cplusplus */
};
ZoneoutLSTMCellLayer::ZoneoutLSTMCellLayer() :
- LayerImpl(),
- zoneout_lstmcell_props(
- props::Unit(), props::IntegrateBias(),
- props::HiddenStateActivation() = ActivationType::ACT_TANH,
- props::RecurrentActivation() = ActivationType::ACT_SIGMOID,
- HiddenStateZoneOutRate(), CellStateZoneOutRate(), Test(),
- props::MaxTimestep(), props::Timestep()),
- acti_func(ActivationType::ACT_NONE, true),
- recurrent_acti_func(ActivationType::ACT_NONE, true),
- epsilon(1e-3) {
+ zoneout_lstmcell_props(HiddenStateZoneOutRate(), CellStateZoneOutRate(),
+ Test(), props::MaxTimestep(), props::Timestep()) {
wt_idx.fill(std::numeric_limits<unsigned>::max());
}
const bool disable_bias =
std::get<props::DisableBias>(*layer_impl_props).get();
- NNTR_THROW_IF(std::get<props::Unit>(zoneout_lstmcell_props).empty(),
+ NNTR_THROW_IF(std::get<props::Unit>(lstmcore_props).empty(),
std::invalid_argument)
<< "unit property missing for zoneout_lstmcell layer";
- const unsigned int unit = std::get<props::Unit>(zoneout_lstmcell_props).get();
+ const unsigned int unit = std::get<props::Unit>(lstmcore_props).get();
const bool integrate_bias =
- std::get<props::IntegrateBias>(zoneout_lstmcell_props).get();
+ std::get<props::IntegrateBias>(lstmcore_props).get();
const ActivationType hidden_state_activation_type =
- std::get<props::HiddenStateActivation>(zoneout_lstmcell_props).get();
+ std::get<props::HiddenStateActivation>(lstmcore_props).get();
const ActivationType recurrent_activation_type =
- std::get<props::RecurrentActivation>(zoneout_lstmcell_props).get();
+ std::get<props::RecurrentActivation>(lstmcore_props).get();
+
const bool test = std::get<Test>(zoneout_lstmcell_props).get();
const unsigned int max_timestep =
std::get<props::MaxTimestep>(zoneout_lstmcell_props).get();
void ZoneoutLSTMCellLayer::setProperty(const std::vector<std::string> &values) {
const std::vector<std::string> &remain_props =
loadProperties(values, zoneout_lstmcell_props);
- LayerImpl::setProperty(remain_props);
+ LSTMCore::setProperty(remain_props);
}
void ZoneoutLSTMCellLayer::exportTo(
Exporter &exporter, const ml::train::ExportMethods &method) const {
- LayerImpl::exportTo(exporter, method);
+ LSTMCore::exportTo(exporter, method);
exporter.saveResult(zoneout_lstmcell_props, method, this);
}
const bool disable_bias =
std::get<props::DisableBias>(*layer_impl_props).get();
- const unsigned int unit = std::get<props::Unit>(zoneout_lstmcell_props).get();
+ const unsigned int unit = std::get<props::Unit>(lstmcore_props).get();
const bool integrate_bias =
- std::get<props::IntegrateBias>(zoneout_lstmcell_props).get();
+ std::get<props::IntegrateBias>(lstmcore_props).get();
+
const float hidden_state_zoneout_rate =
std::get<HiddenStateZoneOutRate>(zoneout_lstmcell_props).get();
const float cell_state_zoneout_rate =
Tensor &lstm_cell_state =
context.getTensor(wt_idx[ZoneoutLSTMParams::lstm_cell_state]);
- lstmcell_forwarding(batch_size, unit, disable_bias, integrate_bias, acti_func,
- recurrent_acti_func, input, prev_hidden_state,
- prev_cell_state, hidden_state, lstm_cell_state, weight_ih,
- weight_hh, bias_h, bias_ih, bias_hh, ifgo);
+ forwardLSTM(batch_size, unit, disable_bias, integrate_bias, acti_func,
+ recurrent_acti_func, input, prev_hidden_state, prev_cell_state,
+ hidden_state, lstm_cell_state, weight_ih, weight_hh, bias_h,
+ bias_ih, bias_hh, ifgo);
if (training) {
Tensor &hs_zoneout_mask =
context.getWeight(wt_idx[ZoneoutLSTMParams::weight_ih]);
const Tensor &d_ifgo = context.getTensorGrad(wt_idx[ZoneoutLSTMParams::ifgo]);
- lstmcell_calcDerivative(outgoing_derivative, weight_ih, d_ifgo);
+ calcDerivativeLSTM(outgoing_derivative, weight_ih, d_ifgo);
}
void ZoneoutLSTMCellLayer::calcGradient(RunLayerContext &context) {
const bool disable_bias =
std::get<props::DisableBias>(*layer_impl_props).get();
- const unsigned int unit = std::get<props::Unit>(zoneout_lstmcell_props).get();
+ const unsigned int unit = std::get<props::Unit>(lstmcore_props).get();
const bool integrate_bias =
- std::get<props::IntegrateBias>(zoneout_lstmcell_props).get();
+ std::get<props::IntegrateBias>(lstmcore_props).get();
+
const bool test = std::get<Test>(zoneout_lstmcell_props).get();
const unsigned int max_timestep =
std::get<props::MaxTimestep>(zoneout_lstmcell_props).get();
d_prev_cell_state_residual);
d_cell_state.multiply(cell_state_zoneout_mask, d_lstm_cell_state);
- lstmcell_calcGradient(
- batch_size, unit, disable_bias, integrate_bias, acti_func,
- recurrent_acti_func, input, prev_hidden_state, d_prev_hidden_state,
- prev_cell_state, d_prev_cell_state, d_hidden_state_masked, lstm_cell_state,
- d_lstm_cell_state, d_weight_ih, weight_hh, d_weight_hh, d_bias_h, d_bias_ih,
- d_bias_hh, ifgo, d_ifgo);
+ calcGradientLSTM(batch_size, unit, disable_bias, integrate_bias, acti_func,
+ recurrent_acti_func, input, prev_hidden_state,
+ d_prev_hidden_state, prev_cell_state, d_prev_cell_state,
+ d_hidden_state_masked, lstm_cell_state, d_lstm_cell_state,
+ d_weight_ih, weight_hh, d_weight_hh, d_bias_h, d_bias_ih,
+ d_bias_hh, ifgo, d_ifgo);
d_prev_hidden_state.add_i(d_prev_hidden_state_residual);
d_prev_cell_state.add_i(d_prev_cell_state_residual);
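For context, the residual terms above follow the standard zoneout formulation
(Krueger et al.), which this layer appears to implement: each state unit keeps
its previous value where the mask is zero, so with hidden/cell zoneout masks
m_h and m_c,

    h_t = m_h \odot h_t^{\mathrm{lstm}} + (1 - m_h) \odot h_{t-1}, \qquad
    c_t = m_c \odot c_t^{\mathrm{lstm}} + (1 - m_c) \odot c_{t-1}

and the backward pass splits each incoming derivative accordingly: a masked
part routed through calcGradientLSTM and a (1 - m) residual added directly to
the previous state's gradient.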
#include <acti_func.h>
#include <common_properties.h>
-#include <layer_impl.h>
#include <lstmcell_core.h>
namespace nntrainer {
* @class ZoneoutLSTMCellLayer
* @brief ZoneoutLSTMCellLayer
*/
-class ZoneoutLSTMCellLayer : public LayerImpl {
+class ZoneoutLSTMCellLayer : public LSTMCore {
public:
/**
* @brief HiddenStateZoneOutRate property, this defines zone out rate for
OUTPUT_CELL_STATE = 1
};
+ /** common properties like Unit, IntegrateBias, HiddenStateActivation and
+ * RecurrentActivation are in lstmcore_props */
+
/**
- * Unit: number of output neurons
- * IntegrateBias: integrate bias_ih, bias_hh to bias_h
- * HiddenStateActivation: activation type for hidden state. default is tanh
- * RecurrentActivation: activation type for recurrent. default is sigmoid
* HiddenStateZoneOutRate: zoneout rate for hidden_state
* CellStateZoneOutRate: zoneout rate for cell_state
* Test: property for test mode
* TimeStep: timestep for which lstm should operate
*
* */
- std::tuple<props::Unit, props::IntegrateBias, props::HiddenStateActivation,
- props::RecurrentActivation, HiddenStateZoneOutRate,
- CellStateZoneOutRate, Test, props::MaxTimestep, props::Timestep>
+ std::tuple<HiddenStateZoneOutRate, CellStateZoneOutRate, Test,
+ props::MaxTimestep, props::Timestep>
zoneout_lstmcell_props;
std::array<unsigned int, 9> wt_idx; /**< indices of the weights */
-
- /**
- * @brief activation function for h_t : default is tanh
- */
- ActiFunc acti_func;
-
- /**
- * @brief activation function for recurrent : default is sigmoid
- */
- ActiFunc recurrent_acti_func;
-
- /**
- * @brief Protect overflow
- */
- float epsilon;
};
} // namespace nntrainer