h_prev = Tensor(h_dim);
- if (LayerV1::activation_type == ActivationType::ACT_NONE) {
- LayerV1::activation_type = ActivationType::ACT_TANH;
- acti_func.setActiFunc(activation_type);
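+ // default the hidden state activation to tanh when none was set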
+ if (hidden_state_activation_type == ActivationType::ACT_NONE) {
+ hidden_state_activation_type = ActivationType::ACT_TANH;
+ acti_func.setActiFunc(hidden_state_activation_type);
}
if (recurrent_activation_type == ActivationType::ACT_NONE) {
Tensor hs_prev;
Tensor hs;
+ // zt = sigma(W_hz.h_prev + W_xz.xs)
+ // rt = sigma(W_hr.h_prev + W_xr.xs)
+ // gt = tanh((h_prev*rt).W_hg + W_xg.xs)
+ // h_nx = (1-zt)*gt + zt*h_prev
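+ // where sigma is the recurrent (gate) activation, typically sigmoid, and
+ // W_h* / W_x* multiply the previous hidden state h_prev / the input xs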
+
for (unsigned int b = 0; b < input_dim[0].batch(); ++b) {
Tensor islice = input_.getBatchSlice(b, 1);
Tensor oslice = hidden_.getBatchSlice(b, 1);
// TODO: make this allocation conditional (e.g., behind a #define for tests) to save memory
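+ // temp_hidden: scratch Var_Grad for the hidden state (value plus gradient)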
hidden = std::make_shared<Var_Grad>(d, true, true, "RNN:temp_hidden");
- if (LayerV1::activation_type == ActivationType::ACT_NONE) {
- LayerV1::activation_type = ActivationType::ACT_TANH;
- acti_func.setActiFunc(activation_type);
+ if (hidden_state_activation_type == ActivationType::ACT_NONE) {
+ hidden_state_activation_type = ActivationType::ACT_TANH;
+ acti_func.setActiFunc(hidden_state_activation_type);
}
return status;
output_dim[0].width(unit);
}
break;
- case PropertyType::activation:
+ case PropertyType::hidden_state_activation:
if (!value.empty()) {
ActivationType acti_type = (ActivationType)parseType(value, TOKEN_ACTI);
- LayerV1::activation_type = acti_type;
+ hidden_state_activation_type = acti_type;
acti_func.setActiFunc(acti_type);
}
break;
std::shared_ptr<RNNLayer> from = std::static_pointer_cast<RNNLayer>(l);
this->unit = from->unit;
+ this->hidden_state_activation_type = from->hidden_state_activation_type;
this->return_sequences = from->return_sequences;
this->acti_func = from->acti_func;
}
* @brief Constructor of RNNLayer
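+  * @param unit_ number of output units
+  * @param hidden_state_activation_type_ activation applied to the hidden
+  *        state; ACT_NONE falls back to tanh
+  * @param sequence if true, return the full output sequence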
*/
template <typename... Args>
- RNNLayer(unsigned int unit_ = 0, bool sequence = false, Args... args) :
+ RNNLayer(
+ unsigned int unit_ = 0,
+ ActivationType hidden_state_activation_type_ = ActivationType::ACT_NONE,
+ bool sequence = false, Args... args) :
LayerV1(args...),
unit(unit_),
+ hidden_state_activation_type(hidden_state_activation_type_),
return_sequences(sequence){};
/**
unsigned int unit;
/**
+ * @brief activation type for the hidden state : default is tanh
+ */
+ ActivationType hidden_state_activation_type;
+
+ /**
* @brief activation function for h_t : default is tanh
*/
ActiFunc acti_func;
)
multi_rnn_layer_tc(1,2)(file_name="multi_rnn_return_sequence.info")
multi_rnn_layer_tc(2,2)(file_name="multi_rnn_return_sequence_with_batch.info")
-
- gru_layer_tc = lambda batch, time, return_sequences: partial(
- record,
- model=[
- K.Input(batch_shape=(batch,time, 1)),
- K.layers.GRU(
- time,
- recurrent_activation="sigmoid",
- activation="tanh",
- return_sequences=return_sequences,
- ),
- K.layers.Dense(1),
- ],
- optimizer=opt.SGD(learning_rate=0.1),
- iteration=10,
- input_shape=(batch, time, 1),
- label_shape=(batch, 1),
- is_onehot=False,
- loss_fn_str="mse",
- )
-
- gru_layer_tc(1, 1, False)(file_name="gru_basic.info")
}
);
-INI gru_basic(
- "gru_basic",
- {
- nn_base + "loss=mse | batch_size=1",
- sgd_base + "learning_rate = 0.1",
- I("input") + input_base + "input_shape=1:1:1",
- I("gru") + gru_base +
- "unit = 1" + "input_layers=input",
- I("outputlayer") + fc_base + "unit = 1" + "input_layers=gru"
- }
-);
-
INSTANTIATE_TEST_CASE_P(
nntrainerModelAutoTests, nntrainerModelTest, ::testing::Values(
mkModelTc(fc_sigmoid_mse, "3:1:1:10", 10),
mkModelTc(rnn_return_sequences, "1:1:2:1", 10),
mkModelTc(rnn_return_sequence_with_batch, "2:1:2:1", 10),
mkModelTc(multi_rnn_return_sequence, "1:1:1:1", 10),
- mkModelTc(multi_rnn_return_sequence_with_batch, "2:1:1:1", 10),
- mkModelTc(gru_basic, "1:1:1:1", 1)
+ mkModelTc(multi_rnn_return_sequence_with_batch, "2:1:1:1", 10)
), [](const testing::TestParamInfo<nntrainerModelTest::ParamType>& info){
return std::get<0>(info.param).getName();
});