h_prev.setZero();
c_prev.setZero();
- Tensor hidden_;
- hidden_ = hidden->getVariableRef();
+ Tensor &hidden_ = hidden->getVariableRef();
Tensor &input_ = net_input[0]->getVariableRef();
Tensor &m_cell_ = mem_cell->getVariableRef();
this->acti_func = from->acti_func;
this->recurrent_activation_type = from->recurrent_activation_type;
this->recurrent_acti_func = from->recurrent_acti_func;
+ this->return_sequences = from->return_sequences;
}
void LSTMLayer::calcDerivative() {
fgio->getGradientRef().setZero();
Tensor derivative_ = hidden->getGradientRef();
- Tensor hidden_;
if (!return_sequences) {
TensorDim d = derivative_.getDim();
derivative_.getData());
}
- hidden_ = hidden->getVariableRef();
+ Tensor &hidden_ = hidden->getVariableRef();
Tensor &input_ = net_input[0]->getVariableRef();
Tensor &m_cell_ = mem_cell->getVariableRef();
bias_dim.batch(input_dim[0].batch());
h_prev = Tensor(bias_dim);
- h_prev.setZero();
TensorDim d = input_dim[0];
d.width(unit);
// test will fail. Because it modifies the data during gradient calculation
// TODO : We could control with something like #define test to save memory
hidden = std::make_shared<Var_Grad>(d, true, true, "RNN:temp_hidden");
- hidden->getVariableRef().setZero();
- hidden->getGradientRef().setZero();
if (Layer::activation_type == ActivationType::ACT_NONE) {
Layer::activation_type = ActivationType::ACT_TANH;
Tensor &bias_h =
weightAt(static_cast<int>(RNNParams::bias_h)).getVariableRef();
- Tensor hidden_;
- hidden_ = hidden->getVariableRef();
+ hidden->getVariableRef().setZero();
+
+ if (training) {
+ hidden->getGradientRef().setZero();
+ }
+ h_prev.setZero();
+ Tensor &hidden_ = hidden->getVariableRef();
Tensor &input_ = net_input[0]->getVariableRef();
Tensor temp;
Tensor hs_prev;
Tensor hs;
- if (training)
- h_prev.setZero();
-
// TODO : check merge b and t index
for (unsigned int b = 0; b < input_dim[0].batch(); ++b) {
Tensor islice = input_.getBatchSlice(b, 1);
Tensor oslice = hidden_.getBatchSlice(b, 1);
for (unsigned int t = 0; t < islice.height(); ++t) {
- Tensor xs = islice.getSharedDataTensor(TensorDim(1, 1, 1, islice.width()),
- t * islice.width());
- // Calculate Hidden
- // activation(xs.dot(weight_xh).add(hs_prev.dot(weight_hh).add(bias_h)));
- hs = oslice.getSharedDataTensor(TensorDim(1, 1, 1, oslice.width()),
- t * oslice.width());
+ Tensor xs =
+ islice.getSharedDataTensor({islice.width()}, t * islice.width());
+
+ hs = oslice.getSharedDataTensor({oslice.width()}, t * oslice.width());
if (t > 0) {
- hs_prev = oslice.getSharedDataTensor(TensorDim(1, 1, 1, oslice.width()),
+ hs_prev = oslice.getSharedDataTensor({oslice.width()},
(t - 1) * oslice.width());
} else {
hs_prev = h_prev.getBatchSlice(b, 1);
std::shared_ptr<RNNLayer> from = std::static_pointer_cast<RNNLayer>(l);
this->unit = from->unit;
+ this->return_sequences = from->return_sequences;
+ this->acti_func = from->acti_func;
}
void RNNLayer::calcDerivative() {
- Tensor derivative_;
- derivative_ = hidden->getGradientRef();
-
+ Tensor &derivative_ = hidden->getGradientRef();
Tensor &weight =
weightAt(static_cast<int>(RNNParams::weight_xh)).getVariableRef();
Tensor &ret_ = net_input[0]->getGradientRef();
Tensor &weight_hh =
weightAt(static_cast<int>(RNNParams::weight_hh)).getVariableRef();
- Tensor derivative_;
- Tensor hidden_;
- derivative_ = hidden->getGradientRef();
+ djdw_x.setZero();
+ djdw_h.setZero();
+ djdb_h.setZero();
+
+ Tensor &derivative_ = hidden->getGradientRef();
if (!return_sequences) {
TensorDim d = derivative_.getDim();
derivative_.copy(net_hidden[0]->getGradientRef());
}
- hidden_ = hidden->getVariableRef();
+ Tensor &hidden_ = hidden->getVariableRef();
Tensor &input_ = net_input[0]->getVariableRef();
Tensor dh_nx = Tensor(TensorDim(1, 1, 1, derivative_.width()));
acti_func.run_prime_fn(hs, dh, dh);
djdb_h.add_i(dh);
- djdw_x.add_i(xs.dot(dh, true, false));
- djdw_h.add_i(hs_prev.dot(dh, true, false));
- dh.dot(weight_hh, dh_nx, false, true, 1.0);
+ xs.dot(dh, djdw_x, true, false, 1.0);
+ hs_prev.dot(dh, djdw_h, true, false, 1.0);
+ dh.dot(weight_hh, dh_nx, false, true);
}
}
}
rnn = K.layers.SimpleRNN(2, return_sequences=True)
rnn_layer_return_sequence_tc(rnn)(file_name="rnn_return_sequences.info", debug=["summary", "initial_weights", "dx", "output", "layer_name", "label"],)
+ # Golden-data recorder: SimpleRNN(unit=2, return_sequences=True) -> Dense(1),
+ # batch_size=2, single iteration. Writes rnn_return_sequence_with_batch.info
+ # for the corresponding nntrainer model test.
+ rnn_layer_return_sequence_with_batch = lambda rnn_layer: partial(
+ record,
+ model=[
+ K.Input(batch_shape=(2, 2, 1)),
+ rnn_layer,
+ K.layers.Dense(1)
+ ],
+ optimizer=opt.SGD(learning_rate=0.1),
+ iteration=1,
+ input_shape=(2,2,1),
+ label_shape=(2,2,1),
+ is_onehot=False,
+ loss_fn_str="mse"
+ )
+ rnn = K.layers.SimpleRNN(2, return_sequences=True)
+ rnn_layer_return_sequence_with_batch(rnn)(file_name="rnn_return_sequences_with_batch.info", debug=["summary", "initial_weights", "dx", "output", "layer_name", "label"],)
+
+
+ # Golden-data recorder: SimpleRNN(unit=2, return_sequences=True) -> Dense(1),
+ # batch_size=2, run for two iterations ("_n"); additionally records weights
+ # and gradients so per-iteration updates can be verified.
+ rnn_layer_return_sequence_with_batch_n = lambda rnn_layer: partial(
+ record,
+ model=[
+ K.Input(batch_shape=(2, 2, 1)),
+ rnn_layer,
+ K.layers.Dense(1)
+ ],
+ optimizer=opt.SGD(learning_rate=0.1),
+ iteration=2,
+ input_shape=(2,2,1),
+ label_shape=(2,2,1),
+ is_onehot=False,
+ loss_fn_str="mse"
+ )
+ rnn = K.layers.SimpleRNN(2, return_sequences=True)
+ rnn_layer_return_sequence_with_batch_n(rnn)(file_name="rnn_return_sequence_with_batch_n.info", debug=["summary", "initial_weights", "dx", "output", "layer_name", "label","weights","gradients"],)
+
+ # Golden-data recorder: two stacked SimpleRNNs (first with
+ # return_sequences=True so the second receives the full sequence) -> Dense(1),
+ # batch_size=1, single iteration.
+ multi_rnn_layer_return_sequence = partial(
+ record,
+ model=[
+ K.Input(batch_shape=(1, 2, 1)),
+ K.layers.SimpleRNN(2, return_sequences=True),
+ K.layers.SimpleRNN(2),
+ K.layers.Dense(1)
+ ],
+ optimizer=opt.SGD(learning_rate=0.1),
+ iteration=1,
+ input_shape=(1,2,1),
+ label_shape=(1,1,1),
+ is_onehot=False,
+ loss_fn_str="mse"
+ )
+ multi_rnn_layer_return_sequence(file_name="multi_rnn_return_sequence.info", debug=["summary", "initial_weights", "dx", "output", "layer_name", "label","weights","gradients"],)
+
+ # Golden-data recorder: two stacked SimpleRNNs -> Dense(1), batch_size=2,
+ # two iterations; records weights and gradients as well.
+ # NOTE(review): label_shape here is (2,1) while the single-RNN variants use a
+ # 3-entry tuple — presumably intentional since the second RNN drops the time
+ # axis; confirm against record()'s label handling.
+ multi_rnn_layer_return_sequence_with_batch_n = partial(
+ record,
+ model=[
+ K.Input(batch_shape=(2, 2, 1)),
+ K.layers.SimpleRNN(2, return_sequences=True),
+ K.layers.SimpleRNN(2),
+ K.layers.Dense(1)
+ ],
+ optimizer=opt.SGD(learning_rate=0.1),
+ iteration=2,
+ input_shape=(2,2,1),
+ label_shape=(2,1),
+ is_onehot=False,
+ loss_fn_str="mse"
+ )
+ multi_rnn_layer_return_sequence_with_batch_n(file_name="multi_rnn_return_sequence_with_batch_n.info", debug=["summary", "initial_weights", "dx", "output", "layer_name", "label","weights","gradients"],)
}
);
+// Model: input(1:2:1) -> rnn(unit=2, return_sequences=true) -> fc(unit=1),
+// batch_size=2. nntrainer counterpart of the Keras-recorded
+// rnn_return_sequence_with_batch golden data.
+INI rnn_return_sequence_with_batch(
+ "rnn_return_sequence_with_batch",
+ {
+ nn_base + "loss=mse | batch_size=2",
+ sgd_base + "learning_rate = 0.1",
+ I("input") + input_base + "input_shape=1:2:1",
+ I("rnn") + rnn_base +
+ "unit = 2" + "input_layers=input"+ "return_sequences=true",
+ I("outputlayer") + fc_base + "unit = 1" + "input_layers=rnn"
+ }
+);
+
+// Same topology as rnn_return_sequence_with_batch; separate INI so the
+// multi-iteration ("_n") test case can run against its own golden file.
+INI rnn_return_sequence_with_batch_n(
+ "rnn_return_sequence_with_batch_n",
+ {
+ nn_base + "loss=mse | batch_size=2",
+ sgd_base + "learning_rate = 0.1",
+ I("input") + input_base + "input_shape=1:2:1",
+ I("rnn") + rnn_base +
+ "unit = 2" + "input_layers=input"+ "return_sequences=true",
+ I("outputlayer") + fc_base + "unit = 1" + "input_layers=rnn"
+ }
+);
+
+// Stacked RNNs: input(1:2:1) -> rnn(return_sequences=true) -> rnn2 ->
+// fc(unit=1), batch_size=1. The first RNN returns the full sequence so the
+// second RNN consumes every timestep.
+INI multi_rnn_return_sequence(
+ "multi_rnn_return_sequence",
+ {
+ nn_base + "loss=mse | batch_size=1",
+ sgd_base + "learning_rate = 0.1",
+ I("input") + input_base + "input_shape=1:2:1",
+ I("rnn") + rnn_base +
+ "unit = 2" + "input_layers=input"+ "return_sequences=true",
+ I("rnn2") + rnn_base +
+ "unit = 2" + "input_layers=rnn",
+ I("outputlayer") + fc_base + "unit = 1" + "input_layers=rnn2"
+ }
+);
+
+
+// Stacked-RNN variant with batch_size=2 for the multi-iteration ("_n") test
+// case; topology matches multi_rnn_return_sequence.
+INI multi_rnn_return_sequence_with_batch_n(
+ "multi_rnn_return_sequence_with_batch_n",
+ {
+ nn_base + "loss=mse | batch_size=2",
+ sgd_base + "learning_rate = 0.1",
+ I("input") + input_base + "input_shape=1:2:1",
+ I("rnn") + rnn_base +
+ "unit = 2" + "input_layers=input"+ "return_sequences=true",
+ I("rnn2") + rnn_base +
+ "unit = 2" + "input_layers=rnn",
+ I("outputlayer") + fc_base + "unit = 1" + "input_layers=rnn2"
+ }
+);
INSTANTIATE_TEST_CASE_P(
mkModelTc(lstm_return_sequence_with_batch, "2:1:2:1", 1),
mkModelTc(lstm_return_sequence_with_batch_n, "2:1:2:1", 2),
mkModelTc(multi_lstm_return_sequence, "1:1:1:1", 1),
- mkModelTc(multi_lstm_return_sequence_with_batch_n, "2:1:1:1", 2)
+ mkModelTc(multi_lstm_return_sequence_with_batch_n, "2:1:1:1", 2),
+ mkModelTc(rnn_return_sequence_with_batch, "2:1:2:1", 1),
+ mkModelTc(rnn_return_sequence_with_batch_n, "2:1:2:1", 2),
+ mkModelTc(multi_rnn_return_sequence, "1:1:1:1", 1),
+ mkModelTc(multi_rnn_return_sequence_with_batch_n, "2:1:1:1", 2)
+
// / #if gtest_version <= 1.7.0
));
/// #else gtest_version > 1.8.0