* @author Jijoong Moon <jijoong.moon@samsung.com>
* @bug No known bugs except for NYI items
*
+ * h_prev --------d1------->[*]-------d0----->[+]---d0--> h
+ * dh_nx    |  |             |                 | d0      dh
+ *          | d14            | d2        d3    |
+ *          |  |             +-----[1-]------>[*]
+ *          | [*]<----+ d15  |d5               | d6
+ *          |  |      |rt    | zt              |gt
+ *          |  |    [sig]  [sig]            [tanh]
+ *          |  |      |d16   | d7              |d8
+ *          |  |     [+]    [+]               [+]
+ *          |  |    / \d16 / \d7             / \ d8
+ *          |  | Wxhr Whhr Wxhz Whhz      Wxhg Whhg
+ *          |  |  |d17 |d13 |d12 |d11      |d10 | d9
+ *          +- |--+----|----+    |         |    |
+ *             +-------|---------|---------+    |
+ * xs------------------+---------+--------------+
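+ *
+ * Forward equations realized by the graph above (activation assignments
+ * follow the run_fn calls below: sigmoid for zt/rt, tanh for gt):
+ *   zt = sigmoid(Wxhz . xs + Whhz . h_prev + bz)
+ *   rt = sigmoid(Wxhr . xs + Whhr . h_prev + br)
+ *   gt = tanh(Wxhg . xs + Whhg . (rt o h_prev) + bg)
+ *   h  = zt o h_prev + (1 - zt) o gt        (o = elementwise product)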
*/
#include <cmath>
hs_prev = h_prev.getBatchSlice(b, 1);
}
- xs.dot(weight_xh, zrg_t);
+ xs.dot(weight_xh, zrg_t); // x_z, x_r, x_g
Tensor ztrt = zrg_t.getSharedDataTensor({unit * 2}, 0);
Tensor ztrt_b = bias_h.getSharedDataTensor({unit * 2}, 0);
- Tensor w_hh = weight_hh.getSharedDataTensor({unit * unit * 2}, 0);
- Tensor w_g =
- weight_hh.getSharedDataTensor({unit * unit}, unit * unit * 2);
+ Tensor w_hh;
+ w_hh.copy_with_stride(
+ weight_hh.getSharedDataTensor({1, 1, unit, unit * 2}, 0, false));
+ Tensor w_g;
+ w_g.copy_with_stride(
+ weight_hh.getSharedDataTensor({1, 1, unit, unit}, unit * 2, false));
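+ // weight_hh packs the gate columns per row as [z | r | g] (row length
+ // unit * NUM_GATE). With reset_stride = false the views above keep the
+ // parent's row stride, and copy_with_stride() materializes the {z,r} and
+ // {g} column blocks as contiguous matrices usable by dot().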
+
Tensor gt = zrg_t.getSharedDataTensor({unit}, unit * 2);
Tensor gt_b = bias_h.getSharedDataTensor({unit}, unit * 2);
recurrent_acti_func.run_fn(rt, rt);
recurrent_acti_func.run_fn(zt, zt);
- gt.add_i(rt.multiply(hs_prev).dot(w_g));
+ Tensor temp;
+ rt.multiply(hs_prev, temp);
+ gt.add_i(temp.dot(w_g));
gt.add_i(gt_b);
acti_func.run_fn(gt, gt);
zt.multiply(hs_prev, hs);
- Tensor a = zt.multiply(-1.0).add(1.0);
- hs.add_i(gt.multiply(a));
+ temp = zt.multiply(-1.0).add(1.0);
+ hs.add_i(gt.multiply(temp));
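+ // i.e. h = zt o h_prev + (1 - zt) o gt, matching the graph in the header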
}
h_prev.getBatchSlice(b, 1).copy(hs);
}
weightAt(static_cast<int>(GRUParams::weight_hh)).getVariableRef();
djdw_x.setZero();
- djdw_h.setZero();
+ Tensor djdw_zr_h = Tensor({1, 1, unit, unit * 2}, true);
+ djdw_zr_h.setZero();
+ Tensor djdw_g_h = Tensor({1, 1, unit, unit}, true);
+ djdw_g_h.setZero();
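+ // djdw_h interleaves the gate columns per row as [z | r | g]; accumulate
+ // the {z,r} and {g} blocks in contiguous scratch tensors and scatter them
+ // back into djdw_h after the loops below.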
djdb_h.setZero();
hidden->getGradientRef().setZero();
Tensor dh;
Tensor hs_prev;
- Tensor hs;
Tensor xs;
Tensor dzrg_ = zrg->getGradientRef().getBatchSlice(b, 1);
Tensor zrg_ = zrg->getVariableRef().getBatchSlice(b, 1);
for (unsigned int t = deriv_t.height(); t-- > 0;) {
dh = deriv_t.getSharedDataTensor({deriv_t.width()}, t * deriv_t.width());
xs = xs_t.getSharedDataTensor({xs_t.width()}, t * xs_t.width());
- hs = hs_t.getSharedDataTensor({hs_t.width()}, t * hs_t.width());
Tensor dzrg_t =
dzrg_.getSharedDataTensor({unit * NUM_GATE}, unit * t * NUM_GATE);
Tensor rt = zrg_t.getSharedDataTensor({unit}, unit);
Tensor gt = zrg_t.getSharedDataTensor({unit}, unit * 2);
- dh.multiply(hs_prev, dhz);
- dhz.subtract_i(gt.multiply(dh));
+ zt.multiply(dh, dh_nx); // dh_nx = d1
+
+ dh.multiply(hs_prev, dhz); // dhz = d2
+ dhz.subtract_i(gt.multiply(dh)); // dhz = d5
zt.multiply(-1.0, dhg);
dhg.add_i(1.0);
- dhg.multiply_i(dh);
- recurrent_acti_func.run_prime_fn(zt, dhz, dhz);
- acti_func.run_prime_fn(gt, dhg, dhg);
+ dhg.multiply_i(dh); // dhg = d6
- Tensor dhzr = dzrg_t.getSharedDataTensor({unit * 2}, 0);
- Tensor djdw_zr_h = djdw_h.getSharedDataTensor({unit * unit * 2}, 0);
- Tensor djdw_g_h =
- djdw_h.getSharedDataTensor({unit * unit}, unit * unit * 2);
+ recurrent_acti_func.run_prime_fn(zt, dhz, dhz); // dhz = d7
+ acti_func.run_prime_fn(gt, dhg, dhg); // dhg = d8
- Tensor wg_hh =
- weight_hh.getSharedDataTensor({unit * unit}, unit * unit * 2);
- Tensor wzr_hh = weight_hh.getSharedDataTensor({unit * unit * 2}, 0);
+ Tensor dhzr = dzrg_t.getSharedDataTensor({unit * 2}, 0); // dhz+dhr
- dhg.multiply(wg_hh, dh_nx);
- hs_prev.multiply(dh_nx, dhr);
- dh_nx.multiply_i(rt);
- recurrent_acti_func.run_prime_fn(rt, dhr, dhr);
+ Tensor wg_hh;
+ wg_hh.copy_with_stride(
+ weight_hh.getSharedDataTensor({1, 1, unit, unit}, unit * 2, false));
+ Tensor wzr_hh;
+ wzr_hh.copy_with_stride(
+ weight_hh.getSharedDataTensor({1, 1, unit, unit * 2}, 0, false));
- djdb_h.add_i(dzrg_t);
+ Tensor temp = Tensor({hs_t.width()});
+ temp.setZero();
+ dhg.dot(wg_hh, temp, false, true); // temp = d10
+ hs_prev.multiply(temp, dhr); // dhr = d15
+ temp.multiply_i(rt); // temp=d14
+ dh_nx.add_i(temp); // dh_nx = d1 + d14
+ // reuse temp to hold hs_prev o rt for the djdw_g_h update below
+ hs_prev.multiply(rt, temp);
+ recurrent_acti_func.run_prime_fn(rt, dhr, dhr); // dhr = d16
+
+ djdb_h.add_i(dzrg_t); // dzrg_t = d7+d16+d8
djdw_x.add_i(xs.dot(dzrg_t, true, false));
- djdw_zr_h.add_i(hs_prev.dot(dhzr, true, false));
- djdw_g_h.add_i(hs_prev.multiply(rt).dot(dhg, true, false));
- dhzr.dot(wzr_hh, dh_nx, false, true);
- dh_nx.add_i(zt.multiply(dh));
+ djdw_zr_h.add_i(hs_prev.dot(dhzr, true, false));
+ djdw_g_h.add_i(temp.dot(dhg, true, false));
+ dhzr.dot(wzr_hh, dh_nx, false, true, 1.0); // dh_nx = d1 + d14 + d12 + d17
}
}
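+ // Scatter the contiguous per-gate accumulators back into djdw_h's
+ // interleaved [z | r | g] row layout.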
+ for (unsigned int h = 0; h < unit; ++h) {
+ float *data = djdw_zr_h.getAddress(h * unit * 2);
+ float *rdata = djdw_h.getAddress(h * unit * NUM_GATE);
+ std::copy(data, data + unit * 2, rdata);
+ }
+
+ for (unsigned int h = 0; h < unit; ++h) {
+ float *data = djdw_g_h.getAddress(h * unit);
+ float *rdata = djdw_h.getAddress(h * unit * NUM_GATE + unit * 2);
+ std::copy(data, data + unit, rdata);
+ }
}
} // namespace nntrainer
* @brief Constructor of GRULayer
*/
template <typename... Args>
- GRULayer(unsigned int unit_ = 0,
- ActivationType recurrent_activation_type_ = ActivationType::ACT_NONE,
- bool sequence = false, Args... args) :
+ GRULayer(
+ unsigned int unit_ = 0,
+ ActivationType hidden_state_activation_type_ = ActivationType::ACT_NONE,
+ ActivationType recurrent_activation_type_ = ActivationType::ACT_NONE,
+ bool sequence = false, Args... args) :
LayerV1(args...),
unit(unit_),
+ hidden_state_activation_type(hidden_state_activation_type_),
recurrent_activation_type(recurrent_activation_type_),
return_sequences(sequence){};
Tensor xs;
Tensor hs_prev;
Tensor cs_prev;
- Tensor hs;
Tensor cs;
Tensor dc;
Tensor dfgio_ = fgio->getGradientRef().getBatchSlice(b, 1);
dc =
derivc_t.getSharedDataTensor({derivc_t.width()}, t * derivc_t.width());
xs = xs_t.getSharedDataTensor({xs_t.width()}, t * xs_t.width());
- hs = hs_t.getSharedDataTensor({hs_t.width()}, t * hs_t.width());
cs = cs_t.getSharedDataTensor({cs_t.width()}, t * cs_t.width());
Tensor dfgio_t =
src.src_tensor->tensor(), offset + src.src_tensor->offset());
}
-Tensor Tensor::getSharedDataTensor(const TensorDim dim_,
- unsigned int offset) const {
+Tensor Tensor::getSharedDataTensor(const TensorDim dim_, unsigned int offset,
+ bool reset_stride) const {
Tensor ret = *this;
if (dim_.getDataLen() + offset > dim.getDataLen())
"Creating shared tensor of size bigger than tensor memory.");
ret.dim = dim_;
- ret.strides = ret.dim.computeStrides();
+ if (reset_stride)
+ ret.strides = ret.dim.computeStrides();
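+ // Note: when reset_stride is false the view keeps this tensor's strides,
+ // so it addresses a sub-block (e.g. a column range) of the original
+ // layout; materialize it with copy_with_stride() before ops that assume
+ // contiguous memory.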
/**
* In this case, its the caller's responsibility to ensure that allocate() is
Tensor &Tensor::dot(Tensor const &m, Tensor &result, bool trans, bool trans_m,
float beta) const {
if (m.dim.rank() > 2) {
throw exception::not_supported("Error: support only for rank of dot "
"matrix <= 2");
}
scopy(length(), buf, 1, getData(), 1);
}
+void Tensor::copy_with_stride(const Tensor &from) {
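+ // Gather elements from a possibly non-contiguous `from` (e.g. a strided
+ // view produced by getSharedDataTensor(..., reset_stride = false)) into
+ // this tensor's contiguous layout.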
+ if (from.length() != 0 && length() == from.length()) {
+ reshape(from.getDim());
+ for (unsigned int b = 0; b < from.batch(); ++b) {
+ unsigned int from_b = b * from.strides[0];
+ unsigned int t_b = b * from.channel() * from.height() * from.width();
+ for (unsigned int c = 0; c < from.channel(); ++c) {
+ unsigned int from_c = c * from.strides[1];
+ unsigned int t_c = c * from.height() * from.width();
+ for (unsigned int h = 0; h < from.height(); ++h) {
+ unsigned int from_h = h * from.strides[2];
+ unsigned int t_h = h * from.width();
+ for (unsigned int w = 0; w < from.width(); ++w) {
+ unsigned int from_w = w * from.strides[3];
+ getData()[t_b + t_c + t_h + w] =
+ from.getData()[from_b + from_c + from_h + from_w];
+ }
+ }
+ }
+ }
+ } else {
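+ // Size mismatch: materialize into a fresh contiguous tensor of from's
+ // shape, then take ownership of its buffer via swap.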
+ Tensor t = Tensor(from.getDim(), true);
+ for (unsigned int b = 0; b < from.batch(); ++b) {
+ unsigned int from_b = b * from.strides[0];
+ for (unsigned int c = 0; c < from.channel(); ++c) {
+ unsigned int from_c = c * from.strides[1];
+ for (unsigned int h = 0; h < from.height(); ++h) {
+ unsigned int from_h = h * from.strides[2];
+ for (unsigned int w = 0; w < from.width(); ++w) {
+ unsigned int from_w = w * from.strides[3];
+ t.setValue(b, c, h, w,
+ from.getData()[from_b + from_c + from_h + from_w]);
+ }
+ }
+ }
+ }
+ swap(t, *this);
+ }
+}
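+// Usage sketch (mirrors gru.cpp): peel the g-gate columns out of a
+// {unit, unit * NUM_GATE} weight as a contiguous {unit, unit} matrix:
+//   Tensor w_g;
+//   w_g.copy_with_stride(
+//     weight_hh.getSharedDataTensor({1, 1, unit, unit}, unit * 2, false));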
+
void Tensor::copy(const Tensor &from) {
// todo: enable copy to non-contiguous tensor
if (!is_contiguous) {
void copy(const Tensor &from);
/**
+ * @brief Copy the Tensor element by element, honoring the strides of @a from
+ * @param[in] from Tensor to be copied (may be a non-contiguous view)
+ */
+ void copy_with_stride(const Tensor &from);
+
+ /**
* @brief Get slice of the tensor, sliced by batch
* @param[in] offset offset in batch to start the slice
* @param[in] size size of the slice
* @note New size added with offset must be less than the size of the original
* tensor.
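+ * @param[in] reset_stride if true (default), recompute contiguous strides
+ * for the new shape; if false, keep this tensor's strides so the result is
+ * a strided view into the original layout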
*/
- Tensor getSharedDataTensor(const TensorDim dim, unsigned int offset) const;
+ Tensor getSharedDataTensor(const TensorDim dim, unsigned int offset,
+ bool reset_stride = true) const;
/**
* @brief make this tensor share memory with given tensor
)
multi_rnn_layer_tc(1,2)(file_name="multi_rnn_return_sequence.info")
multi_rnn_layer_tc(2,2)(file_name="multi_rnn_return_sequence_with_batch.info")
+
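+ # GRU cases mirror the RNN recipes above: record a small Keras GRU stack and
+ # dump golden tensors for the C++ translation tests. reset_after=False keeps
+ # Keras on the same formulation as nntrainer's GRU, which applies the reset
+ # gate before the recurrent matmul.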
+ gru_layer_tc = lambda batch, time, return_sequences: partial(
+     record,
+     model=[
+         K.Input(batch_shape=(batch, time, 1)),
+         K.layers.GRU(
+             time,
+             recurrent_activation="sigmoid",
+             activation="tanh",
+             return_sequences=return_sequences,
+             reset_after=False,  # reset gate before the recurrent matmul
+         ),
+         K.layers.Dense(1),
+     ],
+     optimizer=opt.SGD(learning_rate=0.1),
+     iteration=10,
+     input_shape=(batch, time, 1),
+     label_shape=(batch, time, 1),
+     is_onehot=False,
+     loss_fn_str="mse",
+ )
+
+ gru_layer_tc(1, 1, False)(file_name="gru_basic.info")
+ gru_layer_tc(1, 2, True)(file_name="gru_return_sequence.info")
+ gru_layer_tc(2, 2, True)(file_name="gru_return_sequence_with_batch.info")
+
+ multi_gru_layer_tc = lambda batch, time: partial(
+     record,
+     model=[
+         K.Input(batch_shape=(batch, time, 1)),
+         K.layers.GRU(
+             time,
+             recurrent_activation="sigmoid",
+             activation="tanh",
+             return_sequences=True,
+             reset_after=False,
+         ),
+         K.layers.GRU(
+             time,
+             recurrent_activation="sigmoid",
+             activation="tanh",
+             reset_after=False,
+         ),
+         K.layers.Dense(1),
+     ],
+     optimizer=opt.SGD(learning_rate=0.1),
+     iteration=10,
+     input_shape=(batch, time, 1),
+     label_shape=(batch, 1),
+     is_onehot=False,
+     loss_fn_str="mse",
+ )
+ multi_gru_layer_tc(1,2)(file_name="multi_gru_return_sequence.info")
+ multi_gru_layer_tc(2,2)(file_name="multi_gru_return_sequence_with_batch.info")
}
);
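+// GRU model test cases, matching the recorded Keras GRU recipes.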
+INI gru_basic(
+ "gru_basic",
+ {
+ nn_base + "loss=mse | batch_size=1",
+ sgd_base + "learning_rate = 0.1",
+ I("input") + input_base + "input_shape=1:1:1",
+ I("gru") + gru_base +
+ "unit = 1" + "input_layers=input",
+ I("outputlayer") + fc_base + "unit = 1" + "input_layers=gru"
+ }
+);
+
+INI gru_return_sequence(
+ "gru_return_sequence",
+ {
+ nn_base + "loss=mse | batch_size=1",
+ sgd_base + "learning_rate = 0.1",
+ I("input") + input_base + "input_shape=1:2:1",
+ I("gru") + gru_base +
+ "unit = 2" + "input_layers=input"+ "return_sequences=true",
+ I("outputlayer") + fc_base + "unit = 1" + "input_layers=gru"
+ }
+);
+
+INI gru_return_sequence_with_batch(
+ "gru_return_sequence_with_batch",
+ {
+ nn_base + "loss=mse | batch_size=2",
+ sgd_base + "learning_rate = 0.1",
+ I("input") + input_base + "input_shape=1:2:1",
+ I("gru") + gru_base +
+ "unit = 2" + "input_layers=input"+ "return_sequences=true",
+ I("outputlayer") + fc_base + "unit = 1" + "input_layers=gru"
+ }
+);
+
+INI multi_gru_return_sequence(
+ "multi_gru_return_sequence",
+ {
+ nn_base + "loss=mse | batch_size=1",
+ sgd_base + "learning_rate = 0.1",
+ I("input") + input_base + "input_shape=1:2:1",
+ I("gru") + gru_base +
+ "unit = 2" + "input_layers=input"+ "return_sequences=true",
+ I("gru2") + gru_base +
+ "unit = 2" + "input_layers=gru",
+ I("outputlayer") + fc_base + "unit = 1" + "input_layers=gru2"
+ }
+);
+
+
+INI multi_gru_return_sequence_with_batch(
+ "multi_gru_return_sequence_with_batch",
+ {
+ nn_base + "loss=mse | batch_size=2",
+ sgd_base + "learning_rate = 0.1",
+ I("input") + input_base + "input_shape=1:2:1",
+ I("gru") + gru_base +
+ "unit = 2" + "input_layers=input"+ "return_sequences=true",
+ I("gru2") + gru_base +
+ "unit = 2" + "input_layers=gru",
+ I("outputlayer") + fc_base + "unit = 1" + "input_layers=gru2"
+ }
+);
+
INSTANTIATE_TEST_CASE_P(
nntrainerModelAutoTests, nntrainerModelTest, ::testing::Values(
mkModelTc(fc_sigmoid_mse, "3:1:1:10", 10),
mkModelTc(rnn_return_sequences, "1:1:2:1", 10),
mkModelTc(rnn_return_sequence_with_batch, "2:1:2:1", 10),
mkModelTc(multi_rnn_return_sequence, "1:1:1:1", 10),
- mkModelTc(multi_rnn_return_sequence_with_batch, "2:1:1:1", 10)
+ mkModelTc(multi_rnn_return_sequence_with_batch, "2:1:1:1", 10),
+ mkModelTc(gru_basic, "1:1:1:1", 10),
+ mkModelTc(gru_return_sequence, "1:1:2:1", 10),
+ mkModelTc(gru_return_sequence_with_batch, "2:1:2:1", 10),
+ mkModelTc(multi_gru_return_sequence, "1:1:1:1", 10),
+ mkModelTc(multi_gru_return_sequence_with_batch, "2:1:1:1", 10)
), [](const testing::TestParamInfo<nntrainerModelTest::ParamType>& info){
return std::get<0>(info.param).getName();
});