$(NNTRAINER_ROOT)/nntrainer/layers/embedding.cpp \
$(NNTRAINER_ROOT)/nntrainer/layers/rnn.cpp \
$(NNTRAINER_ROOT)/nntrainer/layers/lstm.cpp \
+ $(NNTRAINER_ROOT)/nntrainer/layers/lstmcell.cpp \
$(NNTRAINER_ROOT)/nntrainer/layers/gru.cpp \
$(NNTRAINER_ROOT)/nntrainer/layers/time_dist.cpp \
$(NNTRAINER_ROOT)/nntrainer/layers/dropout.cpp \
#include <input_layer.h>
#include <layer_node.h>
#include <lstm.h>
+#include <lstmcell.h>
#include <nntrainer_error.h>
#include <node_exporter.h>
#include <remap_realizer.h>
#include <util_func.h>
+
namespace nntrainer {
namespace props {
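/**
 * @brief Pass the given time step (and the max_time_step bound) down to
 * every node whose layer type consumes it during unrolling.
 */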
static void propagateTimestep(LayerNode *node, unsigned int time_step,
unsigned int max_time_step) {
+ /** @todo add an interface to check if a layer supports a property */
auto is_recurrent_type = [](LayerNode *node) {
- return node->getType() == LSTMLayer::type;
+ return node->getType() == LSTMLayer::type ||
+ node->getType() == LSTMCellLayer::type;
};
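// (hypothetical sketch for the @todo above, not part of this patch: a query
// such as node->supportsProperty(...) would replace this hard-coded type list)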
if (is_recurrent_type(node)) {
// input_dim = [ batch, 1, 1, feature_size ]
TensorDim output_dim;
const TensorDim &input_dim = context.getInputDimensions()[0];
- if (input_dim.height() != 1)
+ if (input_dim.height() != 1 || input_dim.channel() != 1)
throw std::invalid_argument(
"Input must have a single time dimension for LSTMCell");
// output_dim = [ batch, 1, 1, hidden_size (unit)]
d_fgio.setZero();
}
- std::copy(incoming_deriv.getData(),
- incoming_deriv.getData() + incoming_deriv.size(),
- derivative_.getData());
+ Tensor dh = derivative_.getBatchSlice(start_timestep, 1);
+ dh.reshape(incoming_deriv.getDim());
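+ // backprop visits the last unrolled step first: its slice holds no
+ // recurrent gradient yet, so overwrite; earlier slices already carry the
+ // gradient flowing back from the following step, so accumulate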
+ if (start_timestep + 1 == max_timestep) {
+ dh.copyData(incoming_deriv);
+ } else {
+ dh.add_i(incoming_deriv);
+ }
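+ // re-take the batch slice so dh regains its pre-reshape shape for later use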
+ dh = derivative_.getBatchSlice(start_timestep, 1);
if (dropout_rate > epsilon) {
derivative_.multiply_i(context.getTensor(wt_idx[LSTMParams::dropout_mask]));
}
- Tensor dh = derivative_.getBatchSlice(start_timestep, 1);
-
- std::copy(incoming_deriv.getData(),
- incoming_deriv.getData() + incoming_deriv.size(), dh.getData());
-
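// per-timestep views of the cell state, hidden state, and gate tensors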
Tensor dc = dm_cell_.getBatchSlice(start_timestep, 1);
Tensor xs = input_;
Tensor hs_t = hidden_.getBatchSlice(start_timestep, 1);
Tensor cs = m_cell_.getBatchSlice(start_timestep, 1);
- Tensor hs_prev;
Tensor dfgio_t = d_fgio.getBatchSlice(start_timestep, 1);
Tensor fgio_t = fgio.getBatchSlice(start_timestep, 1);
}
Tensor &Tensor::multiply_strided(Tensor const &m, Tensor &output) const {
+ /** TODO: throw rather than create new dimensions */
CREATE_IF_EMPTY_DIMS(output, dim);
if (size() != m.size() || size() != output.size())
}
};
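// an empty output is allocated later by CREATE_IF_EMPTY_DIMS, so only
// enforce matching strides once it actually holds data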
- if ((size() == m.size() && strides != m.strides) ||
- (size() == output.size() && strides != output.strides))
+ if (output.size() > 0 &&
+ ((size() == m.size() && strides != m.strides) ||
+ (size() == output.size() && strides != output.strides)))
throw std::invalid_argument(
"Use multiply_strided for multiplying strided tensors");
TensorDim new_match_dim = dim_;
new_match_dim.batch(dim.batch());
if (new_match_dim != dim && !reset_stride)
- // throw std::runtime_error("non contiguous tensor");
ret.contiguous = false;
return nn;
}
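+/**
+ * @brief Single lstmcell wrapped in a recurrent scope; unroll_for=2 unrolls
+ * it over two time steps and return_sequences keeps each step's output.
+ */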
+static std::unique_ptr<NeuralNetwork> makeSingleLSTMCell() {
+ std::unique_ptr<NeuralNetwork> nn(new NeuralNetwork());
+ nn->setProperty({"batch_size=3"});
+
+ auto outer_graph = makeGraph({
+ {"input", {"name=input", "input_shape=1:1:2"}},
+ /// the lstmcell layers are inserted here by addWithReferenceLayers
+ {"mse", {"name=loss", "input_layers=lstm_scope/a1"}},
+ });
+ for (auto &node : outer_graph) {
+ nn->addLayer(node);
+ }
+
+ auto lstm = makeGraph({
+ {"lstmcell", {"name=a1", "unit=2"}},
+ });
+
+ nn->addWithReferenceLayers(lstm, "lstm_scope", {"input"}, {"a1"}, {"a1"},
+ ml::train::ReferenceLayersType::RECURRENT,
+ {
+ "unroll_for=2",
+ "return_sequences=true",
+ "recurrent_input=a1",
+ "recurrent_output=a1",
+ });
+
+ nn->setOptimizer(ml::train::createOptimizer("sgd", {"learning_rate = 0.1"}));
+ return nn;
+}
+
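+/**
+ * @brief Two stacked lstmcells (a1 -> a2) in a recurrent scope; a2's output
+ * at step t is fed back as a1's recurrent input at step t+1.
+ */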
+static std::unique_ptr<NeuralNetwork> makeStackedLSTMCell() {
+ std::unique_ptr<NeuralNetwork> nn(new NeuralNetwork());
+ nn->setProperty({"batch_size=3"});
+
+ auto outer_graph = makeGraph({
+ {"input", {"name=input", "input_shape=1:1:2"}},
+ /// the lstmcell layers are inserted here by addWithReferenceLayers
+ {"mse", {"name=loss", "input_layers=lstm_scope/a2"}},
+ });
+ for (auto &node : outer_graph) {
+ nn->addLayer(node);
+ }
+
+ auto lstm = makeGraph({
+ {"lstmcell", {"name=a1", "unit=2"}},
+ {"lstmcell", {"name=a2", "unit=2", "input_layers=a1"}},
+ });
+
+ nn->addWithReferenceLayers(lstm, "lstm_scope", {"input"}, {"a1"}, {"a2"},
+ ml::train::ReferenceLayersType::RECURRENT,
+ {
+ "unroll_for=2",
+ "return_sequences=true",
+ "recurrent_input=a1",
+ "recurrent_output=a2",
+ });
+
+ nn->setOptimizer(ml::train::createOptimizer("sgd", {"learning_rate = 0.1"}));
+ return nn;
+}
+
INSTANTIATE_TEST_CASE_P(
recurrentModels, nntrainerModelTest,
::testing::ValuesIn({
ModelTestOption::COMPARE_V2),
mkModelTc_V2(makeFC, "fc_unroll_stacked", ModelTestOption::COMPARE_V2),
mkModelTc_V2(makeSingleLSTM, "lstm_single", ModelTestOption::COMPARE_V2),
+ mkModelTc_V2(makeSingleLSTMCell, "lstm_single__1",
+ ModelTestOption::COMPARE_V2),
mkModelTc_V2(makeStackedLSTM, "lstm_stacked", ModelTestOption::COMPARE_V2),
+ mkModelTc_V2(makeStackedLSTMCell, "lstm_stacked__1",
+ ModelTestOption::COMPARE_V2),
}),
[](const testing::TestParamInfo<nntrainerModelTest::ParamType> &info) {
return std::get<1>(info.param);