[layer] Bug fix for LSTMCell
authorParichay Kapoor <pk.kapoor@samsung.com>
Mon, 25 Oct 2021 01:29:50 +0000 (10:29 +0900)
committerJijoong Moon <jijoong.moon@samsung.com>
Tue, 26 Oct 2021 00:11:31 +0000 (09:11 +0900)
Enable recurrent unrolling tests for LSTMCell with single and stacked
LSTM cells, and add the corresponding bug fixes.
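
For reference, the core of the backward-pass fix is sketched below. The
tensor names follow calcGradient() in lstmcell.cpp, but the surrounding
setup is omitted, so treat this as an illustrative sketch rather than the
full implementation:

    // Accumulate the incoming derivative into the slice for the current
    // timestep instead of overwriting the whole derivative tensor.
    Tensor dh = derivative_.getBatchSlice(start_timestep, 1);
    dh.reshape(incoming_deriv.getDim());
    if (start_timestep + 1 == max_timestep)
      dh.copyData(incoming_deriv); // last unrolled step: plain copy
    else
      dh.add_i(incoming_deriv);    // earlier steps: add the recurrent gradient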

Signed-off-by: Parichay Kapoor <pk.kapoor@samsung.com>
jni/Android.mk
nntrainer/compiler/recurrent_realizer.cpp
nntrainer/layers/lstmcell.cpp
nntrainer/tensor/tensor.cpp
test/unittest/models/unittest_models_recurrent.cpp

index aa111f2..df470e5 100644 (file)
@@ -170,6 +170,7 @@ NNTRAINER_SRCS := $(NNTRAINER_ROOT)/nntrainer/models/neuralnet.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/embedding.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/rnn.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/lstm.cpp \
+                  $(NNTRAINER_ROOT)/nntrainer/layers/lstmcell.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/gru.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/time_dist.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/dropout.cpp \
index 0751cc6..3e17130 100644 (file)
 #include <input_layer.h>
 #include <layer_node.h>
 #include <lstm.h>
+#include <lstmcell.h>
 #include <nntrainer_error.h>
 #include <node_exporter.h>
 #include <remap_realizer.h>
 #include <util_func.h>
+
 namespace nntrainer {
 
 namespace props {
@@ -123,8 +125,10 @@ RecurrentRealizer::RecurrentRealizer(
 static void propagateTimestep(LayerNode *node, unsigned int time_step,
                               unsigned int max_time_step) {
 
+  /** @todo add an interface to check if a layer supports a property */
   auto is_recurrent_type = [](LayerNode *node) {
-    return node->getType() == LSTMLayer::type;
+    return node->getType() == LSTMLayer::type ||
+           node->getType() == LSTMCellLayer::type;
   };
 
   if (is_recurrent_type(node)) {
index 8e64221..e17e196 100644 (file)
@@ -82,7 +82,7 @@ void LSTMCellLayer::finalize(InitLayerContext &context) {
   // input_dim = [ batch, 1, 1, feature_size ]
   TensorDim output_dim;
   const TensorDim &input_dim = context.getInputDimensions()[0];
-  if (input_dim.height() != 1)
+  if (input_dim.height() != 1 && input_dim.channel() != 1)
     throw std::invalid_argument(
       "Input must be single time dimension for LSTMCell");
   // output_dim = [ batch, 1, 1, hidden_size (unit)]
@@ -281,25 +281,24 @@ void LSTMCellLayer::calcGradient(RunLayerContext &context) {
     d_fgio.setZero();
   }
 
-  std::copy(incoming_deriv.getData(),
-            incoming_deriv.getData() + incoming_deriv.size(),
-            derivative_.getData());
+  Tensor dh = derivative_.getBatchSlice(start_timestep, 1);
+  dh.reshape(incoming_deriv.getDim());
+  if (start_timestep + 1 == max_timestep) {
+    dh.copyData(incoming_deriv);
+  } else {
+    dh.add_i(incoming_deriv);
+  }
+  dh = derivative_.getBatchSlice(start_timestep, 1);
 
   if (dropout_rate > epsilon) {
     derivative_.multiply_i(context.getTensor(wt_idx[LSTMParams::dropout_mask]));
   }
 
-  Tensor dh = derivative_.getBatchSlice(start_timestep, 1);
-
-  std::copy(incoming_deriv.getData(),
-            incoming_deriv.getData() + incoming_deriv.size(), dh.getData());
-
   Tensor dc = dm_cell_.getBatchSlice(start_timestep, 1);
   Tensor xs = input_;
   Tensor hs_t = hidden_.getBatchSlice(start_timestep, 1);
   Tensor cs = m_cell_.getBatchSlice(start_timestep, 1);
 
-  Tensor hs_prev;
   Tensor dfgio_t = d_fgio.getBatchSlice(start_timestep, 1);
   Tensor fgio_t = fgio.getBatchSlice(start_timestep, 1);
 
index c1598dc..a90b866 100644 (file)
@@ -341,6 +341,7 @@ Tensor Tensor::multiply_strided(Tensor const &m) const {
 }
 
 Tensor &Tensor::multiply_strided(Tensor const &m, Tensor &output) const {
+  /** TODO: throw rather than create new dimensions */
   CREATE_IF_EMPTY_DIMS(output, dim);
 
   if (size() != m.size() || size() != output.size())
@@ -413,8 +414,9 @@ Tensor &Tensor::multiply(Tensor const &m, Tensor &output) const {
     }
   };
 
-  if ((size() == m.size() && strides != m.strides) ||
-      (size() == output.size() && strides != output.strides))
+  if (output.size() > 0 &&
+      ((size() == m.size() && strides != m.strides) ||
+       (size() == output.size() && strides != output.strides)))
     throw std::invalid_argument(
       "Use multiply_strided for multiplying strided tensors");
 
@@ -621,7 +623,6 @@ Tensor Tensor::getSharedDataTensor(const TensorDim dim_, unsigned int offset,
   TensorDim new_match_dim = dim_;
   new_match_dim.batch(dim.batch());
   if (new_match_dim != dim && !reset_stride)
-    // throw std::runtime_error("non contiguous tensor");
     ret.contiguous = false;
 
   /**
index e8df1c7..780a4e2 100644 (file)
@@ -136,6 +136,67 @@ static std::unique_ptr<NeuralNetwork> makeStackedLSTM() {
   return nn;
 }
 
+static std::unique_ptr<NeuralNetwork> makeSingleLSTMCell() {
+  std::unique_ptr<NeuralNetwork> nn(new NeuralNetwork());
+  nn->setProperty({"batch_size=3"});
+
+  auto outer_graph = makeGraph({
+    {"input", {"name=input", "input_shape=1:1:2"}},
+    /// the lstmcell layers are inserted here
+    {"mse", {"name=loss", "input_layers=lstm_scope/a1"}},
+  });
+  for (auto &node : outer_graph) {
+    nn->addLayer(node);
+  }
+
+  auto lstm = makeGraph({
+    {"lstmcell", {"name=a1", "unit=2"}},
+  });
+
+  nn->addWithReferenceLayers(lstm, "lstm_scope", {"input"}, {"a1"}, {"a1"},
+                             ml::train::ReferenceLayersType::RECURRENT,
+                             {
+                               "unroll_for=2",
+                               "return_sequences=true",
+                               "recurrent_input=a1",
+                               "recurrent_output=a1",
+                             });
+
+  nn->setOptimizer(ml::train::createOptimizer("sgd", {"learning_rate = 0.1"}));
+  return nn;
+}
+
+static std::unique_ptr<NeuralNetwork> makeStackedLSTMCell() {
+  std::unique_ptr<NeuralNetwork> nn(new NeuralNetwork());
+  nn->setProperty({"batch_size=3"});
+
+  auto outer_graph = makeGraph({
+    {"input", {"name=input", "input_shape=1:1:2"}},
+    /// the lstmcell layers are inserted here
+    {"mse", {"name=loss", "input_layers=lstm_scope/a2"}},
+  });
+  for (auto &node : outer_graph) {
+    nn->addLayer(node);
+  }
+
+  auto lstm = makeGraph({
+    {"lstmcell", {"name=a1", "unit=2"}},
+    {"lstmcell", {"name=a2", "unit=2", "input_layers=a1"}},
+  });
+
+  nn->addWithReferenceLayers(lstm, "lstm_scope", {"input"}, {"a1"}, {"a2"},
+                             ml::train::ReferenceLayersType::RECURRENT,
+                             {
+                               "unroll_for=2",
+                               "return_sequences=true",
+                               "recurrent_input=a1",
+                               "recurrent_output=a2",
+                             });
+
+  nn->setOptimizer(ml::train::createOptimizer("sgd", {"learning_rate = 0.1"}));
+  return nn;
+}
+
 INSTANTIATE_TEST_CASE_P(
   recurrentModels, nntrainerModelTest,
   ::testing::ValuesIn({
@@ -143,7 +204,11 @@ INSTANTIATE_TEST_CASE_P(
                  ModelTestOption::COMPARE_V2),
     mkModelTc_V2(makeFC, "fc_unroll_stacked", ModelTestOption::COMPARE_V2),
     mkModelTc_V2(makeSingleLSTM, "lstm_single", ModelTestOption::COMPARE_V2),
+    mkModelTc_V2(makeSingleLSTMCell, "lstm_single__1",
+                 ModelTestOption::COMPARE_V2),
     mkModelTc_V2(makeStackedLSTM, "lstm_stacked", ModelTestOption::COMPARE_V2),
+    mkModelTc_V2(makeStackedLSTMCell, "lstm_stacked__1",
+                 ModelTestOption::COMPARE_V2),
   }),
   [](const testing::TestParamInfo<nntrainerModelTest::ParamType> &info) {
     return std::get<1>(info.param);