[layer] Bug fix for LSTMCell
authorParichay Kapoor <pk.kapoor@samsung.com>
Mon, 25 Oct 2021 01:29:50 +0000 (10:29 +0900)
committerJijoong Moon <jijoong.moon@samsung.com>
Tue, 26 Oct 2021 00:11:31 +0000 (09:11 +0900)
Enable recurrent unrolling tests for LSTMCell with single and stacked
LSTM cells, and add the corresponding bug fixes.
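
For reference, the core of the backward-pass fix is sketched below. The
tensor names follow calcGradient() in lstmcell.cpp, but the surrounding
setup is omitted, so treat this as an illustrative sketch rather than the
full implementation:

    // Accumulate the incoming derivative into the slice for the current
    // timestep instead of overwriting the whole derivative tensor.
    Tensor dh = derivative_.getBatchSlice(start_timestep, 1);
    dh.reshape(incoming_deriv.getDim());
    if (start_timestep + 1 == max_timestep)
      dh.copyData(incoming_deriv); // last unrolled step: plain copy
    else
      dh.add_i(incoming_deriv);    // earlier steps: add the recurrent gradient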

Signed-off-by: Parichay Kapoor <pk.kapoor@samsung.com>
jni/Android.mk
nntrainer/compiler/recurrent_realizer.cpp
nntrainer/layers/lstmcell.cpp
nntrainer/tensor/tensor.cpp
test/unittest/models/unittest_models_recurrent.cpp

index aa111f2..df470e5 100644 (file)
@@ -170,6 +170,7 @@ NNTRAINER_SRCS := $(NNTRAINER_ROOT)/nntrainer/models/neuralnet.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/embedding.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/rnn.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/lstm.cpp \
+                  $(NNTRAINER_ROOT)/nntrainer/layers/lstmcell.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/gru.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/time_dist.cpp \
                   $(NNTRAINER_ROOT)/nntrainer/layers/dropout.cpp \
index 0751cc6..3e17130 100644 (file)
 #include <input_layer.h>
 #include <layer_node.h>
 #include <lstm.h>
+#include <lstmcell.h>
 #include <nntrainer_error.h>
 #include <node_exporter.h>
 #include <remap_realizer.h>
 #include <util_func.h>
+
 namespace nntrainer {
 
 namespace props {
@@ -123,8 +125,10 @@ RecurrentRealizer::RecurrentRealizer(
 static void propagateTimestep(LayerNode *node, unsigned int time_step,
                               unsigned int max_time_step) {
 
+  /** @todo add an interface to check if a layer supports a property */
   auto is_recurrent_type = [](LayerNode *node) {
-    return node->getType() == LSTMLayer::type;
+    return node->getType() == LSTMLayer::type ||
+           node->getType() == LSTMCellLayer::type;
   };
 
   if (is_recurrent_type(node)) {
index 8e64221..e17e196 100644 (file)
@@ -82,7 +82,7 @@ void LSTMCellLayer::finalize(InitLayerContext &context) {
   // input_dim = [ batch, 1, 1, feature_size ]
   TensorDim output_dim;
   const TensorDim &input_dim = context.getInputDimensions()[0];
-  if (input_dim.height() != 1)
+  if (input_dim.height() != 1 && input_dim.channel() != 1)
     throw std::invalid_argument(
       "Input must be single time dimension for LSTMCell");
   // output_dim = [ batch, 1, 1, hidden_size (unit)]
@@ -281,25 +281,24 @@ void LSTMCellLayer::calcGradient(RunLayerContext &context) {
     d_fgio.setZero();
   }
 
-  std::copy(incoming_deriv.getData(),
-            incoming_deriv.getData() + incoming_deriv.size(),
-            derivative_.getData());
+  Tensor dh = derivative_.getBatchSlice(start_timestep, 1);
+  dh.reshape(incoming_deriv.getDim());
+  if (start_timestep + 1 == max_timestep) {
+    dh.copyData(incoming_deriv);
+  } else {
+    dh.add_i(incoming_deriv);
+  }
+  dh = derivative_.getBatchSlice(start_timestep, 1);
 
   if (dropout_rate > epsilon) {
     derivative_.multiply_i(context.getTensor(wt_idx[LSTMParams::dropout_mask]));
   }
 
-  Tensor dh = derivative_.getBatchSlice(start_timestep, 1);
-
-  std::copy(incoming_deriv.getData(),
-            incoming_deriv.getData() + incoming_deriv.size(), dh.getData());
-
   Tensor dc = dm_cell_.getBatchSlice(start_timestep, 1);
   Tensor xs = input_;
   Tensor hs_t = hidden_.getBatchSlice(start_timestep, 1);
   Tensor cs = m_cell_.getBatchSlice(start_timestep, 1);
 
-  Tensor hs_prev;
   Tensor dfgio_t = d_fgio.getBatchSlice(start_timestep, 1);
   Tensor fgio_t = fgio.getBatchSlice(start_timestep, 1);
 
index c1598dc..a90b866 100644 (file)
@@ -341,6 +341,7 @@ Tensor Tensor::multiply_strided(Tensor const &m) const {
 }
 
 Tensor &Tensor::multiply_strided(Tensor const &m, Tensor &output) const {
+  /** TODO: throw rather than create new dimensions */
   CREATE_IF_EMPTY_DIMS(output, dim);
 
   if (size() != m.size() || size() != output.size())
@@ -413,8 +414,9 @@ Tensor &Tensor::multiply(Tensor const &m, Tensor &output) const {
     }
   };
 
-  if ((size() == m.size() && strides != m.strides) ||
-      (size() == output.size() && strides != output.strides))
+  if (output.size() > 0 &&
+      ((size() == m.size() && strides != m.strides) ||
+       (size() == output.size() && strides != output.strides)))
     throw std::invalid_argument(
       "Use multiply_strided for multiplying strided tensors");
 
@@ -621,7 +623,6 @@ Tensor Tensor::getSharedDataTensor(const TensorDim dim_, unsigned int offset,
   TensorDim new_match_dim = dim_;
   new_match_dim.batch(dim.batch());
   if (new_match_dim != dim && !reset_stride)
-    // throw std::runtime_error("non contiguous tensor");
     ret.contiguous = false;
 
   /**
index e8df1c7..780a4e2 100644 (file)
@@ -136,6 +136,67 @@ static std::unique_ptr<NeuralNetwork> makeStackedLSTM() {
   return nn;
 }
 
+static std::unique_ptr<NeuralNetwork> makeSingleLSTMCell() {
+  std::unique_ptr<NeuralNetwork> nn(new NeuralNetwork());
+  nn->setProperty({"batch_size=3"});
+
+  auto outer_graph = makeGraph({
+    {"input", {"name=input", "input_shape=1:1:2"}},
+    /// the lstmcell layers are inserted here
+    {"mse", {"name=loss", "input_layers=lstm_scope/a1"}},
+  });
+  for (auto &node : outer_graph) {
+    nn->addLayer(node);
+  }
+
+  auto lstm = makeGraph({
+    {"lstmcell", {"name=a1", "unit=2"}},
+  });
+
+  nn->addWithReferenceLayers(lstm, "lstm_scope", {"input"}, {"a1"}, {"a1"},
+                             ml::train::ReferenceLayersType::RECURRENT,
+                             {
+                               "unroll_for=2",
+                               "return_sequences=true",
+                               "recurrent_input=a1",
+                               "recurrent_output=a1",
+                             });
+
+  nn->setOptimizer(ml::train::createOptimizer("sgd", {"learning_rate = 0.1"}));
+  return nn;
+}
+
+static std::unique_ptr<NeuralNetwork> makeStackedLSTMCell() {
+  std::unique_ptr<NeuralNetwork> nn(new NeuralNetwork());
+  nn->setProperty({"batch_size=3"});
+
+  auto outer_graph = makeGraph({
+    {"input", {"name=input", "input_shape=1:1:2"}},
+    /// the lstmcell layers are inserted here
+    {"mse", {"name=loss", "input_layers=lstm_scope/a2"}},
+  });
+  for (auto &node : outer_graph) {
+    nn->addLayer(node);
+  }
+
+  auto lstm = makeGraph({
+    {"lstmcell", {"name=a1", "unit=2"}},
+    {"lstmcell", {"name=a2", "unit=2", "input_layers=a1"}},
+  });
+
+  nn->addWithReferenceLayers(lstm, "lstm_scope", {"input"}, {"a1"}, {"a2"},
+                             ml::train::ReferenceLayersType::RECURRENT,
+                             {
+                               "unroll_for=2",
+                               "return_sequences=true",
+                               "recurrent_input=a1",
+                               "recurrent_output=a2",
+                             });
+
+  nn->setOptimizer(ml::train::createOptimizer("sgd", {"learning_rate = 0.1"}));
+  return nn;
+}
+
 INSTANTIATE_TEST_CASE_P(
   recurrentModels, nntrainerModelTest,
   ::testing::ValuesIn({
@@ -143,7 +204,11 @@ INSTANTIATE_TEST_CASE_P(
                  ModelTestOption::COMPARE_V2),
     mkModelTc_V2(makeFC, "fc_unroll_stacked", ModelTestOption::COMPARE_V2),
     mkModelTc_V2(makeSingleLSTM, "lstm_single", ModelTestOption::COMPARE_V2),
+    mkModelTc_V2(makeSingleLSTMCell, "lstm_single__1",
+                 ModelTestOption::COMPARE_V2),
     mkModelTc_V2(makeStackedLSTM, "lstm_stacked", ModelTestOption::COMPARE_V2),
+    mkModelTc_V2(makeStackedLSTMCell, "lstm_stacked__1",
+                 ModelTestOption::COMPARE_V2),
   }),
   [](const testing::TestParamInfo<nntrainerModelTest::ParamType> &info) {
     return std::get<1>(info.param);