From b9e3ac6ac3be492a4efd16feee4325316e85f5e0 Mon Sep 17 00:00:00 2001
From: Jihoon Lee
Date: Thu, 10 Jun 2021 13:21:03 +0900
Subject: [PATCH] [Loss] Apply requireLabel to feed label

Currently, the loss layer is the only one that can accept a label, which
was preventing the creation of custom loss layers.

**Major Changes**
- Apply requireLabel when feeding the label
- The last layer is no longer assumed to be a loss layer when feeding the
  label (it still is in other places; this remains to be dealt with)

**Minor Changes**
- Feeding multiple labels is now possible (splitting the label is future work)
- [Fix] The trainable layer count now includes the loss layer
- [Fix] Handle the case where no layer is trainable in neuralnet::backwarding

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Jihoon Lee
---
 nntrainer/graph/network_graph.cpp | 24 +++++++++++---
 nntrainer/layers/loss_layer.cpp   |  4 ---
 nntrainer/layers/time_dist.h      |  5 +++
 nntrainer/models/neuralnet.cpp    | 67 +++++++++++++++++++--------------------
 4 files changed, 56 insertions(+), 44 deletions(-)

diff --git a/nntrainer/graph/network_graph.cpp b/nntrainer/graph/network_graph.cpp
index 8f48f07..578cbe1 100644
--- a/nntrainer/graph/network_graph.cpp
+++ b/nntrainer/graph/network_graph.cpp
@@ -50,11 +50,11 @@ int NetworkGraph::compile(const LossType loss_type) {
 
   graph.topologicalSort();
 
-  countNonTrainableLayersAtBegin();
-
   status = addLossLayer(loss_type);
   NN_RETURN_STATUS();
 
+  countNonTrainableLayersAtBegin();
+
   status = checkCompiledGraph();
   NN_RETURN_STATUS();
 
@@ -220,19 +220,32 @@ int NetworkGraph::realizeMultiOutputType(
   return status;
 }
-/** TODO: this needs special attention */
+/**
+ * @fixme: the implementation assumes the loss layer is always the last
+ * layer and that there is only one loss; this assumption is not true
+ */
 int NetworkGraph::addLossLayer(const LossType loss_type) {
   int status = ML_ERROR_NONE;
 
   auto const &last_node = LNODE(graph.getSortedNode(graph.size() - 1));
   auto last_layer_node = getSortedLayerNode(graph.size() - 1);
 
-  if (last_node->getType() == LossLayer::type)
+  if (last_layer_node->getObject()->requireLabel()) {
     return status;
+  }
 
   if (loss_type == LossType::LOSS_NONE) {
-    return ML_ERROR_NONE;
+    return status;
   }
 
+  /**
+   * @note if the model has the property loss=sometype, it is dealt with
+   * below. These semantics assume there is only one loss, so return
+   * ML_ERROR_INVALID_PARAM if there is more than one loss
+   */
+
+  /// @todo enable this
+  /// if (num_layer_that_requires_label > 2) { return error; }
+
   LossType updated_loss_type = loss_type;
 
   if (updated_loss_type == LossType::LOSS_ENTROPY) {
@@ -244,6 +257,7 @@ int NetworkGraph::addLossLayer(const LossType loss_type) {
     return ML_ERROR_NOT_SUPPORTED;
   }
 
+  /// @todo add removing a node by its name, address, or equivalent
   graph.removeLastNode();
 
   switch (last_layer_node->getActivationType()) {
diff --git a/nntrainer/layers/loss_layer.cpp b/nntrainer/layers/loss_layer.cpp
index 1652645..410b741 100644
--- a/nntrainer/layers/loss_layer.cpp
+++ b/nntrainer/layers/loss_layer.cpp
@@ -48,10 +48,6 @@ void LossLayer::forwarding(bool training) {
   Tensor y = net_input[0]->getVariableRef();
   Tensor l;
   bool label_exist = !net_hidden[0]->getGradientRef().uninitialized();
-
-  if (net_input.empty())
-    label_exist = false;
-
   switch (loss_type) {
   case LossType::LOSS_MSE: {
     // y2 <- y2 - y;
diff --git a/nntrainer/layers/time_dist.h b/nntrainer/layers/time_dist.h
index bb76f92..a3dca83 100644
--- a/nntrainer/layers/time_dist.h
+++ b/nntrainer/layers/time_dist.h
@@ -98,6 +98,11 @@ public:
   std::shared_ptr<Layer> &getDistLayer() { return dist_layer; };
 
   /**
+   * @copydoc Layer::requireLabel()
+   */
+  bool requireLabel() const override { return dist_layer->requireLabel(); }
+
+  /**
    * @brief  get transposed Tensor according to time iteration axis
    *         [b, 1, h, w] to [h, 1, b, w]
    * @param[in] m Tensor
diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp
index f982a57..18a954c 100644
--- a/nntrainer/models/neuralnet.cpp
+++ b/nntrainer/models/neuralnet.cpp
@@ -226,27 +226,34 @@ sharedConstTensors NeuralNetwork::forwarding(sharedConstTensors input,
     << " input_batch: " << input[0]->batch()
     << " label_batch: " << label[0]->batch() << " target_batch: " << batch_size;
 
-  auto &first_layer = model_graph.getSortedLayerNode(0)->getObject();
-  auto &last_layer =
-    model_graph.getSortedLayerNode(model_graph.size() - 1)->getObject();
+  auto fill_label = [&label](auto &layer) {
+    NNTR_THROW_IF(label.size() != layer.net_hidden.size(),
+                  std::invalid_argument)
+      << "label size does not match with the layer requirements"
+      << " layer: " << layer.getName() << " label size: " << label.size()
+      << " requirements size: " << layer.net_hidden.size();
+
+    for (unsigned int i = 0; i < layer.net_hidden.size(); i++) {
+      layer.net_hidden[i]->getGradientRef() = *label[i];
+    }
+  };
+
+  auto clear_label = [](auto &layer) {
+    for (unsigned int i = 0; i < layer.net_hidden.size(); i++) {
+      layer.net_hidden[i]->getGradientRef() = Tensor();
+    }
+  };
 
-  /// @note centroid_knn layer needs to be the last layer, currently it is
-  /// not possible because loss layer is always added.
-  /// if centroid_knn layer can be last layer, this loop is not required
+  /// feed or clear label
   for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
-    auto const &layer_node = *iter;
-    auto &l = layer_node->getObject();
-    if (l->getType() == "centroid_knn") {
-      l->net_hidden[0]->getGradientRef() = *label[0].get();
+    auto &l = *iter->getObject();
+    if (l.requireLabel()) {
+      label.empty() ? clear_label(l) : fill_label(l);
     }
   }
 
-  if (label.empty())
-    last_layer->net_hidden[0]->getGradientRef() = Tensor();
-  else
-    last_layer->net_hidden[0]->getGradientRef() = *label[0].get();
-
-  first_layer->net_input[0]->getVariableRef() = *input[0].get();
+  auto &first_layer = model_graph.getSortedLayerNode(0)->getObject();
+  first_layer->net_input[0]->getVariableRef() = *input[0];
 
   return forwarding(training);
 }
@@ -303,18 +310,17 @@ void NeuralNetwork::backwarding(int iteration) {
   auto iter_begin = model_graph.getBackwardingBeginIter();
   auto iter_end = model_graph.getBackwardingEndIter();
 
-  auto const &lptr_begin = (*iter_begin);
-  if (lptr_begin->getObject()->getType() != LossLayer::type) {
-    bool has_loss = false;
-    if (lptr_begin->getObject()->getType() == TimeDistLayer::type) {
-      if (std::dynamic_pointer_cast<TimeDistLayer>(lptr_begin->getObject())
-            ->getDistLayerType() == LossLayer::type)
-        has_loss = true;
-    }
-    if (!has_loss)
-      throw std::runtime_error("Error: no loss provided for training.");
+  /// there is no layer to train, so backwarding is essentially a no-op
+  if (iter_begin == iter_end) {
+    return;
   }
 
+  auto const &lptr_begin = (*iter_begin);
+
+  if (lptr_begin->getObject()->requireLabel() == false)
+    throw std::runtime_error(
+      "Error: last layer does not accept label, we can't train");
+
   auto iter = iter_begin;
   for (; iter != iter_end - 1; iter++) {
     backwarding((*iter)->getObject(), iteration, true);
@@ -584,15 +590,6 @@ int NeuralNetwork::train_run() {
   auto &label = last_layer->net_hidden[0]->getGradientRef();
   auto &in = first_layer->net_input[0]->getVariableRef();
 
-  /// @todo migrate this to trait based system; sth like need label?
-
-  for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
-    auto const &layer_ = (*iter)->getObject();
-    if (layer_->getType() == "centroid_knn") {
-      layer_->net_hidden[0]->getGradientRef() = label;
-    }
-  }
-
  for (epoch_idx = epoch_idx + 1; epoch_idx <= epochs; ++epoch_idx) {
    training.loss = 0.0f;
    status = data_buffer->run(nntrainer::BufferType::BUF_TRAIN);
-- 
2.7.4
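
With requireLabel() now driving label feeding, a custom loss can be written as an ordinary layer that simply returns true from requireLabel(). The sketch below is illustrative only and is not part of the patch: the class name, the header names, and the omitted overrides are assumptions, while the requireLabel()/net_input/net_hidden/getGradientRef() usage mirrors what the diff above shows. The loss computation itself is left as a placeholder.

// Hypothetical custom loss layer sketch (not from the nntrainer sources).
#include <layer_internal.h> // assumed location of nntrainer::Layer
#include <tensor.h>

class CustomLoss : public nntrainer::Layer {
public:
  /**
   * Returning true is what makes NeuralNetwork::forwarding() feed the label
   * into net_hidden[i]->getGradientRef() for this layer, and what lets
   * NeuralNetwork::backwarding() accept it as the starting point of training.
   */
  bool requireLabel() const override { return true; }

  void forwarding(bool training) override {
    nntrainer::Tensor &prediction = net_input[0]->getVariableRef();
    nntrainer::Tensor &label = net_hidden[0]->getGradientRef();

    if (!label.uninitialized()) {
      // compute the custom loss from prediction and label here
    }
    // write this layer's output (typically the prediction itself) to
    // net_hidden[0]->getVariableRef() here
  }

  // remaining Layer overrides required by nntrainer omitted for brevity
};

Note that after this patch NeuralNetwork::backwarding() throws unless the first backwarding layer reports requireLabel() == true, so such a custom loss still has to sit at the end of the graph.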