[Loss] Apply requireLabel to feed label

author Jihoon Lee <jhoon.it.lee@samsung.com>

Thu, 10 Jun 2021 04:21:03 +0000 (13:21 +0900)

committer Jijoong Moon <jijoong.moon@samsung.com>

Tue, 15 Jun 2021 10:40:38 +0000 (19:40 +0900)
author Jihoon Lee <jhoon.it.lee@samsung.com>
Thu, 10 Jun 2021 04:21:03 +0000 (13:21 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Tue, 15 Jun 2021 10:40:38 +0000 (19:40 +0900)
diff --git a/nntrainer/graph/network_graph.cpp b/nntrainer/graph/network_graph.cpp

index 8f48f07bfd48aeb64d58725197be7d29aa68703a..578cbe1015fa07f0f45b788464689504bc4056e4 100644 (file)
--- a/nntrainer/graph/network_graph.cpp
+++ b/nntrainer/graph/network_graph.cpp
@@ -50,11 +50,11 @@ int NetworkGraph::compile(const LossType loss_type) {
  
    graph.topologicalSort();
  
-  countNonTrainableLayersAtBegin();
-
    status = addLossLayer(loss_type);
    NN_RETURN_STATUS();
  
+  countNonTrainableLayersAtBegin();
+
    status = checkCompiledGraph();
    NN_RETURN_STATUS();
  
@@ -220,19 +220,32 @@ int NetworkGraph::realizeMultiOutputType(
    return status;
  }
  
-/** TODO: this needs special attention */
+/**
+ * @fixme: the implementation assumes the loss layer is always the last
+ * layer and that there is only one loss; this assumption is not true
+ */
  int NetworkGraph::addLossLayer(const LossType loss_type) {
    int status = ML_ERROR_NONE;
    auto const &last_node = LNODE(graph.getSortedNode(graph.size() - 1));
    auto last_layer_node = getSortedLayerNode(graph.size() - 1);
  
-  if (last_node->getType() == LossLayer::type)
+  if (last_layer_node->getObject()->requireLabel()) {
      return status;
+  }
  
    if (loss_type == LossType::LOSS_NONE) {
-    return ML_ERROR_NONE;
+    return status;
    }
  
+  /**
+   * @note if the model has property loss=sometype, it is dealt with below.
+   * These semantics assume there is only one loss, so return
+   * ML_ERROR_INVALID_PARAM if there is more than one loss
+   */
+
+  /// @todo enable this
+  /// if (num_layer_that_requires_label > 2) { return error; }
+
    LossType updated_loss_type = loss_type;
  
    if (updated_loss_type == LossType::LOSS_ENTROPY) {
@@ -244,6 +257,7 @@ int NetworkGraph::addLossLayer(const LossType loss_type) {
        return ML_ERROR_NOT_SUPPORTED;
      }
  
+    /// @todo add remove node by its name or address or equivalent
      graph.removeLastNode();
  
      switch (last_layer_node->getActivationType()) {
diff --git a/nntrainer/layers/loss_layer.cpp b/nntrainer/layers/loss_layer.cpp

index 16526451f0762fd23baa0858887831944f296650..410b741872ee744168011c57bd8667db7c92f0fc 100644 (file)
--- a/nntrainer/layers/loss_layer.cpp
+++ b/nntrainer/layers/loss_layer.cpp
@@ -48,10 +48,6 @@ void LossLayer::forwarding(bool training) {
    Tensor y = net_input[0]->getVariableRef();
    Tensor l;
    bool label_exist = !net_hidden[0]->getGradientRef().uninitialized();
-
-  if (net_input.empty())
-    label_exist = false;
-
    switch (loss_type) {
    case LossType::LOSS_MSE: {
      // y2 <- y2 - y;
diff --git a/nntrainer/layers/time_dist.h b/nntrainer/layers/time_dist.h

index bb76f92161561a5fa0975798fd97951c88d060f9..a3dca835562c9160e2ecf2b528ca7a9539347912 100644 (file)
--- a/nntrainer/layers/time_dist.h
+++ b/nntrainer/layers/time_dist.h
@@ -97,6 +97,11 @@ public:
     */
    std::shared_ptr<Layer> &getDistLayer() { return dist_layer; };
  
+  /**
+   * @copydoc Layer::requireLabel()
+   */
+  bool requireLabel() const override { return dist_layer->requireLabel(); }
+
    /**
     * @brief     get transposed Tensor according to time iteration axis
     *            [b, 1, h, w] to [h, 1, b, w]
diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp

index f982a575bb7ed260a6fb1204a7e3ae0b5ebe07cb..18a954c949d7f3dd0a47f84dc961f6fefe50d887 100644 (file)
--- a/nntrainer/models/neuralnet.cpp
+++ b/nntrainer/models/neuralnet.cpp
@@ -226,27 +226,34 @@ sharedConstTensors NeuralNetwork::forwarding(sharedConstTensors input,
      << " input_batch: " << input[0]->batch()
      << " label_batch: " << label[0]->batch() << " target_batch: " << batch_size;
  
-  auto &first_layer = model_graph.getSortedLayerNode(0)->getObject();
-  auto &last_layer =
-    model_graph.getSortedLayerNode(model_graph.size() - 1)->getObject();
+  auto fill_label = [&label](auto &layer) {
+    NNTR_THROW_IF(label.size() != layer.net_hidden.size(),
+                  std::invalid_argument)
+      << "label size does not match with the layer requirements"
+      << " layer: " << layer.getName() << " label size: " << label.size()
+      << " requirements size: " << layer.net_hidden.size();
+
+    for (unsigned int i = 0; i < layer.net_hidden.size(); i++) {
+      layer.net_hidden[i]->getGradientRef() = *label[i];
+    }
+  };
+
+  auto clear_label = [](auto &layer) {
+    for (unsigned int i = 0; i < layer.net_hidden.size(); i++) {
+      layer.net_hidden[i]->getGradientRef() = Tensor();
+    }
+  };
  
-  /// @note centroid_knn layer needs to be the last layer, currently it is
-  /// not possible because loss layer is always added.
-  /// if centroid_knn layer can be last layer, this loop is not required
+  /// feed or clear label
    for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
-    auto const &layer_node = *iter;
-    auto &l = layer_node->getObject();
-    if (l->getType() == "centroid_knn") {
-      l->net_hidden[0]->getGradientRef() = *label[0].get();
+    auto &l = *iter->getObject();
+    if (l.requireLabel()) {
+      label.empty() ? clear_label(l) : fill_label(l);
      }
    }
  
-  if (label.empty())
-    last_layer->net_hidden[0]->getGradientRef() = Tensor();
-  else
-    last_layer->net_hidden[0]->getGradientRef() = *label[0].get();
-
-  first_layer->net_input[0]->getVariableRef() = *input[0].get();
+  auto &first_layer = model_graph.getSortedLayerNode(0)->getObject();
+  first_layer->net_input[0]->getVariableRef() = *input[0];
  
    return forwarding(training);
  }
@@ -303,18 +310,17 @@ void NeuralNetwork::backwarding(int iteration) {
    auto iter_begin = model_graph.getBackwardingBeginIter();
    auto iter_end = model_graph.getBackwardingEndIter();
  
-  auto const &lptr_begin = (*iter_begin);
-  if (lptr_begin->getObject()->getType() != LossLayer::type) {
-    bool has_loss = false;
-    if (lptr_begin->getObject()->getType() == TimeDistLayer::type) {
-      if (std::dynamic_pointer_cast<TimeDistLayer>(lptr_begin->getObject())
-            ->getDistLayerType() == LossLayer::type)
-        has_loss = true;
-    }
-    if (!has_loss)
-      throw std::runtime_error("Error: no loss provided for training.");
+  /// there is no layer to train, so backwarding is essentially noop
+  if (iter_begin == iter_end) {
+    return;
    }
  
+  auto const &lptr_begin = (*iter_begin);
+
+  if (lptr_begin->getObject()->requireLabel() == false)
+    throw std::runtime_error(
+      "Error: last layer does not accept label, we can't train");
+
    auto iter = iter_begin;
    for (; iter != iter_end - 1; iter++) {
      backwarding((*iter)->getObject(), iteration, true);
@@ -584,15 +590,6 @@ int NeuralNetwork::train_run() {
    auto &label = last_layer->net_hidden[0]->getGradientRef();
    auto &in = first_layer->net_input[0]->getVariableRef();
  
-  /// @todo migrate this to trait based system; sth like need label?
-
-  for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
-    auto const &layer_ = (*iter)->getObject();
-    if (layer_->getType() == "centroid_knn") {
-      layer_->net_hidden[0]->getGradientRef() = label;
-    }
-  }
-
    for (epoch_idx = epoch_idx + 1; epoch_idx <= epochs; ++epoch_idx) {
      training.loss = 0.0f;
      status = data_buffer->run(nntrainer::BufferType::BUF_TRAIN);
author	Jihoon Lee <jhoon.it.lee@samsung.com>
	Thu, 10 Jun 2021 04:21:03 +0000 (13:21 +0900)
committer	Jijoong Moon <jijoong.moon@samsung.com>
	Tue, 15 Jun 2021 10:40:38 +0000 (19:40 +0900)
nntrainer/graph/network_graph.cpp		patch \| blob \| history
nntrainer/layers/loss_layer.cpp		patch \| blob \| history
nntrainer/layers/time_dist.h		patch \| blob \| history
nntrainer/models/neuralnet.cpp		patch \| blob \| history