From b9e3ac6ac3be492a4efd16feee4325316e85f5e0 Mon Sep 17 00:00:00 2001
From: Jihoon Lee
Date: Thu, 10 Jun 2021 13:21:03 +0900
Subject: [PATCH] [Loss] Apply requireLabel to feed label

Currently, the loss layer is the only one that can accept a label, which
was preventing the creation of custom loss layers.

**Major Changes**
- Apply requireLabel when feeding the label
- The last layer is no longer assumed to be a loss layer when feeding the
  label (it still is in other places; this remains to be dealt with)

**Minor Changes**
- Feeding multiple labels is now possible (splitting the label is future work)
- [Fix] The trainable layer count now includes the loss layer
- [Fix] Handle the case where no layer is trainable in neuralnet::backwarding

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Jihoon Lee
---
 nntrainer/graph/network_graph.cpp | 24 +++++++++++---
 nntrainer/layers/loss_layer.cpp   |  4 ---
 nntrainer/layers/time_dist.h      |  5 +++
 nntrainer/models/neuralnet.cpp    | 67 +++++++++++++++++++--------------------
 4 files changed, 56 insertions(+), 44 deletions(-)

diff --git a/nntrainer/graph/network_graph.cpp b/nntrainer/graph/network_graph.cpp
index 8f48f07..578cbe1 100644
--- a/nntrainer/graph/network_graph.cpp
+++ b/nntrainer/graph/network_graph.cpp
@@ -50,11 +50,11 @@ int NetworkGraph::compile(const LossType loss_type) {
 
   graph.topologicalSort();
 
-  countNonTrainableLayersAtBegin();
-
   status = addLossLayer(loss_type);
   NN_RETURN_STATUS();
 
+  countNonTrainableLayersAtBegin();
+
   status = checkCompiledGraph();
   NN_RETURN_STATUS();
 
@@ -220,19 +220,32 @@ int NetworkGraph::realizeMultiOutputType(
   return status;
 }
-/** TODO: this needs special attention */
+/**
+ * @fixme: the implementation assumes the loss layer is always the last
+ * layer and that there is only one loss; this assumption is not true
+ */
 int NetworkGraph::addLossLayer(const LossType loss_type) {
   int status = ML_ERROR_NONE;
 
   auto const &last_node = LNODE(graph.getSortedNode(graph.size() - 1));
   auto last_layer_node = getSortedLayerNode(graph.size() - 1);
 
-  if (last_node->getType() == LossLayer::type)
+  if (last_layer_node->getObject()->requireLabel()) {
     return status;
+  }
 
   if (loss_type == LossType::LOSS_NONE) {
-    return ML_ERROR_NONE;
+    return status;
   }
 
+  /**
+   * @note if the model has the property loss=sometype, it is dealt with
+   * below. These semantics assume there is only one loss, so return
+   * ML_ERROR_INVALID_PARAM if there is more than one loss
+   */
+
+  /// @todo enable this
+  /// if (num_layer_that_requires_label > 2) { return error; }
+
   LossType updated_loss_type = loss_type;
 
   if (updated_loss_type == LossType::LOSS_ENTROPY) {
@@ -244,6 +257,7 @@ int NetworkGraph::addLossLayer(const LossType loss_type) {
     return ML_ERROR_NOT_SUPPORTED;
   }
 
+  /// @todo add removing a node by its name, address, or equivalent
   graph.removeLastNode();
 
   switch (last_layer_node->getActivationType()) {
diff --git a/nntrainer/layers/loss_layer.cpp b/nntrainer/layers/loss_layer.cpp
index 1652645..410b741 100644
--- a/nntrainer/layers/loss_layer.cpp
+++ b/nntrainer/layers/loss_layer.cpp
@@ -48,10 +48,6 @@ void LossLayer::forwarding(bool training) {
   Tensor y = net_input[0]->getVariableRef();
   Tensor l;
   bool label_exist = !net_hidden[0]->getGradientRef().uninitialized();
-
-  if (net_input.empty())
-    label_exist = false;
-
   switch (loss_type) {
   case LossType::LOSS_MSE: {
     // y2 <- y2 - y;
diff --git a/nntrainer/layers/time_dist.h b/nntrainer/layers/time_dist.h
index bb76f92..a3dca83 100644
--- a/nntrainer/layers/time_dist.h
+++ b/nntrainer/layers/time_dist.h
@@ -98,6 +98,11 @@ public:
   std::shared_ptr<Layer> &getDistLayer() { return dist_layer; };
 
   /**
+   * @copydoc Layer::requireLabel()
+   */
+  bool requireLabel() const override { return dist_layer->requireLabel(); }
+
+  /**
    * @brief  get transposed Tensor according to time iteration axis
    *         [b, 1, h, w] to [h, 1, b, w]
    * @param[in] m Tensor
diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp
index f982a57..18a954c 100644
--- a/nntrainer/models/neuralnet.cpp
+++ b/nntrainer/models/neuralnet.cpp
@@ -226,27 +226,34 @@ sharedConstTensors NeuralNetwork::forwarding(sharedConstTensors input,
     << " input_batch: " << input[0]->batch()
     << " label_batch: " << label[0]->batch() << " target_batch: " << batch_size;
 
-  auto &first_layer = model_graph.getSortedLayerNode(0)->getObject();
-  auto &last_layer =
-    model_graph.getSortedLayerNode(model_graph.size() - 1)->getObject();
+  auto fill_label = [&label](auto &layer) {
+    NNTR_THROW_IF(label.size() != layer.net_hidden.size(),
+                  std::invalid_argument)
+      << "label size does not match with the layer requirements"
+      << " layer: " << layer.getName() << " label size: " << label.size()
+      << " requirements size: " << layer.net_hidden.size();
+
+    for (unsigned int i = 0; i < layer.net_hidden.size(); i++) {
+      layer.net_hidden[i]->getGradientRef() = *label[i];
+    }
+  };
+
+  auto clear_label = [](auto &layer) {
+    for (unsigned int i = 0; i < layer.net_hidden.size(); i++) {
+      layer.net_hidden[i]->getGradientRef() = Tensor();
+    }
+  };
 
-  /// @note centroid_knn layer needs to be the last layer, currently it is
-  /// not possible because loss layer is always added.
-  /// if centroid_knn layer can be last layer, this loop is not required
+  /// feed or clear label
   for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
-    auto const &layer_node = *iter;
-    auto &l = layer_node->getObject();
-    if (l->getType() == "centroid_knn") {
-      l->net_hidden[0]->getGradientRef() = *label[0].get();
+    auto &l = *iter->getObject();
+    if (l.requireLabel()) {
+      label.empty() ? clear_label(l) : fill_label(l);
     }
   }
 
-  if (label.empty())
-    last_layer->net_hidden[0]->getGradientRef() = Tensor();
-  else
-    last_layer->net_hidden[0]->getGradientRef() = *label[0].get();
-
-  first_layer->net_input[0]->getVariableRef() = *input[0].get();
+  auto &first_layer = model_graph.getSortedLayerNode(0)->getObject();
+  first_layer->net_input[0]->getVariableRef() = *input[0];
 
   return forwarding(training);
 }
@@ -303,18 +310,17 @@ void NeuralNetwork::backwarding(int iteration) {
   auto iter_begin = model_graph.getBackwardingBeginIter();
   auto iter_end = model_graph.getBackwardingEndIter();
 
-  auto const &lptr_begin = (*iter_begin);
-  if (lptr_begin->getObject()->getType() != LossLayer::type) {
-    bool has_loss = false;
-    if (lptr_begin->getObject()->getType() == TimeDistLayer::type) {
-      if (std::dynamic_pointer_cast<TimeDistLayer>(lptr_begin->getObject())
-            ->getDistLayerType() == LossLayer::type)
-        has_loss = true;
-    }
-    if (!has_loss)
-      throw std::runtime_error("Error: no loss provided for training.");
+  /// there is no layer to train, so backwarding is essentially a no-op
+  if (iter_begin == iter_end) {
+    return;
   }
 
+  auto const &lptr_begin = (*iter_begin);
+
+  if (lptr_begin->getObject()->requireLabel() == false)
+    throw std::runtime_error(
+      "Error: last layer does not accept label, we can't train");
+
   auto iter = iter_begin;
   for (; iter != iter_end - 1; iter++) {
     backwarding((*iter)->getObject(), iteration, true);
@@ -584,15 +590,6 @@ int NeuralNetwork::train_run() {
   auto &label = last_layer->net_hidden[0]->getGradientRef();
   auto &in = first_layer->net_input[0]->getVariableRef();
 
-  /// @todo migrate this to trait based system; sth like need label?
-
-  for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
-    auto const &layer_ = (*iter)->getObject();
-    if (layer_->getType() == "centroid_knn") {
-      layer_->net_hidden[0]->getGradientRef() = label;
-    }
-  }
-
  for (epoch_idx = epoch_idx + 1; epoch_idx <= epochs; ++epoch_idx) {
    training.loss = 0.0f;
    status = data_buffer->run(nntrainer::BufferType::BUF_TRAIN);
-- 
2.7.4
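
With requireLabel() now driving label feeding, a custom loss can be written as an ordinary layer that simply returns true from requireLabel(). The sketch below is illustrative only and is not part of the patch: the class name, the header names, and the omitted overrides are assumptions, while the requireLabel()/net_input/net_hidden/getGradientRef() usage mirrors what the diff above shows. The loss computation itself is left as a placeholder.

// Hypothetical custom loss layer sketch (not from the nntrainer sources).
#include <layer_internal.h> // assumed location of nntrainer::Layer
#include <tensor.h>

class CustomLoss : public nntrainer::Layer {
public:
  /**
   * Returning true is what makes NeuralNetwork::forwarding() feed the label
   * into net_hidden[i]->getGradientRef() for this layer, and what lets
   * NeuralNetwork::backwarding() accept it as the starting point of training.
   */
  bool requireLabel() const override { return true; }

  void forwarding(bool training) override {
    nntrainer::Tensor &prediction = net_input[0]->getVariableRef();
    nntrainer::Tensor &label = net_hidden[0]->getGradientRef();

    if (!label.uninitialized()) {
      // compute the custom loss from prediction and label here
    }
    // write this layer's output (typically the prediction itself) to
    // net_hidden[0]->getVariableRef() here
  }

  // remaining Layer overrides required by nntrainer omitted for brevity
};

Note that after this patch NeuralNetwork::backwarding() throws unless the first backwarding layer reports requireLabel() == true, so such a custom loss still has to sit at the end of the graph.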