graph.topologicalSort();
- countNonTrainableLayersAtBegin();
-
status = addLossLayer(loss_type);
NN_RETURN_STATUS();
+ countNonTrainableLayersAtBegin();
+
status = checkCompiledGraph();
NN_RETURN_STATUS();
return status;
}
-/** TODO: this needs special attention */
+/**
+ * @fixme the implementation assumes that the loss layer is always the last
+ * layer and that there is only one loss; this assumption does not hold
+ */
int NetworkGraph::addLossLayer(const LossType loss_type) {
int status = ML_ERROR_NONE;
auto const &last_node = LNODE(graph.getSortedNode(graph.size() - 1));
auto last_layer_node = getSortedLayerNode(graph.size() - 1);
- if (last_node->getType() == LossLayer::type)
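+ /// a layer that already consumes a label (e.g. a loss layer) is present,
+ /// so no implicit loss needs to be added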
+ if (last_layer_node->getObject()->requireLabel()) {
return status;
+ }
if (loss_type == LossType::LOSS_NONE) {
- return ML_ERROR_NONE;
+ return status;
}
+ /**
+ * @note if the model has the property loss=sometype, it is handled below.
+ * These semantics assume there is only one loss, so ML_ERROR_INVALID_PARAM
+ * should be returned if there is more than one loss
+ */
+
+ /// @todo enable this
+ /// if (num_layer_that_requires_label > 1) { return error; }
+
LossType updated_loss_type = loss_type;
if (updated_loss_type == LossType::LOSS_ENTROPY) {
return ML_ERROR_NOT_SUPPORTED;
}
+ /// @todo support removing a node by its name, address, or equivalent
graph.removeLastNode();
switch (last_layer_node->getActivationType()) {
<< " input_batch: " << input[0]->batch()
<< " label_batch: " << label[0]->batch() << " target_batch: " << batch_size;
- auto &first_layer = model_graph.getSortedLayerNode(0)->getObject();
- auto &last_layer =
- model_graph.getSortedLayerNode(model_graph.size() - 1)->getObject();
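+ /// labels are fed by writing them into the gradient tensors of the
+ /// consuming layer's outputs; the helpers below fill or clear them per layer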
+ auto fill_label = [&label](auto &layer) {
+ NNTR_THROW_IF(label.size() != layer.net_hidden.size(),
+ std::invalid_argument)
+ << "label size does not match with the layer requirements"
+ << " layer: " << layer.getName() << " label size: " << label.size()
+ << " requirements size: " << layer.net_hidden.size();
+
+ for (unsigned int i = 0; i < layer.net_hidden.size(); i++) {
+ layer.net_hidden[i]->getGradientRef() = *label[i];
+ }
+ };
+
+ auto clear_label = [](auto &layer) {
+ for (unsigned int i = 0; i < layer.net_hidden.size(); i++) {
+ layer.net_hidden[i]->getGradientRef() = Tensor();
+ }
+ };
- /// @note centroid_knn layer needs to be the last layer, currently it is
- /// not possible because loss layer is always added.
- /// if centroid_knn layer can be last layer, this loop is not required
+ /// feed or clear label
for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
- auto const &layer_node = *iter;
- auto &l = layer_node->getObject();
- if (l->getType() == "centroid_knn") {
- l->net_hidden[0]->getGradientRef() = *label[0].get();
+ auto &l = *(*iter)->getObject();
+ if (l.requireLabel()) {
+ label.empty() ? clear_label(l) : fill_label(l);
}
}
- if (label.empty())
- last_layer->net_hidden[0]->getGradientRef() = Tensor();
- else
- last_layer->net_hidden[0]->getGradientRef() = *label[0].get();
-
- first_layer->net_input[0]->getVariableRef() = *input[0].get();
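+ /// the input tensor is bound directly to the first layer's input variable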
+ auto &first_layer = model_graph.getSortedLayerNode(0)->getObject();
+ first_layer->net_input[0]->getVariableRef() = *input[0];
return forwarding(training);
}
auto iter_begin = model_graph.getBackwardingBeginIter();
auto iter_end = model_graph.getBackwardingEndIter();
- auto const &lptr_begin = (*iter_begin);
- if (lptr_begin->getObject()->getType() != LossLayer::type) {
- bool has_loss = false;
- if (lptr_begin->getObject()->getType() == TimeDistLayer::type) {
- if (std::dynamic_pointer_cast<TimeDistLayer>(lptr_begin->getObject())
- ->getDistLayerType() == LossLayer::type)
- has_loss = true;
- }
- if (!has_loss)
- throw std::runtime_error("Error: no loss provided for training.");
+ /// there is no layer to train, so backwarding is essentially a no-op
+ if (iter_begin == iter_end) {
+ return;
}
+ auto const &lptr_begin = (*iter_begin);
+
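+ /// backwarding starts from the last node of the sorted graph, which must
+ /// consume a label for training to be possible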
+ if (lptr_begin->getObject()->requireLabel() == false)
+ throw std::runtime_error(
+ "Error: last layer does not accept label, we can't train");
+
auto iter = iter_begin;
for (; iter != iter_end - 1; iter++) {
backwarding((*iter)->getObject(), iteration, true);
auto &label = last_layer->net_hidden[0]->getGradientRef();
auto &in = first_layer->net_input[0]->getVariableRef();
- /// @todo migrate this to trait based system; sth like need label?
-
- for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
- auto const &layer_ = (*iter)->getObject();
- if (layer_->getType() == "centroid_knn") {
- layer_->net_hidden[0]->getGradientRef() = label;
- }
- }
-
for (epoch_idx = epoch_idx + 1; epoch_idx <= epochs; ++epoch_idx) {
training.loss = 0.0f;
status = data_buffer->run(nntrainer::BufferType::BUF_TRAIN);