// SPDX-License-Identifier: Apache-2.0
/**
 * Copyright (C) 2020 Jijoong Moon <jijoong.moon@samsung.com>
 *
 * @file   network_graph.cpp
 * @see    https://github.com/nnstreamer/nntrainer
 * @author Jijoong Moon <jijoong.moon@samsung.com>
 * @bug    No known bugs except for NYI items
 * @brief  This is Network Graph Class for Neural Network
 *
 * @todo   Support multi-input graph.
 */
#include <activation_layer.h>
#include <addition_layer.h>
#include <bn_layer.h>
#include <concat_layer.h>
#include <connection.h>
#include <cross_entropy_loss_layer.h>
#include <cross_entropy_sigmoid_loss_layer.h>
#include <cross_entropy_softmax_loss_layer.h>
#include <flatten_layer.h>
#include <grucell.h>
#include <identity_layer.h>
#include <input_layer.h>
#include <layer_node.h>
#include <layer_normalization_layer.h>
#include <lstmcell.h>
#include <multiout_layer.h>
#include <network_graph.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <profiler.h>
#include <rnncell.h>
#include <split_layer.h>
#include <time_dist.h>
#include <tracer.h>
#include <util_func.h>

#define LNODE(x) std::static_pointer_cast<LayerNode>(x)

namespace nntrainer {
int NetworkGraph::compile(const std::string &loss_type) {
  int status = ML_ERROR_NONE;

  status = isCompilable();
  NN_RETURN_STATUS();

  try {
    setOutputConnections();
  } catch (std::exception &e) {
    ml_loge("setting output layer failed, reason: %s", e.what());
    return ML_ERROR_INVALID_PARAMETER;
  }

  graph.realizeInputOutputNode();

  try {
    /// @todo realize loss beforehand
    status = addLossLayer(loss_type);
    NN_RETURN_STATUS();
  } catch (const std::exception &e) {
    ml_loge("%s", e.what());
    status = ML_ERROR_INVALID_PARAMETER;
    NN_RETURN_STATUS();
  }

  graph.topologicalSort();

  setExecutionOrder();
  forward_iter_end = (*(cend() - 1)).get();

  inPlaceOptimize();

  status = checkCompiledGraph();
  NN_RETURN_STATUS();

  compiled = true;

  return status;
}
void NetworkGraph::setExecutionOrder() {
  auto backward_order = graph.size();
  for (auto iter = getBackwardingBeginIter(); iter != getBackwardingEndIter();
       iter++) {
    auto &node = *iter;
    auto order_idx = getBackwardingEndIter() - iter - 1;
    auto forward_order = order_idx;
    auto calc_gradient_order = backward_order;
    if (node->getTrainable())
      backward_order++;
    auto calc_derivative_order = backward_order;
    if (node->getTrainable())
      backward_order++;
    auto apply_gradient_order = backward_order++;

    node->setExecutionOrder({forward_order, calc_gradient_order,
                             calc_derivative_order, apply_gradient_order});
  }

  /**
   * This sets the max execution order temporarily till the model is
   * initialized. The max execution order set here is used to extend gradient
   * exec orders for clipping.
   */
  graph_exec_end = std::get<3>((*(cbegin()))->getExecutionOrder());
}
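/**
 * @note Illustrative example (not part of the implementation): for a
 * three-node graph sorted as [input, fc, loss] with every node trainable,
 * the loop above assigns
 *   loss  -> {forward: 2, calc_gradient: 3, calc_derivative: 4, apply: 5}
 *   fc    -> {forward: 1, calc_gradient: 6, calc_derivative: 7, apply: 8}
 *   input -> {forward: 0, calc_gradient: 9, calc_derivative: 10, apply: 11}
 * so graph_exec_end becomes 11, the apply-gradient order of the first
 * sorted node.
 */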
void NetworkGraph::addLayerNode(std::unique_ptr<Layer> layer) {
  graph.addNode(std::make_unique<LayerNode>(std::move(layer)));
}
int NetworkGraph::addLossLayer(const std::string &loss_type_) {
  for (unsigned int i = 0; i < graph.getNumOutputNodes(); ++i) {
    auto output_layer_node = LNODE(graph.getOutputNode(i));
    std::string loss_type = loss_type_;

    if (output_layer_node->requireLabel())
      continue;

    if (loss_type.empty())
      continue;

    auto second_to_last_layer_node = output_layer_node;
    bool is_cross_entropy_loss =
      istrequal(loss_type, CrossEntropyLossLayer::type);
    if (is_cross_entropy_loss) {
      auto type = output_layer_node->getType();

      if (type != ActivationLayer::type) {
        throw exception::not_supported(
          "Error: Cross Entropy needs the last layer to have a softmax or "
          "sigmoid activation.");
      }

      switch (output_layer_node->getActivationType()) {
      case ActivationType::ACT_SIGMOID:
        loss_type = CrossEntropySigmoidLossLayer::type;
        break;
      case ActivationType::ACT_SOFTMAX:
        loss_type = CrossEntropySoftmaxLossLayer::type;
        break;
      default:
        throw exception::not_supported(
          "Error: Cross Entropy not supported without softmax or sigmoid.");
      }

      second_to_last_layer_node =
        LNODE(graph.getNode(output_layer_node->getInputConnectionName(0)));
    }

    std::shared_ptr<LayerNode> lnode = createLayerNode(loss_type);
    graph.ensureName(*lnode);

    if (second_to_last_layer_node->getDistribute()) {
      lnode->setProperty({"distribute=true"});
    }

    /// @todo remove this by adding loss at realization
    second_to_last_layer_node->setOutputLayers({lnode->getName()});
    lnode->setProperty(
      {"input_layers=" + second_to_last_layer_node->getName()});

    if (is_cross_entropy_loss) {
      graph.replaceNode(output_layer_node, lnode);
    } else {
      graph.addNode(lnode, false);
    }
    graph.replaceOutputNode(i, lnode);
  }

  return ML_ERROR_NONE;
}
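/**
 * @note Illustrative example (not part of the implementation): compiling
 * with a cross entropy loss type when the output node is a softmax
 * activation resolves the loss to CrossEntropySoftmaxLossLayer and replaces
 * the activation node with the fused loss node; with a sigmoid activation
 * it resolves to CrossEntropySigmoidLossLayer instead. Any other terminal
 * layer type raises exception::not_supported.
 */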
void NetworkGraph::setOutputConnections() {
  for (auto layer_iter = cbegin(); layer_iter != cend(); layer_iter++) {
    const auto &node = *layer_iter;
    for (auto i = 0u, num_inode = node->getNumInputConnections();
         i < num_inode; ++i) {
      const auto &name = node->getInputConnectionName(i);
      const auto &idx = node->getInputConnectionIndex(i);

      auto node_setting_output = getLayerNode(name);
      node_setting_output->setOutputConnection(idx, node->getName(), i);
    }
  }
}
int NetworkGraph::isCompilable() {
  if (compiled) {
    ml_loge("Graph is already compiled");
    return ML_ERROR_NOT_SUPPORTED;
  }

  if (graph.empty()) {
    ml_loge("Graph is empty");
    return ML_ERROR_INVALID_PARAMETER;
  }

  return ML_ERROR_NONE;
}
int NetworkGraph::checkCompiledGraph() {
  /** Dimension of input layers must be known */
  for (auto iter = cbegin(); iter != cend(); iter++) {
    auto lnode = (*iter);
    if (lnode->getNumInputConnections() == 0) {
      if (!lnode->hasInputShapeProperty()) {
        ml_loge("Layer with no inbound connection needs input_shape property");
        return ML_ERROR_INVALID_PARAMETER;
      }
    }
  }

  return ML_ERROR_NONE;
}
void NetworkGraph::markNodesForBackwarding() {
  /** accumulate all the nodes which must support backwarding */
  std::unordered_set<std::string> must_support_backwarding;

  /**
   * if a node is trainable, then all the nodes ahead of it must support
   * backwarding operation
   */
  for (auto iter = cbegin(); iter != cend(); iter++) {
    auto lnode = (*iter);
    if (lnode->getTrainable() ||
        must_support_backwarding.find(lnode->getName()) !=
          must_support_backwarding.end()) {
      if (lnode->getTrainable()) {
        lnode->needsCalcGradient(true);
      }
      if (lnode->supportBackwarding() && !optimize_memory) {
        lnode->needsCalcDerivative(true);
      }

      for (auto i = 0u, num_node = lnode->getNumOutputConnections();
           i < num_node; ++i) {
        auto conn = lnode->getOutputConnection(i);
        if (!conn) {
          continue;
        }

        must_support_backwarding.insert(conn->getName());
      }
    }
  }

  /** mark all the required nodes to support backwarding */
  for (auto const &node_name : must_support_backwarding) {
    auto ln = LNODE(graph.getNode(node_name)).get();
    ln->needsCalcDerivative(true);
  }
}
void NetworkGraph::setBatchSize(unsigned int batch_size) {
  if (batch_size == this->batch_size)
    return;

  this->batch_size = batch_size;
  if (!input_list.empty() && getInputDimension()[0].batch() == batch_size)
    return;

  auto allocated = tensor_manager->isAllocated();

  if (allocated)
    deallocateTensors();

  for (auto iter = cbegin(); iter != cend(); iter++) {
    if ((*iter)->isFinalized()) {
      /// resize tensors spec
      /// @todo remove below; if a custom tensor needs to change dimension
      /// according to the tensor, it must be done explicitly, or at least have
      /// a property to control the behavior
      const RunLayerContext &context = (*iter)->getRunContext();
      for (unsigned int idx = 0; idx < context.getNumTensors(); idx++) {
        auto const &ts = context.getTensor(idx);
        tensor_manager->setBatchSize(ts.getName(), ts.getDim().batch());
        if (context.tensorHasGradient(idx)) {
          auto const &ts_grad = context.getTensorGrad(idx);
          tensor_manager->setBatchSize(ts_grad.getName(),
                                       ts_grad.getDim().batch());
        }
      }
      /// override setting batch as per request
      (*iter)->setBatch(batch_size);
    }
  }
  /// resize input and output spec
  tensor_manager->setBatchSize(batch_size);

  if (allocated)
    allocateTensors(exec_mode);

  /** update input and label dimensions */
  for (unsigned int idx = 0; idx < input_list.size(); idx++)
    input_dims[idx] = tensor_manager->getTensor(input_list[idx])->getDim();
  for (unsigned int idx = 0; idx < label_list.size(); idx++)
    label_dims[idx] = tensor_manager->getTensor(label_list[idx])->getDim();
}
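/**
 * @note Usage sketch (illustrative only): a model trained with batch size
 * 32 can be switched to single-sample inference via setBatchSize(1); if
 * tensors were already allocated, they are deallocated, every finalized
 * node and the tensor pool are resized, and tensors are reallocated under
 * the current execution mode.
 */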
void NetworkGraph::applyGradients(
  LayerNode *node, const std::function<void(Weight &)> &apply_func) {

  if (!node->getTrainable())
    return;

  TRACE_MEMORY() << node->getName() + ": AG";
  TRACE_TIME() << node->getName() + ": AG";

  auto &rc = node->getRunContext();
  auto num_weight = rc.getNumWeights();
  for (unsigned i = 0; i < num_weight; ++i) {
    if (!rc.weightHasGradient(i)) {
      continue;
    }

    if (!rc.isGradientLastAccess(i)) {
      /// @note instead of checking the last access of the weight, checking
      /// if weights are dependent on others could minimize overhead.
      /// this logic assumes that the source of a dependent weight must be
      /// prior to the dependent.
      continue;
    }

    if (rc.isGradientClipByGlobalNorm(i)) {
      /**
       * @note the weights whose gradient are to be clipped by global norm will
       * be clipped at once at the end of iteration and applied then.
       */
      continue;
    }

    apply_func(rc.getWeightObject(i));
  }
}
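/**
 * @note Illustrative sketch (assumed caller, not part of this file): a
 * plain SGD step could be passed as the apply_func, e.g.
 * @code
 *   float lr = 0.01f;
 *   applyGradients(node, [lr](Weight &w) {
 *     // hypothetical accessors; assumes Weight exposes tensor references
 *     // and Tensor provides add_i(const Tensor &, float alpha)
 *     w.getVariableRef().add_i(w.getGradientRef(), -lr);
 *   });
 * @endcode
 */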
sharedConstTensors NetworkGraph::forwarding(
  bool training,
  std::function<void(std::shared_ptr<LayerNode>, bool)> forwarding_op,
  std::function<bool(void *userdata)> stop_cb, void *userdata) {
  for (auto iter = cbegin(); iter != cend() && !stop_cb(userdata); iter++) {
    auto &ln = *iter;
    PROFILE_TIME_START(profile_keys.at(ln->getType()));
    forwarding_op(*iter, training);
    PROFILE_TIME_END(profile_keys.at(ln->getType()));
  }

  sharedConstTensors out;
  for (unsigned int i = 0; i < graph.getNumOutputNodes(); ++i) {
    auto const &output_layer_node = LNODE(graph.getOutputNode(i));
    for (unsigned int j = 0; j < output_layer_node->getNumOutputs(); ++j) {
      out.push_back(MAKE_SHARED_TENSOR(output_layer_node->getOutput(j)));
    }
  }

  return out;
}
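/**
 * @note Illustrative sketch (assumed caller, not part of this file): the
 * simplest forwarding_op just delegates to the node, e.g.
 * @code
 *   auto out = forwarding(
 *     true,
 *     [](std::shared_ptr<LayerNode> node, bool training) {
 *       node->forwarding(training); // assumed per-node forward entry point
 *     },
 *     [](void *) { return false; }, nullptr);
 * @endcode
 */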
void NetworkGraph::backwarding(
  int iteration,
  std::function<void(std::shared_ptr<LayerNode>, int)> &backwarding_op,
  std::function<void(Weight &, int)> &apply_grad_clip_op,
  std::function<bool(void *userdata)> stop_cb, void *userdata) const {
  /**
   * last layer backwarding is run out of this loop
   */
  auto iter_begin = getBackwardingBeginIter();
  auto iter_end = getBackwardingEndIter();

  /// there is no layer to train, so backwarding is essentially a noop
  if (iter_begin == iter_end) {
    return;
  }

  auto const &lptr_begin = (*iter_begin);

  if (lptr_begin->requireLabel() == false)
    throw std::runtime_error(
      "Error: last layer does not accept label, we can't train");

  for (auto iter = iter_begin; iter != iter_end && !stop_cb(userdata); iter++) {
    auto &ln = *iter;
    PROFILE_TIME_START(profile_keys.at(ln->getType()));
    backwarding_op(ln, iteration);
    PROFILE_TIME_END(profile_keys.at(ln->getType()));
  }

  /** perform clipping of the gradients by global norm if any */
  if (clip_weights.empty())
    return;

  /** calculate the global norm */
  Tensor global_norm_t(
    TensorDim({1u, 1u, 1u, (unsigned int)clip_weights.size()}));
  float *global_norm_data = global_norm_t.getData();
  for (unsigned int idx = 0; idx < clip_weights.size(); idx++) {
    auto const &w = clip_weights[idx];
    global_norm_data[idx] = w->getGradientNorm();
  }
  float global_norm = global_norm_t.l2norm();
  /** clip the gradients with the above global norm */
  for (auto w : clip_weights) {
    w->clipGradientByGlobalNorm(global_norm);
  }
  /** apply the clipped gradients */
  for (auto w : clip_weights) {
    apply_grad_clip_op(*w, iteration);
  }
}
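/**
 * @note Worked math (illustrative): the loop above computes the global norm
 * as the l2 norm of the per-weight gradient norms, i.e.
 *   global_norm = sqrt(sum_i ||g_i||^2)
 * which equals the l2 norm of all clipped gradients concatenated. A typical
 * clipping rule (the exact formula lives in Weight::clipGradientByGlobalNorm)
 * then rescales each gradient as
 *   g_i <- g_i * max_norm / max(global_norm, max_norm)
 * so gradients are left untouched when global_norm <= max_norm.
 */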
LayerNode *NetworkGraph::computeBackwardEnd() {
  int max_exec_order = -1;
  LayerNode *node = nullptr;

  if (!optimize_memory) {
    return (*cbegin()).get();
  }

  for (auto iter = getBackwardingBeginIter(); iter != getBackwardingEndIter();
       iter++) {
    auto &ln = *iter;
    const auto &exec_order = ln->getExecutionOrder();
    int cur_order = std::get<0>(exec_order);
    if (ln->needsCalcDerivative() || ln->needsCalcGradient()) {
#ifdef ENABLE_TEST
      cur_order = std::get<2>(exec_order);
#else
      cur_order = std::get<1>(exec_order);
#endif
    }

    NNTR_THROW_IF(max_exec_order == cur_order, std::invalid_argument)
      << "layer node: " << ln->getName()
      << " has duplicated max_exec_order, this should not happen, current "
         "order: "
      << cur_order;

    if (max_exec_order < cur_order) {
      max_exec_order = cur_order;
      node = ln.get();
    }
  }

  return node;
}
/**
 * @brief Allocate memory for all the managed tensors
 */
void NetworkGraph::allocateTensors(ExecutionMode exec_mode_) {
  exec_mode = exec_mode_;
  if (exec_mode == ExecutionMode::INFERENCE)
    /**
     * get the order of execution/usage order for the forwarding of the last
     * layer and pass that as the max_exec_order ensuring that all tensors
     * with usage less than the max_exec_order are allocated.
     */
    tensor_manager->allocateTensors(
      std::get<0>((*(cend() - 1))->getExecutionOrder()));
  else {
    /**
     * get the order of execution/usage order for the backwarding of the first
     * layer (as that will be the last layer to be executed in the backwarding)
     * and pass that as the max_exec_order ensuring that all tensors with
     * usage less than the max_exec_order are allocated.
     */
    tensor_manager->allocateTensors(
      std::get<3>(backward_iter_end->getExecutionOrder()));
  }
}
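/**
 * @note Illustrative consequence (not additional behavior): in INFERENCE
 * mode the max_exec_order is the forward order of the last sorted node, so
 * tensors that are only touched during backwarding (gradients, derivative
 * buffers) fall outside the allocated range and are never materialized.
 */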
std::vector<TensorDim> NetworkGraph::getInputDimension() const {
  NNTR_THROW_IF(input_dims.empty(), std::invalid_argument)
    << "[NetworkGraph] the graph has no node identified as input!";
  return input_dims;
}

unsigned int NetworkGraph::getBatchSize() const { return batch_size; }
std::vector<TensorDim> NetworkGraph::getOutputDimension() const {
  NNTR_THROW_IF(label_dims.empty(), std::invalid_argument)
    << "[NetworkGraph] the graph has no node identified as output!";
  /// for now, outputting label_dims works; later label dim will be different
  /// from output dimension
  return label_dims;
}
std::vector<std::shared_ptr<LayerNode>>
NetworkGraph::getUnsortedLayers(const std::string &input_layer,
                                const std::string &output_layer) const {
  /// @fixme: this won't work if input, output layers are not in order
  /// Further, this function must be removed. There should be rather
  /// getAllNames and getLayerByName instead of getUnsortedLayers.

  /** count layers after output layer */
  unsigned int num_layers_remove_end = 0;
  if (!output_layer.empty()) {
    for (auto iter = graph.crbegin(); iter != graph.crend(); iter++) {
      if ((*iter)->getName() != output_layer)
        num_layers_remove_end++;
      else
        break;
    }
  }

  if (num_layers_remove_end == graph.size())
    return {};

  /** count layers before input layer */
  unsigned int num_layers_remove_start = 0;
  if (!input_layer.empty()) {
    for (auto iter = graph.cbegin();
         iter != graph.cend() - num_layers_remove_end; iter++) {
      if ((*iter)->getName() != input_layer)
        num_layers_remove_start++;
      else
        break;
    }
  }

  /** copy the graph and return */
  std::vector<std::shared_ptr<LayerNode>> ret;
  std::transform(graph.cbegin() + num_layers_remove_start,
                 graph.cend() - num_layers_remove_end, std::back_inserter(ret),
                 [](auto const &elem) { return LNODE(elem); });

  return ret;
}
std::vector<std::shared_ptr<LayerNode>> NetworkGraph::getLayerNodes() const {
  return std::vector<std::shared_ptr<LayerNode>>(cbegin(), cend());
}
void NetworkGraph::addLayer(std::shared_ptr<LayerNode> layer) {
  if (compiled)
    throw std::runtime_error("Cannot modify graph after compile");

  /** Insert the layer to the graph */
  graph.addNode(layer);
}
InPlace
NetworkGraph::canExecuteInPlace(const std::shared_ptr<LayerNode> &lnode) {
  if (!lnode->supportInPlace())
    return InPlace::NONE;

  /** layers which behave as a no-op - flatten, identity */
  auto no_op = [](const std::shared_ptr<LayerNode> &lnode) {
    return lnode->getType() == FlattenLayer::type ||
           lnode->getType() == IdentityLayer::type;
  };

  /** layers which behave as a no-op but share memory among parallel nodes -
   * multiout */
  auto no_op_shared = [](const std::shared_ptr<LayerNode> &lnode) {
    return lnode->getType() == MultiOutLayer::type;
  };

  /**
   * layers whose backwarding is not dependent on input/output but only on its
   * derivatives and weights, if any - batch normalization
   */
  auto io_independent_backwarding =
    [](const std::shared_ptr<LayerNode> &lnode) {
      return (lnode->getType() == BatchNormalizationLayer::type) ||
             (lnode->getType() == LayerNormalizationLayer::type);
    };

  /**
   * @note Conditions to decide if this layer node can be in-place:
   * 1. if the layer is a no-op, then it can operate in-place as it is not
   * modifying its input/output tensors and does not need to check its
   * neighboring nodes for dependency.
   * 2. if the layer is not supporting backwarding, there is no dependency
   * requirement with other nodes for backwarding.
   *
   * @note Conditions to decide the type of inplace for this layer:
   * 1. if the previous layers were restricting, then this layer will also be
   * restricting.
   * 2. if the previous layers were non-restricting or not in-place, then this
   * layer will be non-restricting.
   */
  if (no_op(lnode) || !lnode->supportBackwarding()) {
    for (auto i = 0u, num_node = lnode->getNumInputConnections();
         i < num_node; ++i) {
      const auto &input_name = lnode->getInputConnectionName(i);
      if (getLayerNode(input_name)->executeInPlace() == InPlace::RESTRICTING)
        return InPlace::RESTRICTING;
    }
    return InPlace::NON_RESTRICTING;
  }

  /**
   * @note Conditions to decide if this layer node can be in-place:
   * if the layer is a no-op-shared, then it can operate in-place as it is not
   * modifying its input/output tensors and does not need to check its
   * neighboring nodes for dependency.
   *
   * @note Conditions to decide the type of inplace for this layer:
   * As all the output nodes are sharing memory, the output nodes can't
   * execute in-place, so this is restricting mode.
   */
  if (no_op_shared(lnode))
    return InPlace::RESTRICTING;

  /**
   * @note Conditions to decide if this layer node can be in-place:
   * This is a generic case where the layer can support in-place but will
   * modify its input in-place. This includes layers like activation, etc.
   * Apply checks below to ensure that the layers can work in-place:
   * - if any of the input layers are restricting, then this layer cannot
   * work in-place, as layers behind this layer have added restrictions.
   * - if all of the input layers are either not in-place or have no
   * restrictions, then this layer can operate in-place.
   *
   * @note Conditions to decide the type of inplace for this layer:
   * This is a generic case, and it always restricts the next nodes from
   * being in-place.
   *
   * @note This logic is prone to change as more layers are allowed to
   * work in-place such as concat layer, split layer, addition layer, dropout
   * layer, etc.
   *
   * @todo This logic sets layers to in-place one-by-one as they arrive.
   * However, setting some layers to in-place can save more memory than others
   * (like multiout layer vs activation layer). The layers need to be sorted
   * based on the memory they save and then made in-place in that order.
   */
  if (lnode->getType() == ActivationLayer::type ||
      lnode->getType() == BatchNormalizationLayer::type ||
      lnode->getType() == LayerNormalizationLayer::type) {
    for (auto i = 0u, num_node = lnode->getNumInputConnections();
         i < num_node; ++i) {
      if (getLayerNode(lnode->getInputConnectionName(i))->executeInPlace() ==
          InPlace::RESTRICTING)
        return InPlace::NONE;
    }

    /**
     * if the layer does io_independent_backwarding where the input and output
     * is not required during backwarding, then it is a non-restricting
     * in-place operation.
     */
    if (io_independent_backwarding(lnode))
      return InPlace::NON_RESTRICTING;

    return InPlace::RESTRICTING;
  }

  return InPlace::NONE;
}
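/**
 * @note Illustrative walk-through (not part of the implementation): in a
 * chain multiout -> activation, the multiout node is marked RESTRICTING
 * (its outputs share memory), so the downstream activation sees a
 * RESTRICTING input connection and returns InPlace::NONE, i.e. it gets its
 * own output buffer. The same activation fed by a non-in-place layer would
 * instead run in-place and itself become RESTRICTING.
 */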
void NetworkGraph::inPlaceOptimize() {
  if (optimize_memory) {
    for (unsigned int idx = 0; idx < graph.size(); ++idx) {
      auto const &lnode = getSortedLayerNode(idx);
      lnode->executeInPlace(canExecuteInPlace(lnode));
    }
  }
}
/**
 * @brief Set the Inplace Shared Memory Config By Layer object
 *
 * @param lnode layer node object
 * @param shared_var if the variable should be shared
 * @param shared_grad if the gradient should be shared
 */
static void
setInplaceSharedMemoryConfigByLayer(const std::shared_ptr<LayerNode> &lnode,
                                    bool &shared_var, bool &shared_grad) {
  /** for multiout layer, variables are shared but gradients are not */
  if (lnode->getType() == MultiOutLayer::type) {
    shared_var = true;
    shared_grad = false;
  } else {
    shared_var = true;
    shared_grad = true;
  }
  /** @todo for addition layer, variables are not shared but gradients are */
  /**
   * @todo for layers which support in-place, both variables and gradients
   * will be shared.
   *
   * @todo add a check here whether the layer being checked can support
   * in-place or not
   */
}
std::vector<Var_Grad *>
NetworkGraph::finalizeContext(const std::shared_ptr<LayerNode> &lnode,
                              const std::vector<Var_Grad *> &prev_inputs) {
  const GraphNode &gnode = *lnode.get();
  std::vector<TensorDim> input_dims;
  input_dims.reserve(prev_inputs.size());
  std::transform(prev_inputs.begin(), prev_inputs.end(),
                 std::back_inserter(input_dims),
                 [](const Var_Grad *vg) { return vg->getDim(); });

  /** finalize the layer and get the final context */
  auto init_context = lnode->finalize(input_dims);

  /**
   * Request manager for either a pre-allocated output as input or a newly
   * allocated output. This is necessary for manager to know when this output
   * node is going to be used.
   */
  std::vector<std::string> input_names;
  input_names.reserve(prev_inputs.size());
  std::transform(prev_inputs.begin(), prev_inputs.end(),
                 std::back_inserter(input_names),
                 [](auto const &vg) { return vg->getName(); });
  const std::vector<Var_Grad *> &inputs = tensor_manager->requestInputs(
    gnode, init_context.getInputDimensions(), input_names);

  /** In-Place optimizations */
  /**
   * Request manager for either a pre-allocated input as output or a newly
   * allocated output. This is necessary for manager to know when this output
   * node is going to be used with in-place optimizations.
   */
  auto out_specs = init_context.getOutSpecs();
  /// @note try to move inplace control to finalize
  bool shared_var = false, shared_grad = false;
  if (lnode->executeInPlace() != InPlace::NONE) {
    setInplaceSharedMemoryConfigByLayer(lnode, shared_var, shared_grad);
    for (unsigned int i = 0; i < out_specs.size(); ++i) {
      auto &s = out_specs.at(i);
      if (shared_var) {
        s.variable_spec.request_type =
          TensorSpecV2::RequestType::READ_ONLY_VIEW;
        if (lnode->getType() == IdentityLayer::type) {
          s.variable_spec.reference_name = inputs[i]->getName();
        } else {
          s.variable_spec.reference_name = inputs[0]->getName();
        }
      }
      if (shared_grad && s.gradient_spec) {
        s.gradient_spec->request_type =
          TensorSpecV2::RequestType::READ_ONLY_VIEW;
        if (lnode->getType() == IdentityLayer::type) {
          s.gradient_spec->reference_name = inputs[i]->getGradientName();
        } else {
          s.gradient_spec->reference_name = inputs[0]->getGradientName();
        }
      }
    }
  }
  if (lnode->requireLabel()) {
    NNTR_THROW_IF(out_specs.size() != 1, std::invalid_argument)
      << "out specification size must be 1 for label layer for now, "
      << lnode->getName() << " out spec size: " << out_specs.size();
    NNTR_THROW_IF(out_specs[0].gradient_spec == nullptr, std::invalid_argument)
      << "label space does not exist for " << lnode->getName();
    out_specs[0].gradient_spec->request_type =
      TensorSpecV2::RequestType::PLACEHOLDER;
  }

  /// @note below needs to be enabled only for inference mode, but we need a
  /// decision on whether we are going to separate inference initialization
  /// from train initialization. this might not be worth optimizing because in
  /// general the output of a neural net is very small
  if (lnode->getOutputConnections().size() == 0u) {
    std::for_each(out_specs.begin(), out_specs.end(),
                  [this](VarGradSpecV2 &spec) {
                    spec.variable_spec.additional_exec_order.push_back(
                      std::get<0>(forward_iter_end->getExecutionOrder()));
                  });
  }

  if (lnode->getType() == RNNCellLayer::type or
      lnode->getType() == LSTMCellLayer::type or
      lnode->getType() == GRUCellLayer::type) {
    std::for_each(
      out_specs.begin(), out_specs.end(), [this](VarGradSpecV2 &spec) {
        spec.variable_spec.ls = TensorLifespan::FORWARD_GRAD_LIFESPAN;
      });
  }

  const std::vector<Var_Grad *> &outputs = tensor_manager->requestTensors(
    out_specs, Manager::TensorGroupType::OUTPUT, lnode->getExecutionOrder(),
    lnode->getName());

  /** create shared weight names if requested */
  std::vector<std::string> shared_weight_names;
  std::vector<std::string> shared_tensor_names;
  if (auto shared_node_str = lnode->getSharedFrom(); !shared_node_str.empty()) {
    /// @note below is commented but kept from quick fix to be referenced for
    /// later
    // auto shared_node = getLayerNode(shared_node_str).get();
    // NNTR_THROW_IF(shared_node == nullptr, std::invalid_argument)
    //   << "shared_node requested but it is not registered in the graph, "
    //   << "name: " << shared_node_str
    //   << " requested from " << lnode->getName();
    // NNTR_THROW_IF(shared_node->getType() != lnode->getType(),
    //               std::invalid_argument)
    //   << " shared_node and lnode type mismatch, source node type: "
    //   << shared_node->getType()
    //   << " dependent node type: " << lnode->getType()
    //   << " dependent node name: " << lnode->getName();
    // NNTR_THROW_IF(!shared_node->isFinalized(), std::invalid_argument)
    //   << "shared node must be prior to the dependent node and it should be "
    //   << "finalized beforehand, shared node name: " << shared_node_str
    //   << " dependent node name: " << lnode->getName();
    // auto num_weight = shared_node->getNumWeights();
    // shared_weight_names.reserve(num_weight);
    // for (auto i = 0u; i < num_weight; ++i) {
    //   shared_weight_names.emplace_back(shared_node->getWeightName(i));
    // }
    // auto &rc = node->getRunContext();

    /// @fixme tensors should only be shared if the context explicitly
    /// requested to do so. This has to be added to the part of tensor spec,
    /// otherwise it will break many things
    const auto &t_specs = init_context.getTensorsSpec();
    for (auto i = 0u; i < t_specs.size(); ++i) {
      shared_tensor_names.emplace_back(std::get<3>(t_specs.at(i)));
    }

    const auto &w_specs = init_context.getWeightsSpec();
    for (auto i = 0u; i < w_specs.size(); ++i) {
      shared_weight_names.emplace_back(std::get<7>(w_specs.at(i)));
    }
  }

  lnode->configureRunContext(
    // TODO: update weights spec for trainable based on layer trainable prop
    tensor_manager->requestWeights(gnode, init_context.getWeightsSpec(),
                                   lnode->getTrainable(), shared_weight_names),
    inputs, outputs,
    tensor_manager->requestTensors(gnode, init_context.getTensorsSpec(),
                                   lnode->getTrainable(), shared_tensor_names));

  return outputs;
}
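/**
 * @note Illustrative example (not part of the implementation): when an
 * identity node runs in-place, each output spec i is turned into a
 * READ_ONLY_VIEW whose reference_name is the name of input i, so the tensor
 * manager hands out views over the input tensors instead of allocating
 * fresh buffers; single-input layers such as activation instead alias all
 * outputs to inputs[0].
 */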
int NetworkGraph::initialize(const std::vector<Connection> &model_input_names,
                             const std::vector<Connection> &model_label_names) {

  /**
   * this contains the map from node name to its input tensor names
   * @note: these input tensors have already been allocated
   */
  std::unordered_map<std::string, std::vector<Var_Grad *>> input_map;

  /** check if the given config of node is of input node */
  auto is_input_node = [](const LayerNode *node) -> bool {
    return node->getInputConnections().empty();
  };

  for (unsigned int idx = 0; idx < graph.size(); ++idx) {
    std::vector<Var_Grad *> inputs = {};
    auto const &lnode = getSortedLayerNode(idx);

    if (profile_keys.find(lnode->getType()) == profile_keys.end()) {
      int event_key = 0;
      PROFILE_TIME_REGISTER_EVENT(event_key, lnode->getType());
      profile_keys[lnode->getType()] = event_key;
    }

    /**
     * Set input dimension for all the layers.
     * For input layer, as input dimension is known, set input tensor.
     */
    if (!is_input_node(lnode.get())) {
      if (input_map.find(lnode->getName()) == input_map.end())
        throw std::runtime_error("Cannot find input buffers for the node");
      inputs = input_map.at(lnode->getName());
    }

    /**
     * Initialize all the layers, allocate output tensors for each layer
     * and add optimizer related weights for the layer
     */
    const std::vector<Var_Grad *> &outputs = finalizeContext(lnode, inputs);

    /** no need to update input_map for the last layer */
    if (idx == graph.size() - 1)
      break;

    for (auto i = 0u, num_node = lnode->getNumOutputConnections();
         i < num_node; ++i) {
      auto conn = lnode->getOutputConnection(i);
      if (!conn) {
        ml_logi("out connection not defined for %s, %u",
                lnode->getName().c_str(), i);
        continue;
      }

      auto sink_node = getLayerNode(conn->getName());
      [[maybe_unused]] auto [it, b] =
        input_map.try_emplace({sink_node->getName(), {}});

      NNTR_THROW_IF(sink_node->getInputConnectionName(conn->getIndex()) !=
                      lnode->getName(),
                    std::invalid_argument)
        << "node pair does not match between " << lnode->getName() << ' '
        << sink_node->getName();

      auto &sink_tensors = it->second;
      sink_tensors.resize(sink_node->getNumInputConnections());
      sink_tensors[conn->getIndex()] = outputs[i];
    }
  }
  for (unsigned int idx = 0; idx < graph.size(); ++idx) {
    auto const &lnode = getSortedLayerNode(idx);
    auto &rc = lnode->getRunContext();
    auto first_grad_access = std::get<1>(lnode->getExecutionOrder());
    auto last_grad_access = std::get<3>(lnode->getExecutionOrder());
    for (unsigned i = 0; i < rc.getNumWeights(); ++i) {
      if (!rc.weightHasGradient(i)) {
        /// @todo this is duct taping that MUST BE REMOVED. We will need to
        /// have an "is weight first access" kind of concept.
        if (tensor_manager->isFirstAccess(
              rc.getWeight(i).getName(),
              std::get<0>(lnode->getExecutionOrder()), true)) {
          rc.getWeightObject(i).setAsGradientFirstAccess();
        }
        if (tensor_manager->isLastAccess(rc.getWeight(i).getName(),
                                         last_grad_access, true)) {
          rc.getWeightObject(i).setAsGradientLastAccess();
        }
      } else {
        if (tensor_manager->isFirstAccess(rc.getWeightGrad(i).getName(),
                                          first_grad_access)) {
          rc.getWeightObject(i).setAsGradientFirstAccess();
        }
        /**
         * if the gradient is to be clipped by global norm, then the last
         * access is by clipping itself. However, as clipping is not a layer
         * and does not contain any weights, such weights never get assigned
         * gradient_last_access. This is a quick hotfix.
         * TODO: make an independent clipping layer which will execute at the
         * end, and will share ownership of weights which it will clip. This
         * will remove this hot fix, and also remove the checks of whether
         * weights require clipping.
         */
        if (tensor_manager->isLastAccess(rc.getWeightGrad(i).getName(),
                                         last_grad_access) ||
            (rc.isGradientClipByGlobalNorm(i) &&
             tensor_manager->isSecondLastAccess(rc.getWeightGrad(i).getName(),
                                                last_grad_access))) {
          rc.getWeightObject(i).setAsGradientLastAccess();
        }
      }
    }
  }
  /**** identify model input / output to be set externally later ****/
  auto identify_as_model_input = [this](LayerNode *node) {
    auto num_input = node->getNumInputs();
    NNTR_THROW_IF(num_input != 1, std::invalid_argument)
      << "Input layer is supposed to have exactly one input, but more than "
         "one input detected, num inputs: "
      << num_input;

    input_list.push_back(node->getInput(0).getName());
    input_dims.push_back(node->getInputDimensions()[0]);
  };

  auto is_label_node = [](LayerNode *node) { return node->requireLabel(); };

  auto identify_as_model_label = [this](LayerNode *node) {
    /// @todo change this as lnode->getNumLabels of sorts
    auto num_label = node->getNumOutputs();
    NNTR_THROW_IF(!node->getOutputConnections().empty(), std::invalid_argument)
      << "label layer is supposed to be a leaf for now";
    NNTR_THROW_IF(num_label != 1, std::invalid_argument)
      << "label layer is supposed to have exactly one label, but more than "
         "one label detected, num labels: "
      << num_label;

    /// @todo implement and use getLabel(0) instead.
    output_list.push_back(node->getOutput(0).getName());
    label_list.push_back(node->getOutputGrad(0).getName());
    label_dims.push_back(node->getOutputDimensions()[0]);
  };

  auto identify_external_tensors = [this](const std::vector<Connection> &conns,
                                          auto &&pred, auto &&identify) {
    if (conns.empty()) {
      for (unsigned int i = 0; i < graph.size(); ++i) {
        auto lnode = getSortedLayerNode(i).get();
        if (!pred(lnode)) {
          continue;
        }
        /// when name is empty, we identify everything as the node, all of
        /// them must be having identical dimensions
        identify(lnode);
      }
    } else {
      for (auto &conn : conns) {
        auto lnode = getLayerNode(conn.getName()).get();
        NNTR_THROW_IF(!pred(lnode), std::invalid_argument)
          << "given node is not of that kind, name: " << conn.getName();
        identify(lnode);
      }
      unsigned int num_node_of_kind = 0;
      for (unsigned int i = 0; i < graph.size(); ++i) {
        auto lnode = getSortedLayerNode(i).get();
        if (!pred(lnode)) {
          continue;
        }
        num_node_of_kind++;
      }
      NNTR_THROW_IF(num_node_of_kind != conns.size(), std::invalid_argument)
        << "conns given but the number of identified nodes of the kind does "
           "not match, num node of kind: "
        << num_node_of_kind << " identifier size: " << conns.size();
    }
  };

  identify_external_tensors(model_input_names, is_input_node,
                            identify_as_model_input);
  identify_external_tensors(model_label_names, is_label_node,
                            identify_as_model_label);
  /** mark the nodes which will be backwarded during the graph operation */
  try {
    markNodesForBackwarding();
    backward_iter_end = computeBackwardEnd();
  } catch (std::exception &e) {
    ml_loge(
      "Backwarding required from layer which doesn't support backwarding: %s",
      e.what());
    return ML_ERROR_INVALID_PARAMETER;
  }

  /** select weights which would require clipping of the gradients by global
   * norm if any */
  clip_weights = tensor_manager->getWeights([](const Weight *w) {
    return w->hasGradient() && w->isGradientLastAccess() &&
           w->isGradientClipByGlobalNorm();
  });

  return ML_ERROR_NONE;
}
void NetworkGraph::setExternalTensors(const std::vector<Tensor> &data,
                                      const std::vector<std::string> names) {

  /// feed or clear label
  for (unsigned int idx = 0; idx < names.size(); idx++) {
    if (data.empty())
      tensor_manager->fillPlaceholder(names[idx], Tensor());
    else if (data.size() == 1)
      tensor_manager->fillPlaceholder(names[idx], data[0]);
    else
      tensor_manager->fillPlaceholder(names[idx], data[idx]);
  }
}
void NetworkGraph::setInputsLabels(const std::vector<Tensor> &inputs,
                                   const std::vector<Tensor> &labels) {

  NNTR_THROW_IF(labels.size() > 1 && labels.size() != label_list.size(),
                std::invalid_argument)
    << "label size does not match with the network requirements"
    << " label size: " << labels.size()
    << " requirements size: " << label_list.size();

  NNTR_THROW_IF(inputs.size() > 1 && inputs.size() != input_list.size(),
                std::invalid_argument)
    << "input size does not match with the network requirements"
    << " input size: " << inputs.size()
    << " requirements size: " << input_list.size();

  setExternalTensors(inputs, input_list);
  setExternalTensors(labels, label_list);
}
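/**
 * @note Usage sketch (illustrative only): for a single-input,
 * single-output model,
 * @code
 *   setInputsLabels({input_batch}, {label_batch});
 * @endcode
 * fills the managed input and label placeholders; passing an empty vector
 * clears the corresponding placeholders instead (see setExternalTensors
 * above), which is how inference without labels is run.
 */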
void NetworkGraph::setInputsLabels(sharedConstTensors &inputs,
                                   sharedConstTensors &labels) {

  std::vector<Tensor> ins;
  std::transform(inputs.begin(), inputs.end(), std::back_inserter(ins),
                 [](auto const &val) { return *val.get(); });

  std::vector<Tensor> labs;
  std::transform(labels.begin(), labels.end(), std::back_inserter(labs),
                 [](auto const &val) { return *val.get(); });

  setInputsLabels(ins, labs);
}
std::vector<Tensor> NetworkGraph::getOutputTensors() const {
  std::vector<Tensor> output_tensors;
  output_tensors.reserve(output_list.size());

  for (auto const &name : output_list)
    output_tensors.push_back(*tensor_manager->getTensor(name));

  return output_tensors;
}
void NetworkGraph::flushCache() { tensor_manager->flushCache(); }

void NetworkGraph::flushCacheExcept(unsigned int order) {
  tensor_manager->flushCacheExcept(order);
}
void NetworkGraph::requestOptimizerVariable(
  std::function<std::vector<TensorDim>(const TensorDim &)> cb,
  bool request_only_trainable) {
  for (auto const &w : tensor_manager->getWeights()) {
    if (w->isGradientLastAccess() && w->hasGradient()) {
      const TensorDim &dim = w->getDim();
      std::vector<TensorDim> dims = cb(dim);
      w->setOptimizerVariables(tensor_manager->requestWeightOptimizerVariables(
        dims, w->getName(), TensorLifespan::MAX_LIFESPAN,
        w->isGradientClipByGlobalNorm(), Tensor::Initializer::ZEROS));
    }
  }
}
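/**
 * @note Illustrative example (assumed optimizer behavior, not part of this
 * file): an Adam-style optimizer would pass a callback returning two dims
 * shaped like the weight, one for the first moment and one for the second,
 * e.g.
 * @code
 *   requestOptimizerVariable(
 *     [](const TensorDim &dim) { return std::vector<TensorDim>{dim, dim}; },
 *     true);
 * @endcode
 * while plain SGD would return an empty vector and request nothing.
 */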
1131 } /* namespace nntrainer */