// SPDX-License-Identifier: Apache-2.0
/**
 * Copyright (C) 2020 Jijoong Moon <jijoong.moon@samsung.com>
 *
 * @file   network_graph.cpp
 * @see    https://github.com/nnstreamer/nntrainer
 * @author Jijoong Moon <jijoong.moon@samsung.com>
 * @bug    No known bugs except for NYI items
 * @brief  This is Network Graph Class for Neural Network
 *
 * @todo   Support multi-input graph.
 */
#include <activation_layer.h>
#include <addition_layer.h>
#include <bn_layer.h>
#include <concat_layer.h>
#include <connection.h>
#include <cross_entropy_loss_layer.h>
#include <cross_entropy_sigmoid_loss_layer.h>
#include <cross_entropy_softmax_loss_layer.h>
#include <flatten_layer.h>
#include <identity_layer.h>
#include <input_layer.h>
#include <layer_node.h>
#include <layer_normalization_layer.h>
#include <multiout_layer.h>
#include <network_graph.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <profiler.h>
#include <split_layer.h>
#include <time_dist.h>
#include <util_func.h>

#define LNODE(x) std::static_pointer_cast<LayerNode>(x)
namespace nntrainer {

int NetworkGraph::compile(const std::string &loss_type) {
  int status = ML_ERROR_NONE;

  status = isCompilable();
  if (status != ML_ERROR_NONE)
    return status;

  try {
    setOutputConnections();
  } catch (std::exception &e) {
    ml_loge("setting output layer failed, reason: %s", e.what());
    return ML_ERROR_INVALID_PARAMETER;
  }

  graph.realizeInputOutputNode();

  try {
    /// @todo realize loss beforehand
    status = addLossLayer(loss_type);
  } catch (const std::exception &e) {
    ml_loge("%s", e.what());
    status = ML_ERROR_INVALID_PARAMETER;
  }
  if (status != ML_ERROR_NONE)
    return status;

  graph.topologicalSort();

  setExecutionOrder();
  forward_iter_end = (*(cend() - 1)).get();

  inPlaceOptimize();

  status = checkCompiledGraph();
  if (status != ML_ERROR_NONE)
    return status;

  compiled = true;
  return status;
}
void NetworkGraph::setExecutionOrder() {
  auto max_count = graph.size() * 3;
  /** @todo: remove backwarding count for non-trainable layers */
  for (auto iter = cbegin(); iter != cend(); iter++) {
    auto &node = *iter;
    auto order_idx = iter - cbegin();
    auto forward_order = order_idx;
    auto calc_gradient_order = max_count - ((order_idx + 1) * 2);
    /** calc derivative is called right after calc_gradient */
    auto calc_derivative_order = calc_gradient_order + 1;
    node->setExecutionOrder(
      {forward_order, calc_gradient_order, calc_derivative_order});
  }

  /**
   * This sets max execution order temporarily till the model is initialized.
   * This max execution order is used to extend gradient execution orders for
   * gradient clipping.
   */
  graph_exec_end = std::get<2>((*(cbegin()))->getExecutionOrder());
}
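/**
 * A worked example of the ordering above (a sketch, not normative): for a
 * 3-node graph, max_count = 9 and the orders become
 *   node 0: forward 0, calc_gradient 7, calc_derivative 8
 *   node 1: forward 1, calc_gradient 5, calc_derivative 6
 *   node 2: forward 2, calc_gradient 3, calc_derivative 4
 * i.e. forwarding runs in topological order while the gradient/derivative
 * passes run in reverse, interleaved as (gradient, derivative) pairs, and
 * graph_exec_end picks up node 0's calc_derivative order (the maximum).
 */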
void NetworkGraph::addLayerNode(std::unique_ptr<Layer> layer) {
  graph.addNode(std::make_unique<LayerNode>(std::move(layer)));
}
int NetworkGraph::addLossLayer(const std::string &loss_type_) {
  for (unsigned int i = 0; i < graph.getNumOutputNodes(); ++i) {
    auto output_layer_node = LNODE(graph.getOutputNode(i));
    std::string loss_type = loss_type_;

    if (output_layer_node->requireLabel())
      continue;

    if (loss_type.empty())
      continue;

    auto second_to_last_layer_node = output_layer_node;
    bool is_cross_entropy_loss =
      istrequal(loss_type, CrossEntropyLossLayer::type);
    if (is_cross_entropy_loss) {
      auto type = output_layer_node->getType();

      if (type != ActivationLayer::type) {
        throw exception::not_supported(
          "Error: Cross Entropy needs the last layer to have softmax or "
          "sigmoid activation.");
      }

      switch (output_layer_node->getActivationType()) {
      case ActivationType::ACT_SIGMOID:
        loss_type = CrossEntropySigmoidLossLayer::type;
        break;
      case ActivationType::ACT_SOFTMAX:
        loss_type = CrossEntropySoftmaxLossLayer::type;
        break;
      default:
        throw exception::not_supported(
          "Error: Cross Entropy not supported without softmax or sigmoid.");
      }

      second_to_last_layer_node =
        LNODE(graph.getNode(output_layer_node->getInputConnectionName(0)));
    }

    std::shared_ptr<LayerNode> lnode = createLayerNode(loss_type);
    graph.ensureName(*lnode);

    if (second_to_last_layer_node->getDistribute()) {
      lnode->setProperty({"distribute=true"});
    }

    /// @todo remove this by adding loss at realization
    second_to_last_layer_node->setOutputLayers({lnode->getName()});
    lnode->setProperty(
      {"input_layers=" + second_to_last_layer_node->getName()});

    if (is_cross_entropy_loss) {
      graph.replaceNode(output_layer_node, lnode);
    } else {
      graph.addNode(lnode, false);
    }
    graph.replaceOutputNode(i, lnode);
  }

  return ML_ERROR_NONE;
}
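/**
 * Example of the realization above (a sketch, with hypothetical layer names):
 * compiling with loss_type = "cross" and a graph ending in
 *   fc -> act(softmax)
 * replaces the trailing activation node with a fused cross_softmax_loss
 * node, so the graph becomes
 *   fc -> cross_softmax_loss
 * while a non-cross-entropy loss such as mse is simply appended after the
 * current output node instead of replacing it.
 */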
void NetworkGraph::setOutputConnections() {
  for (auto layer_iter = cbegin(); layer_iter != cend(); layer_iter++) {
    const auto &node = *layer_iter;
    for (auto i = 0u, num_inode = node->getNumInputConnections();
         i < num_inode; ++i) {
      const auto &name = node->getInputConnectionName(i);
      const auto &idx = node->getInputConnectionIndex(i);

      auto node_setting_output = getLayerNode(name);
      node_setting_output->setOutputConnection(idx, node->getName(), i);
    }
  }
}
int NetworkGraph::isCompilable() {
  if (compiled) {
    ml_loge("Graph is already compiled");
    return ML_ERROR_NOT_SUPPORTED;
  }

  if (graph.empty()) {
    ml_loge("Graph is empty");
    return ML_ERROR_INVALID_PARAMETER;
  }

  return ML_ERROR_NONE;
}
int NetworkGraph::checkCompiledGraph() {
  /** Dimension of input layers must be known */
  for (auto iter = cbegin(); iter != cend(); iter++) {
    auto lnode = (*iter);
    if (lnode->getNumInputConnections() == 0) {
      if (!lnode->hasInputShapeProperty()) {
        ml_loge("Layer with no inbound connection needs input_shape property");
        return ML_ERROR_INVALID_PARAMETER;
      }
    }
  }

  return ML_ERROR_NONE;
}
void NetworkGraph::markNodesForBackwarding() {
  /** accumulate all the nodes which must support backwarding */
  std::unordered_set<std::string> must_support_backwarding;

  /**
   * if a node is trainable, then all the nodes ahead of it must support
   * backwarding operation
   */
  for (auto iter = cbegin(); iter != cend(); iter++) {
    auto lnode = (*iter);
    if (lnode->getTrainable() ||
        must_support_backwarding.find(lnode->getName()) !=
          must_support_backwarding.end()) {
      if (lnode->getTrainable()) {
        lnode->needsCalcGradient(true);
      }
      if (lnode->supportBackwarding() && !optimize_memory) {
        lnode->needsCalcDerivative(true);
      }

      for (auto i = 0u, num_node = lnode->getNumOutputConnections();
           i < num_node; ++i) {
        auto conn = lnode->getOutputConnection(i);
        if (!conn) {
          continue;
        }

        must_support_backwarding.insert(conn->getName());
      }
    }
  }

  /** mark all the required nodes to support backwarding */
  for (auto const &node_name : must_support_backwarding) {
    auto ln = LNODE(graph.getNode(node_name)).get();
    ln->needsCalcDerivative(true);
  }
}
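/**
 * Example of the marking above (a sketch, with hypothetical layer names):
 * for in -> fc1 -> fc2 -> loss with only fc1 trainable, fc1 is marked
 * needsCalcGradient, and its downstream consumers fc2 and loss are
 * accumulated into must_support_backwarding and marked needsCalcDerivative,
 * so the derivative chain flows back from the loss to fc1.
 */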
void NetworkGraph::setBatchSize(unsigned int batch_size) {
  if (batch_size == this->batch_size)
    return;

  this->batch_size = batch_size;
  if (!input_list.empty() && getInputDimension()[0].batch() == batch_size)
    return;

  auto allocated = tensor_manager->isAllocated();

  if (allocated)
    deallocateTensors();

  for (auto iter = cbegin(); iter != cend(); iter++) {
    if ((*iter)->isFinalized()) {
      /// resize tensors spec
      /// @todo remove below, if custom tensor needs to change dimension
      /// according to the tensor, it must be done explicitly, or at least have
      /// a property to control the behavior
      const RunLayerContext &context = (*iter)->getRunContext();
      for (unsigned int idx = 0; idx < context.getNumTensors(); idx++) {
        auto const &ts = context.getTensor(idx);
        tensor_manager->setBatchSize(ts.getName(), ts.getDim().batch());
        if (context.tensorHasGradient(idx)) {
          auto const &ts_grad = context.getTensorGrad(idx);
          tensor_manager->setBatchSize(ts_grad.getName(),
                                       ts_grad.getDim().batch());
        }
      }
    }
    /// override setting batch as per request
    (*iter)->setBatch(batch_size);
  }

  /// resize input and output spec
  tensor_manager->setBatchSize(batch_size);

  if (allocated)
    allocateTensors(exec_mode);

  /** update input and label dimensions */
  for (unsigned int idx = 0; idx < input_list.size(); idx++)
    input_dims[idx] = tensor_manager->getTensor(input_list[idx])->getDim();
  for (unsigned int idx = 0; idx < label_list.size(); idx++)
    label_dims[idx] = tensor_manager->getTensor(label_list[idx])->getDim();
}
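/**
 * Usage sketch (hypothetical call sites): changing the batch size re-stamps
 * the batch dimension of every managed tensor and, when tensors were already
 * allocated, deallocates and re-allocates them with the new dimension:
 *
 *   graph.setBatchSize(1);  // e.g. single-sample inference
 *   graph.setBatchSize(32); // e.g. mini-batch training; tensors resized
 */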
void NetworkGraph::applyGradients(
  LayerNode *node, const std::function<void(Weight &)> &apply_func) {
  auto &rc = node->getRunContext();
  auto num_weight = rc.getNumWeights();
  for (unsigned i = 0; i < num_weight; ++i) {
    if (!rc.weightHasGradient(i)) {
      continue;
    }

    if (!rc.isGradientLastAccess(i)) {
      /// @note instead of checking the last access of the weight, checking
      /// if weights are dependent on others would minimize overhead.
      /// this logic assumes that the source of the dependent weight must be
      /// prior to the dependent.
      continue;
    }

    if (rc.isGradientClipByGlobalNorm(i)) {
      /**
       * @note the weights whose gradient are to be clipped by global norm
       * will be clipped at once at the end of the iteration and applied then.
       */
      continue;
    }

    apply_func(rc.getWeightObject(i));
  }
}
sharedConstTensors
NetworkGraph::forwarding(bool training,
                         std::function<bool(void *userdata)> stop_cb) {
  for (auto iter = cbegin(); iter != cend() && !stop_cb(nullptr); iter++) {
    auto const &ln = *iter;
    PROFILE_TIME_START(profile_keys.at(ln->getType()));
    PROFILE_MEM_ANNOTATE("Forwarding for layer: " + ln->getName());

    auto f = std::get<0>(ln->getExecutionOrder());
    flushCacheExcept(f);

    ln->forwarding(training);

    PROFILE_TIME_END(profile_keys.at(ln->getType()));
  }

  sharedConstTensors out;
  for (unsigned int i = 0; i < graph.getNumOutputNodes(); ++i) {
    auto const &output_layer_node = LNODE(graph.getOutputNode(i));
    for (unsigned int j = 0; j < output_layer_node->getNumOutputs(); ++j) {
      out.push_back(MAKE_SHARED_TENSOR(output_layer_node->getOutput(j)));
    }
  }

  return out;
}
void NetworkGraph::backwarding(
  int iteration,
  std::function<void(std::shared_ptr<LayerNode>, int)> &backwarding_op,
  std::function<void(Weight &, int)> &apply_grad_clip_op,
  std::function<bool(void *userdata)> stop_cb) const {
  /**
   * last layer backwarding is run out of this loop
   */
  auto iter_begin = getBackwardingBeginIter();
  auto iter_end = getBackwardingEndIter();

  /// there is no layer to train, so backwarding is essentially a noop
  if (iter_begin == iter_end) {
    return;
  }

  auto const &lptr_begin = (*iter_begin);

  if (lptr_begin->requireLabel() == false)
    throw std::runtime_error(
      "Error: last layer does not accept label, we can't train");

  for (auto iter = iter_begin; iter != iter_end && !stop_cb(nullptr); iter++) {
    auto &ln = *iter;
    PROFILE_TIME_START(profile_keys.at(ln->getType()));
    backwarding_op(ln, iteration);
    PROFILE_TIME_END(profile_keys.at(ln->getType()));
  }

  /** perform clipping of the gradients by global norm if any */
  if (clip_weights.empty())
    return;

  /** calculate the global norm */
  Tensor global_norm_t(
    TensorDim({1u, 1u, 1u, (unsigned int)clip_weights.size()}));
  float *global_norm_data = global_norm_t.getData();
  for (unsigned int idx = 0; idx < clip_weights.size(); idx++) {
    auto const &w = clip_weights[idx];
    global_norm_data[idx] = w->getGradientNorm();
  }
  float global_norm = global_norm_t.l2norm();
  /** clip each gradient against the global norm */
  for (auto w : clip_weights) {
    w->clipGradientByGlobalNorm(global_norm);
  }
  /** apply the clipped gradients */
  for (auto w : clip_weights) {
    apply_grad_clip_op(*w, iteration);
  }
}
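/**
 * The global norm above is computed as (a sketch of the math, not new code):
 *   global_norm = sqrt(sum_i ||g_i||_2^2)
 * where g_i is the gradient of the i-th clipped weight; collecting the
 * per-weight norms into a tensor and taking its l2norm() yields exactly this.
 * Each weight then rescales its gradient against this shared norm inside
 * clipGradientByGlobalNorm(), conventionally as
 *   g_i <- g_i * max_norm / max(global_norm, max_norm)
 * so the concatenation of all clipped gradients never exceeds max_norm.
 * (The rescaling formula is the conventional one and assumes the Weight
 * implementation follows it.)
 */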
LayerNode *NetworkGraph::computeBackwardEnd() {
  int max_exec_order = -1;
  LayerNode *node = nullptr;

  if (!optimize_memory) {
    return (*cbegin()).get();
  }

  for (auto iter = getBackwardingBeginIter(); iter != getBackwardingEndIter();
       iter++) {
    auto &ln = *iter;
    const auto &exec_order = ln->getExecutionOrder();
    int cur_order = std::get<0>(exec_order);
    if (ln->needsCalcDerivative() || ln->needsCalcGradient()) {
#ifdef ENABLE_TEST
      cur_order = std::get<2>(exec_order);
#else
      cur_order = std::get<1>(exec_order);
#endif
    }

    NNTR_THROW_IF(max_exec_order == cur_order, std::invalid_argument)
      << "layer node: " << ln->getName()
      << " has duplicated max_exec_order, this should not happen, current "
         "order: "
      << cur_order;

    if (max_exec_order < cur_order) {
      max_exec_order = cur_order;
      node = ln.get();
    }
  }

  return node;
}
/**
 * @brief Allocate memory for all the managed tensors
 */
void NetworkGraph::allocateTensors(ExecutionMode exec_mode_) {
  exec_mode = exec_mode_;
  if (exec_mode == ExecutionMode::INFERENCE)
    /**
     * get the order of execution/usage order for the forwarding of the last
     * layer and pass that as the max_exec_order ensuring that all tensors
     * with usage less than the max_exec_order are allocated.
     */
    tensor_manager->allocateTensors(
      std::get<0>((*(cend() - 1))->getExecutionOrder()));
  else {
    /**
     * get the order of execution/usage order for the backwarding of the first
     * layer (as that will be the last layer to be executed in the backwarding)
     * and pass that as the max_exec_order ensuring that all tensors with
     * usage less than the max_exec_order are allocated.
     */
    tensor_manager->allocateTensors(
      std::get<2>(backward_iter_end->getExecutionOrder()));
  }
}
std::vector<TensorDim> NetworkGraph::getInputDimension() const {
  NNTR_THROW_IF(input_dims.empty(), std::invalid_argument)
    << "[NetworkGraph] the graph has no node identified as input!";
  return input_dims;
}

unsigned int NetworkGraph::getBatchSize() const { return batch_size; }
std::vector<TensorDim> NetworkGraph::getOutputDimension() const {
  NNTR_THROW_IF(label_dims.empty(), std::invalid_argument)
    << "[NetworkGraph] the graph has no node identified as output!";
  /// for now, outputting label_dims works, later label dim will be different
  /// from output dimension
  return label_dims;
}
std::vector<std::shared_ptr<LayerNode>>
NetworkGraph::getUnsortedLayers(const std::string &input_layer,
                                const std::string &output_layer) const {
  /// @fixme: this won't work if input, output layers are not in order
  /// Further, this function must be removed. There should rather be
  /// getAllNames and getLayerByName instead of getUnsortedLayers.

  /** count layers after output layer */
  unsigned int num_layers_remove_end = 0;
  if (!output_layer.empty()) {
    for (auto iter = graph.crbegin(); iter != graph.crend(); iter++) {
      if ((*iter)->getName() != output_layer)
        num_layers_remove_end++;
      else
        break;
    }
  }

  if (num_layers_remove_end == graph.size())
    return {};

  /** count layers before input layer */
  unsigned int num_layers_remove_start = 0;
  if (!input_layer.empty()) {
    for (auto iter = graph.cbegin();
         iter != graph.cend() - num_layers_remove_end; iter++) {
      if ((*iter)->getName() != input_layer)
        num_layers_remove_start++;
      else
        break;
    }
  }

  /** copy the graph and return */
  std::vector<std::shared_ptr<LayerNode>> ret;
  std::transform(graph.cbegin() + num_layers_remove_start,
                 graph.cend() - num_layers_remove_end, std::back_inserter(ret),
                 [](auto const &elem) { return LNODE(elem); });

  return ret;
}
std::vector<std::shared_ptr<LayerNode>> NetworkGraph::getLayerNodes() const {
  return std::vector<std::shared_ptr<LayerNode>>(cbegin(), cend());
}
void NetworkGraph::addLayer(std::shared_ptr<LayerNode> layer) {
  if (compiled)
    throw std::runtime_error("Cannot modify graph after compile");

  /** Insert the layer to the graph */
  graph.addNode(layer);
}
InPlace
NetworkGraph::canExecuteInPlace(const std::shared_ptr<LayerNode> &lnode) {
  if (!lnode->supportInPlace())
    return InPlace::NONE;

  /** layers which behave as a no-op - flatten */
  auto no_op = [](const std::shared_ptr<LayerNode> &lnode) {
    return lnode->getType() == FlattenLayer::type ||
           lnode->getType() == IdentityLayer::type;
  };

  /** layers which behave as a no-op but share memory among parallel nodes -
   * multiout */
  auto no_op_shared = [](const std::shared_ptr<LayerNode> &lnode) {
    return lnode->getType() == MultiOutLayer::type;
  };

  /**
   * layers whose backwarding is not dependent on input/output but only on
   * its derivatives and weights, if any - batch normalization
   */
  auto io_independent_backwarding =
    [](const std::shared_ptr<LayerNode> &lnode) {
      return (lnode->getType() == BatchNormalizationLayer::type) ||
             (lnode->getType() == LayerNormalizationLayer::type);
    };

  /**
   * @note Conditions to decide if this layer node can be in-place:
   * 1. if the layer is a no-op, then it can operate in-place as it is not
   * modifying its input/output tensors and does not need to check its
   * neighboring nodes for dependency.
   * 2. if the layer is not supporting backwarding, there is no dependency
   * requirement with other nodes for backwarding.
   *
   * @note Conditions to decide the type of inplace for this layer:
   * 1. if the previous layers were restricting, then this layer will also be
   * restricting.
   * 2. if the previous layers were non_restricting or not inplace, then this
   * layer will be non-restricting.
   */
  if (no_op(lnode) || !lnode->supportBackwarding()) {
    for (auto i = 0u, num_node = lnode->getNumInputConnections();
         i < num_node; ++i) {
      const auto &input_name = lnode->getInputConnectionName(i);
      if (getLayerNode(input_name)->executeInPlace() == InPlace::RESTRICTING)
        return InPlace::RESTRICTING;
    }
    return InPlace::NON_RESTRICTING;
  }

  /**
   * @note Conditions to decide if this layer node can be in-place:
   * if the layer is a no-op-shared, then it can operate in-place as it is not
   * modifying its input/output tensors and does not need to check its
   * neighboring nodes for dependency.
   *
   * @note Conditions to decide the type of inplace for this layer:
   * as all the output nodes are sharing memory, the output nodes can't
   * execute in-place, so it is restricting mode.
   */
  if (no_op_shared(lnode))
    return InPlace::RESTRICTING;

  /**
   * @note Conditions to decide if this layer node can be in-place:
   * This is a generic case where the layer can support in-place but will
   * modify its input in-place. This includes layers like activation, etc.
   * Apply the checks below to ensure that the layers can work in-place:
   * - if any of the input layers are restricting, then this layer cannot work
   * in-place, as layers behind this layer have added restrictions.
   * - if all of the input layers are either not inplace or have no
   * restrictions, then this layer can operate in-place.
   *
   * @note Conditions to decide the type of inplace for this layer:
   * This is a generic case, and it always restricts the next nodes from
   * being in-place.
   *
   * @note This logic is prone to change as more layers are allowed to
   * work in-place such as concat layer, split layer, addition layer, dropout
   * layer, etc.
   *
   * @todo This logic sets layers to in-place one-by-one as they arrive.
   * However, setting some layers to in-place can save more memory than others
   * (like multiout layer vs activation layer). The layers need to be sorted
   * based on the memory they save and then made in-place in that order.
   */
  if (lnode->getType() == ActivationLayer::type ||
      lnode->getType() == BatchNormalizationLayer::type ||
      lnode->getType() == LayerNormalizationLayer::type) {
    for (auto i = 0u, num_node = lnode->getNumInputConnections();
         i < num_node; ++i) {
      if (getLayerNode(lnode->getInputConnectionName(i))->executeInPlace() ==
          InPlace::RESTRICTING)
        return InPlace::NONE;
    }

    /**
     * if the layer does io_independent_backwarding where the input and output
     * is not required during backwarding, then it is a non-restricting
     * in-place layer.
     */
    if (io_independent_backwarding(lnode))
      return InPlace::NON_RESTRICTING;

    return InPlace::RESTRICTING;
  }

  return InPlace::NONE;
}
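/**
 * Decision example (a sketch, with hypothetical layer names): for
 *   in -> flatten -> activation -> multiout
 * flatten is a no-op whose input is not restricting, so it reports
 * NON_RESTRICTING; activation falls into the generic case and reports
 * RESTRICTING; multiout is no-op-shared and reports RESTRICTING, which in
 * turn forces a no-op consumer of multiout to report RESTRICTING and any
 * later activation fed by it to give up in-place entirely (NONE).
 */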
void NetworkGraph::inPlaceOptimize() {
  if (optimize_memory) {
    for (unsigned int idx = 0; idx < graph.size(); ++idx) {
      auto const &lnode = getSortedLayerNode(idx);
      lnode->executeInPlace(canExecuteInPlace(lnode));
    }
  }
}
/**
 * @brief Set the Inplace Shared Memory Config By Layer object
 *
 * @param lnode layer node object
 * @param shared_var if the variable should be shared
 * @param shared_grad if the gradient should be shared
 */
static void
setInplaceSharedMemoryConfigByLayer(const std::shared_ptr<LayerNode> &lnode,
                                    bool &shared_var, bool &shared_grad) {
  /** for multiout layer, variables are shared but gradients are not */
  if (lnode->getType() == MultiOutLayer::type) {
    shared_var = true;
    shared_grad = false;
  } else {
    shared_var = true;
    shared_grad = true;
  }
  /** @todo for addition layer, variables are not shared but gradients are */
  /**
   * @todo for layers which support in-place, both variables and gradients
   * will be shared.
   *
   * @todo add a check here whether the layer being checked can support
   * in-place or not
   */
}
std::vector<Var_Grad *>
NetworkGraph::finalizeContext(const std::shared_ptr<LayerNode> &lnode,
                              const std::vector<Var_Grad *> &prev_inputs) {
  const GraphNode &gnode = *lnode.get();
  std::vector<TensorDim> input_dims;
  input_dims.reserve(prev_inputs.size());
  std::transform(prev_inputs.begin(), prev_inputs.end(),
                 std::back_inserter(input_dims),
                 [](const Var_Grad *vg) { return vg->getDim(); });

  /** finalize the layer and get the final context */
  auto init_context = lnode->finalize(input_dims);

  /**
   * Request manager for either a pre-allocated output as input or a newly
   * allocated input. This is necessary for the manager to know when this
   * input is going to be used.
   */
  std::vector<std::string> input_names;
  input_names.reserve(prev_inputs.size());
  std::transform(prev_inputs.begin(), prev_inputs.end(),
                 std::back_inserter(input_names),
                 [](auto const &vg) { return vg->getName(); });
  const std::vector<Var_Grad *> &inputs = tensor_manager->requestInputs(
    gnode, init_context.getInputDimensions(), input_names);

  /** In-Place optimizations */
  /**
   * Request manager for either a pre-allocated input as output or a newly
   * allocated output. This is necessary for the manager to know when this
   * output node is going to be used with in-place optimizations.
   */
  auto out_specs = init_context.getOutSpecs();
  /// @note try to move inplace control to finalize
  bool shared_var = false, shared_grad = false;
  if (lnode->executeInPlace() != InPlace::NONE) {
    setInplaceSharedMemoryConfigByLayer(lnode, shared_var, shared_grad);
    for (unsigned int i = 0; i < out_specs.size(); ++i) {
      auto &s = out_specs.at(i);
      if (shared_var) {
        s.variable_spec.request_type =
          TensorSpecV2::RequestType::READ_ONLY_VIEW;
        if (lnode->getType() == IdentityLayer::type) {
          s.variable_spec.reference_name = inputs[i]->getName();
        } else {
          s.variable_spec.reference_name = inputs[0]->getName();
        }
      }
      if (shared_grad && s.gradient_spec) {
        s.gradient_spec->request_type =
          TensorSpecV2::RequestType::READ_ONLY_VIEW;
        if (lnode->getType() == IdentityLayer::type) {
          s.gradient_spec->reference_name = inputs[i]->getGradientName();
        } else {
          s.gradient_spec->reference_name = inputs[0]->getGradientName();
        }
      }
    }
  }
  if (lnode->requireLabel()) {
    NNTR_THROW_IF(out_specs.size() != 1, std::invalid_argument)
      << "out specification size must be 1 for label layer for now, "
      << lnode->getName() << " out spec size: " << out_specs.size();
    NNTR_THROW_IF(out_specs[0].gradient_spec == nullptr, std::invalid_argument)
      << "label space does not exist for " << lnode->getName();
    out_specs[0].gradient_spec->request_type =
      TensorSpecV2::RequestType::PLACEHOLDER;
  }

  /// @note below needs to be enabled only for inference mode, but we need a
  /// decision on whether to separate inference initialization from train
  /// initialization; this might not be worth optimizing because in general
  /// the output of a neural net is very small
  if (lnode->getOutputConnections().size() == 0u) {
    std::for_each(out_specs.begin(), out_specs.end(),
                  [this](VarGradSpecV2 &spec) {
                    spec.variable_spec.additional_exec_order.push_back(
                      std::get<0>(forward_iter_end->getExecutionOrder()));
                  });
  }

  const std::vector<Var_Grad *> &outputs = tensor_manager->requestTensors(
    out_specs, Manager::TensorGroupType::OUTPUT, lnode->getExecutionOrder(),
    lnode->getName());

  /** create shared weight names if requested */
  std::vector<std::string> shared_weight_names;
  std::vector<std::string> shared_tensor_names;
  if (auto shared_node_str = lnode->getSharedFrom(); !shared_node_str.empty()) {
    /// @note below is commented but kept from a quick fix, to be referenced
    /// later
    // auto shared_node = getLayerNode(shared_node_str).get();
    // NNTR_THROW_IF(shared_node == nullptr, std::invalid_argument)
    //   << "shared_node requested but it is not registered in the graph, "
    //      "name: "
    //   << shared_node_str << " requested from " << lnode->getName();
    // NNTR_THROW_IF(shared_node->getType() != lnode->getType(),
    //               std::invalid_argument)
    //   << " shared_node and lnode type mismatch, source node type: "
    //   << shared_node->getType()
    //   << " dependent node type: " << lnode->getType()
    //   << " dependent node name: " << lnode->getName();
    // NNTR_THROW_IF(!shared_node->isFinalized(), std::invalid_argument)
    //   << "shared node must be prior to the dependent node and it should be "
    //      "finalized beforehand, shared node name: "
    //   << shared_node_str << " dependent node name: " << lnode->getName();
    // auto num_weight = shared_node->getNumWeights();
    // shared_weight_names.reserve(num_weight);
    // for (auto i = 0u; i < num_weight; ++i) {
    //   shared_weight_names.emplace_back(shared_node->getWeightName(i));
    // }
    // auto &rc = node->getRunContext();

    /// @fixme tensors should only be shared if the context explicitly
    /// requested to do so. This has to be added to the part of tensor spec,
    /// otherwise it will break many things
    const auto &t_specs = init_context.getTensorsSpec();
    for (auto i = 0u; i < t_specs.size(); ++i) {
      shared_tensor_names.emplace_back(std::get<3>(t_specs.at(i)));
    }

    const auto &w_specs = init_context.getWeightsSpec();
    for (auto i = 0u; i < w_specs.size(); ++i) {
      shared_weight_names.emplace_back(std::get<7>(w_specs.at(i)));
    }
  }

  lnode->configureRunContext(
    // TODO: update weights spec for trainable based on layer trainable prop
    tensor_manager->requestWeights(gnode, init_context.getWeightsSpec(),
                                   lnode->getTrainable(), shared_weight_names),
    inputs, outputs,
    tensor_manager->requestTensors(gnode, init_context.getTensorsSpec(),
                                   lnode->getTrainable(),
                                   shared_tensor_names));

  return outputs;
}
int NetworkGraph::initialize(const std::vector<Connection> &model_input_names,
                             const std::vector<Connection> &model_label_names) {
  /**
   * this contains the map from node name to its input tensor names
   * @note: these input tensors have already been allocated
   */
  std::unordered_map<std::string, std::vector<Var_Grad *>> input_map;

  /** check if the given config of node is of input node */
  auto is_input_node = [](const LayerNode *node) -> bool {
    return node->getInputConnections().empty();
  };

  for (unsigned int idx = 0; idx < graph.size(); ++idx) {
    std::vector<Var_Grad *> inputs = {};
    auto const &lnode = getSortedLayerNode(idx);
    ml_logd("layer name : %s", lnode->getName().c_str());

    if (profile_keys.find(lnode->getType()) == profile_keys.end()) {
      int event_key = 0;
      PROFILE_TIME_REGISTER_EVENT(event_key, lnode->getType());
      profile_keys[lnode->getType()] = event_key;
    }

    /**
     * Set input dimension for all the layers.
     * For input layer, as input dimension is known, set input tensor.
     */
    if (!is_input_node(lnode.get())) {
      if (input_map.find(lnode->getName()) == input_map.end())
        throw std::runtime_error("Cannot find input buffers for the node");
      inputs = input_map.at(lnode->getName());
    }

    /**
     * Initialize all the layers, allocate output tensors for each layer
     * and add optimizer related weights for the layer
     */
    const std::vector<Var_Grad *> &outputs = finalizeContext(lnode, inputs);

    /** no need to update input_map for the last layer */
    if (idx == graph.size() - 1)
      break;

    for (auto i = 0u, num_node = lnode->getNumOutputConnections();
         i < num_node; ++i) {
      auto conn = lnode->getOutputConnection(i);
      if (!conn) {
        ml_logi("out connection not defined for %s, %u",
                lnode->getName().c_str(), i);
        continue;
      }

      auto sink_node = getLayerNode(conn->getName());
      [[maybe_unused]] auto [it, b] =
        input_map.try_emplace(sink_node->getName());

      NNTR_THROW_IF(sink_node->getInputConnectionName(conn->getIndex()) !=
                      lnode->getName(),
                    std::invalid_argument)
        << "node pair does not match between " << lnode->getName() << ' '
        << sink_node->getName();

      auto &sink_tensors = it->second;
      sink_tensors.resize(sink_node->getNumInputConnections());
      sink_tensors[conn->getIndex()] = outputs[i];
    }
  }

  for (unsigned int idx = 0; idx < graph.size(); ++idx) {
    auto const &lnode = getSortedLayerNode(idx);
    auto &rc = lnode->getRunContext();
    auto first_grad_access = std::get<1>(lnode->getExecutionOrder());
    auto last_grad_access = std::get<2>(lnode->getExecutionOrder());
    for (unsigned i = 0; i < rc.getNumWeights(); ++i) {
      if (!rc.weightHasGradient(i)) {
        /// @todo this is duct taping that MUST BE REMOVED. We will need to
        /// have an "is weight first access" kind of concept.
        if (tensor_manager->isFirstAccess(
              rc.getWeight(i).getName(),
              std::get<0>(lnode->getExecutionOrder()), true)) {
          rc.getWeightObject(i).setAsGradientFirstAccess();
        }
        if (tensor_manager->isLastAccess(rc.getWeight(i).getName(),
                                         last_grad_access, true)) {
          rc.getWeightObject(i).setAsGradientLastAccess();
        }
      } else {
        if (tensor_manager->isFirstAccess(rc.getWeightGrad(i).getName(),
                                          first_grad_access)) {
          rc.getWeightObject(i).setAsGradientFirstAccess();
        }
        /**
         * if the gradient is to be clipped by global norm, then the last
         * access is by the clipping itself. However, as clipping is not a
         * layer and does not contain any weights, such weights never get
         * assigned gradient_last_access. This is a quick hotfix.
         * TODO: make an independent clipping layer which will execute at the
         * end, and will share ownership of weights which it will clip. This
         * will remove this hotfix, and also remove the checks of whether
         * weights require clipping.
         */
        if (tensor_manager->isLastAccess(rc.getWeightGrad(i).getName(),
                                         last_grad_access) ||
            (rc.isGradientClipByGlobalNorm(i) &&
             tensor_manager->isSecondLastAccess(rc.getWeightGrad(i).getName(),
                                                last_grad_access))) {
          rc.getWeightObject(i).setAsGradientLastAccess();
        }
      }
    }
  }
  /**** identify model input / output to be set externally later ****/
  auto identify_as_model_input = [this](LayerNode *node) {
    auto num_input = node->getNumInputs();
    NNTR_THROW_IF(num_input != 1, std::invalid_argument)
      << "Input layer is supposed to have exactly one input, but more than "
         "one input detected, num inputs: "
      << num_input;

    input_list.push_back(node->getInput(0).getName());
    input_dims.push_back(node->getInputDimensions()[0]);
  };

  auto is_label_node = [](LayerNode *node) { return node->requireLabel(); };

  auto identify_as_model_label = [this](LayerNode *node) {
    /// @todo change this as lnode->getNumLabels of sorts
    auto num_label = node->getNumOutputs();
    NNTR_THROW_IF(!node->getOutputConnections().empty(), std::invalid_argument)
      << "label layer is supposed to be a leaf for now";
    NNTR_THROW_IF(num_label != 1, std::invalid_argument)
      << "label layer is supposed to have exactly one label, but more than "
         "one label detected, num labels: "
      << num_label;

    /// @todo implement and use getLabel(0) instead.
    output_list.push_back(node->getOutput(0).getName());
    label_list.push_back(node->getOutputGrad(0).getName());
    label_dims.push_back(node->getOutputDimensions()[0]);
  };
  auto identify_external_tensors = [this](const std::vector<Connection> &conns,
                                          auto &&pred, auto &&identify) {
    if (conns.empty()) {
      for (unsigned int i = 0; i < graph.size(); ++i) {
        auto lnode = getSortedLayerNode(i).get();
        if (!pred(lnode)) {
          continue;
        }
        /// when name is empty, we identify everything as the node, all of
        /// them must be having identical dimensions
        identify(lnode);
      }
    } else {
      for (auto &conn : conns) {
        auto lnode = getLayerNode(conn.getName()).get();
        NNTR_THROW_IF(!pred(lnode), std::invalid_argument)
          << "given node is not of that kind, name: " << conn.getName();
        identify(lnode);
      }
      unsigned int num_node_of_kind = 0;
      for (unsigned int i = 0; i < graph.size(); ++i) {
        auto lnode = getSortedLayerNode(i).get();
        if (!pred(lnode)) {
          continue;
        }
        num_node_of_kind++;
      }
      NNTR_THROW_IF(num_node_of_kind != conns.size(), std::invalid_argument)
        << "conns given but the number of identified nodes of the kind does "
           "not match, num nodes of kind: "
        << num_node_of_kind << " identifier size: " << conns.size();
    }
  };

  identify_external_tensors(model_input_names, is_input_node,
                            identify_as_model_input);
  identify_external_tensors(model_label_names, is_label_node,
                            identify_as_model_label);
  /** mark the nodes which will be backwarded during the graph operation */
  try {
    markNodesForBackwarding();
    backward_iter_end = computeBackwardEnd();
  } catch (std::exception &e) {
    ml_loge(
      "Backwarding required from layer which doesn't support backwarding: %s",
      e.what());
    return ML_ERROR_INVALID_PARAMETER;
  }

  /** select weights which would require clipping of the gradients by global
   * norm if any */
  clip_weights = tensor_manager->getWeights([](const Weight *w) {
    return w->hasGradient() && w->isGradientLastAccess() &&
           w->isGradientClipByGlobalNorm();
  });

  return ML_ERROR_NONE;
}
void NetworkGraph::setExternalTensors(const std::vector<Tensor> &data,
                                      const std::vector<std::string> names) {
  /// feed or clear label
  for (unsigned int idx = 0; idx < names.size(); idx++) {
    if (data.empty())
      tensor_manager->fillPlaceholder(names[idx], Tensor());
    else if (data.size() == 1)
      tensor_manager->fillPlaceholder(names[idx], data[0]);
    else
      tensor_manager->fillPlaceholder(names[idx], data[idx]);
  }
}
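/**
 * Feeding semantics above (a sketch): an empty data vector clears every named
 * placeholder, a single tensor is broadcast to all names, and otherwise data
 * and names are matched index by index, e.g. (hypothetical call)
 *   setExternalTensors({x}, {"input0", "input1"}); // both fed from x
 */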
void NetworkGraph::setInputsLabels(const std::vector<Tensor> &inputs,
                                   const std::vector<Tensor> &labels) {
  NNTR_THROW_IF(labels.size() > 1 && labels.size() != label_list.size(),
                std::invalid_argument)
    << "label size does not match with the network requirements"
    << " label size: " << labels.size()
    << " requirements size: " << label_list.size();

  NNTR_THROW_IF(inputs.size() > 1 && inputs.size() != input_list.size(),
                std::invalid_argument)
    << "input size does not match with the network requirements"
    << " input size: " << inputs.size()
    << " requirements size: " << input_list.size();

  setExternalTensors(inputs, input_list);
  setExternalTensors(labels, label_list);
}
void NetworkGraph::setInputsLabels(sharedConstTensors &inputs,
                                   sharedConstTensors &labels) {
  std::vector<Tensor> ins;
  std::transform(inputs.begin(), inputs.end(), std::back_inserter(ins),
                 [](auto const &val) { return *val.get(); });

  std::vector<Tensor> labs;
  std::transform(labels.begin(), labels.end(), std::back_inserter(labs),
                 [](auto const &val) { return *val.get(); });

  setInputsLabels(ins, labs);
}
std::vector<Tensor> NetworkGraph::getOutputTensors() const {
  std::vector<Tensor> output_tensors;
  output_tensors.reserve(output_list.size());

  for (auto const &name : output_list)
    output_tensors.push_back(*tensor_manager->getTensor(name));

  return output_tensors;
}
void NetworkGraph::flushCache() { tensor_manager->flushCache(); }

void NetworkGraph::flushCacheExcept(unsigned int order) {
  tensor_manager->flushCacheExcept(order);
}
void NetworkGraph::requestOptimizerVariable(
  std::function<std::vector<TensorDim>(const TensorDim &)> cb,
  bool request_only_trainable) {
  for (auto const &w : tensor_manager->getWeights()) {
    if (w->isGradientLastAccess() && w->hasGradient()) {
      const TensorDim &dim = w->getDim();
      std::vector<TensorDim> dims = cb(dim);
      w->setOptimizerVariables(tensor_manager->requestWeightOptimizerVariables(
        dims, w->getName(), TensorLifespan::MAX_LIFESPAN,
        Tensor::Initializer::ZEROS));
    }
  }
}
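/**
 * Example callback (a sketch, assuming an Adam-style optimizer): the callback
 * maps a weight's dimension to the dimensions of the optimizer states it
 * needs, e.g. two moment tensors of the same shape:
 *
 *   requestOptimizerVariable(
 *     [](const TensorDim &dim) {
 *       return std::vector<TensorDim>{dim, dim}; // m and v for Adam
 *     },
 *     true);
 */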
} /* namespace nntrainer */