From 06646a1acf7d3ace6ed97b5f70e125ebd67aa62e Mon Sep 17 00:00:00 2001
From: Jihoon Lee
Date: Tue, 23 Nov 2021 23:30:16 +0900
Subject: [PATCH] [graph] Use connection info for outputs

This patch enables using connection information for the inbound
connections starting from the outputs, while deprecating some unused
methods.

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Jihoon Lee
---
 .../compiler/previous_input_realizer.cpp |   2 +-
 nntrainer/graph/network_graph.cpp        |  52 +++++----
 nntrainer/layers/layer_node.cpp          | 101 +++++++++---------
 nntrainer/layers/layer_node.h            |  59 +++++-----
 4 files changed, 113 insertions(+), 101 deletions(-)

diff --git a/nntrainer/compiler/previous_input_realizer.cpp b/nntrainer/compiler/previous_input_realizer.cpp
index 5131e2f3..19d40dd6 100644
--- a/nntrainer/compiler/previous_input_realizer.cpp
+++ b/nntrainer/compiler/previous_input_realizer.cpp
@@ -65,7 +65,7 @@ PreviousInputRealizer::realize(const GraphRepresentation &reference) {
                 "being set ",
                 node->getName().c_str(), prev_node->getName().c_str());
-    node->setInputLayers({prev_node->getName()});
+    node->setProperty({"input_layers=" + prev_node->getName()});
   }
 
   return processed;
diff --git a/nntrainer/graph/network_graph.cpp b/nntrainer/graph/network_graph.cpp
index b5088790..1957bbc2 100644
--- a/nntrainer/graph/network_graph.cpp
+++ b/nntrainer/graph/network_graph.cpp
@@ -14,6 +14,8 @@
 #include
 #include
+#include
+#include
 #include
 #include
 
@@ -150,8 +152,10 @@ int NetworkGraph::addLossLayer(const std::string &loss_type_) {
     lnode->setProperty({"distribute=true"});
   }
 
+  /// @todo remove this by adding loss at realization
   second_to_last_layer_node->setOutputLayers({lnode->getName()});
-  lnode->setInputLayers({second_to_last_layer_node->getName()});
+  lnode->setProperty(
+    {"input_layers=" + second_to_last_layer_node->getName()});
 
   if (is_cross_entropy_loss) {
     graph.replaceNode(output_layer_node, lnode);
@@ -226,8 +230,15 @@ void NetworkGraph::markNodesForBackwarding() {
       lnode->needsCalcDerivative(true);
     }
 #endif
-    for (auto const &out_layer : lnode->getOutputLayers()) {
-      must_support_backwarding.insert(out_layer);
+
+    for (auto i = 0u, num_node = lnode->getNumOutputConnections();
+         i < num_node; ++i) {
+      auto conn = lnode->getOutputConnection(i);
+      if (!conn) {
+        continue;
+      }
+
+      must_support_backwarding.insert(conn->getName());
     }
   }
 }
@@ -785,23 +796,28 @@ int NetworkGraph::initialize(const std::vector<std::string> &model_input_names,
     if (idx == graph.size() - 1)
      break;
 
-    auto &output_layers = lnode->getOutputLayers();
-    for (unsigned int i = 0; i < output_layers.size(); ++i) {
-      auto out_layer_node = getLayerNode(output_layers[i]);
-      if (input_map.find(output_layers[i]) == input_map.end())
-        input_map.insert({output_layers[i], {}});
-
-      unsigned int j = 0;
-      for (; j < out_layer_node->getNumInputConnections(); ++j) {
-        if (istrequal(out_layer_node->getInputConnectionName(j),
-                      lnode->getName())) {
-          break;
-        }
+    for (auto i = 0u, num_node = lnode->getNumOutputConnections();
+         i < num_node; ++i) {
+      auto conn = lnode->getOutputConnection(i);
+      if (!conn) {
+        ml_logi("out connection not defined for %s, %u",
+                lnode->getName().c_str(), i);
+        continue;
       }
 
-      auto &in_map = input_map.at(output_layers[i]);
-      in_map.resize(out_layer_node->getNumInputConnections());
-      in_map[j] = outputs[i];
+      auto sink_node = getLayerNode(conn->getName());
+      [[maybe_unused]] auto [it, b] =
+        input_map.try_emplace(sink_node->getName());
+
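+      // the sink node must name this node back at the matching input
+      // index; otherwise the inbound and outbound views of this edge
+      // disagree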
+      NNTR_THROW_IF(sink_node->getInputConnectionName(conn->getIndex()) !=
+                      lnode->getName(),
+                    std::invalid_argument)
+        << "node pair does not match between " << lnode->getName() << ' '
+        << sink_node->getName();
+
+      auto &sink_tensors = it->second;
+      sink_tensors.resize(sink_node->getNumInputConnections());
+      sink_tensors[conn->getIndex()] = outputs[i];
     }
   }
 
diff --git a/nntrainer/layers/layer_node.cpp b/nntrainer/layers/layer_node.cpp
index 84be39da..e2821be2 100644
--- a/nntrainer/layers/layer_node.cpp
+++ b/nntrainer/layers/layer_node.cpp
@@ -169,7 +169,8 @@ LayerNode::LayerNode(std::unique_ptr<nntrainer::Layer> &&l) :
   inplace(InPlace::NONE),
   needs_calc_derivative(false),
   needs_calc_gradient(false),
-  output_layers(),
+  output_connections(),
+  effective_output_connection_size(0),
   run_context(nullptr),
   layer_node_props(
     new PropsType(props::Name(), props::Distribute(), props::Trainable(), {},
@@ -222,13 +223,23 @@ void LayerNode::setInputConnectionName(unsigned nth, const std::string &name) {
   input_layers.at(nth).get().getName() = name;
 }
 
+const Connection *LayerNode::getOutputConnection(unsigned nth) const {
+  return output_connections.at(nth).get();
+}
+
 void LayerNode::setOutputConnection(unsigned nth, const std::string &name,
                                     unsigned index) {
-  if (nth >= output_layers.size()) {
-    output_layers.resize(nth + 1);
+  if (nth >= output_connections.size()) {
+    output_connections.resize(nth + 1);
   }
 
-  output_layers[nth] = std::make_unique<Connection>(name, index);
+  auto &con = output_connections[nth];
+  NNTR_THROW_IF(con, std::invalid_argument)
+    << "cannot override connection, this slot is reserved for "
+    << con->toString();
+
+  effective_output_connection_size++;
+  con = std::make_unique<Connection>(name, index);
 }
 
 const std::string LayerNode::getName() const noexcept {
@@ -238,7 +249,7 @@ const std::string LayerNode::getName() const noexcept {
 
 std::ostream &operator<<(std::ostream &out, const LayerNode &l) {
 
-  auto &input_layers =
+  auto &input_connections =
     std::get<std::vector<props::InputConnection>>(*l.layer_node_props);
 
   out << "[" << l.getName() << '/' << l.getType() << "]\n";
@@ -250,8 +261,8 @@ std::ostream &operator<<(std::ostream &out, const LayerNode &l) {
     out << '\n';
   };
 
-  print_vector(input_layers, " input_layers");
-  // print_vector(l.output_layers, "output_layers");
+  print_vector(input_connections, " input_connections");
+  // print_vector(l.output_connections, "output_connections");
   return out;
 }
@@ -265,21 +276,21 @@ ActivationType LayerNode::getActivationType() const {
 }
 
 unsigned int LayerNode::getNumInputConnections() const {
-  auto &input_layers =
+  auto &input_conns =
     std::get<std::vector<props::InputConnection>>(*layer_node_props);
-  return input_layers.size();
+  return input_conns.size();
 }
 
 unsigned int LayerNode::getNumOutputConnections() const {
-  return output_layers.size();
+  return output_connections.size();
 }
 
 const std::vector<std::string> LayerNode::getInputLayers() const {
-  auto &input_layers =
+  auto &input_connections =
     std::get<std::vector<props::InputConnection>>(*layer_node_props);
   std::vector<std::string> names;
-  names.reserve(input_layers.size());
-  std::transform(input_layers.begin(), input_layers.end(),
+  names.reserve(input_connections.size());
+  std::transform(input_connections.begin(), input_connections.end(),
                  std::back_inserter(names),
                  [](const Connection &con) { return con.getName(); });
   return names;
@@ -287,14 +298,14 @@ const std::vector<std::string> LayerNode::getInputLayers() const {
 
 const std::vector<std::string> LayerNode::getOutputLayers() const {
   std::vector<std::string> names;
-  names.reserve(output_layers.size());
+  names.reserve(output_connections.size());
 
-  for (auto &output_layer : output_layers) {
-    if (output_layer == nullptr) {
+  for (auto &conn : output_connections) {
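+    // a null slot is an output whose consumer was never identified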
+    if (conn == nullptr) {
       ml_logw("intermediate output is empty for layer: %s", getName().c_str());
       continue;
     }
-    names.push_back(output_layer->getName());
+    names.push_back(conn->getName());
   }
   return names;
 }
@@ -355,33 +366,13 @@ nntrainer::Layer *LayerNode::getLayer() { return layer.get(); }
 
-void LayerNode::addInputLayers(const std::string &in_layer) {
-  auto &input_layers =
-    std::get<std::vector<props::InputConnection>>(*layer_node_props);
-  input_layers.emplace_back(Connection(in_layer, 0));
-}
-
-void LayerNode::addOutputLayers(const std::string &out_layer) {
-  output_layers.emplace_back(new Connection(out_layer, 0));
-}
-
-void LayerNode::setInputLayers(const std::vector<std::string> &layers) {
-  auto &input_layers =
-    std::get<std::vector<props::InputConnection>>(*layer_node_props);
-  input_layers.clear();
-  input_layers.reserve(layers.size());
-  std::transform(layers.begin(), layers.end(),
-                 std::back_inserter(input_layers),
-                 [](const std::string &id) {
-                   return Connection{id, 0};
-                 });
-}
-
 void LayerNode::setOutputLayers(const std::vector<std::string> &layers) {
-  output_layers.clear();
-  output_layers.reserve(layers.size());
+  output_connections.clear();
+  output_connections.reserve(layers.size());
   std::transform(
-    layers.begin(), layers.end(), std::back_inserter(output_layers),
-    [](const std::string &id) { return std::make_unique<Connection>(id); });
+    layers.begin(), layers.end(), std::back_inserter(output_connections),
+    [this](const std::string &id) { return std::make_unique<Connection>(id); });
+  effective_output_connection_size = layers.size();
 }
 
 bool LayerNode::hasInputShapeProperty() const {
@@ -424,9 +415,7 @@ const std::vector<TensorDim> LayerNode::getOutputDimensions() const {
 void LayerNode::exportTo(Exporter &exporter,
                          const ExportMethods &method) const {
   exporter.saveResult(*layer_node_props, method, this);
-  // TODO: update getLayer() for layerv2 and use getLayer()
   layer->exportTo(exporter, method);
-  /// have layer_v2 implementation
 }
 
 void LayerNode::read(std::ifstream &file) {
@@ -514,10 +503,24 @@ InitLayerContext LayerNode::finalize(const std::vector<TensorDim> &input_dims) {
   layer_node_props_realization = std::make_unique<RealizationPropsType>(
     props::Flatten(), props::Activation());
 
-  auto num_outputs = output_layers.size();
-  if (output_layers.empty()) {
-    num_outputs = 1;
-  }
+  /// If an intermediate output is not used anywhere, we would need delicate
+  /// handling, including an interface change for InitLayerContext, because
+  /// the layer would have to know which output is a dangling node. It is
+  /// better to treat such a graph as buggy behavior and reject it here.
+  /// If an output is genuinely optional (for example, lstmcell returns
+  /// hidden and cell, but cell might not be used elsewhere), this can be
+  /// handled by making cell the first output: then no unidentified
+  /// intermediate node remains, the check below passes, and lstmcell can
+  /// still query the number of requested outputs from the context.
+  NNTR_THROW_IF(getNumOutputConnections() != effective_output_connection_size,
+                std::invalid_argument)
+    << "Intermediate node is not used anywhere for node: " << getName()
+    << " num output connection: " << getNumOutputConnections()
+    << " effective_output_connection: " << effective_output_connection_size;
+
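+  // a node with no identified outbound connection (e.g. the last node of
+  // the graph) still produces a single output tensor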
+  auto num_outputs = effective_output_connection_size == 0
+                       ? 1
+                       : effective_output_connection_size;
 
   auto scope = getSharedFrom().empty() ? getName() : getSharedFrom();
 
   float max_norm = 0.0;
@@ -714,7 +717,7 @@ void LayerNode::remapConnections(
     remap_fn(name, idx);
   }
 
-  for (auto &output_layer : output_layers) {
+  for (auto &output_layer : output_connections) {
     if (output_layer == nullptr) {
       continue;
     }
diff --git a/nntrainer/layers/layer_node.h b/nntrainer/layers/layer_node.h
index 2bda7fa1..bb24fe8a 100644
--- a/nntrainer/layers/layer_node.h
+++ b/nntrainer/layers/layer_node.h
@@ -15,10 +15,10 @@
 * GraphNode. Each layer is wrapped with LayerNode in order to add it to a
 * graph. Each LayerNode contains only 1 layer inside. LayerNode also intercepts
 * certain properties of the layer which are either related to graph related
- * connections (input_layers, output_layers, activation, flatten, distribute,
- * name) or essential for the description of the layer (trainable, input_dims)
- * iself. These properties, if needed by the layer object, are provided access
- * to via LayerContext.
+ * connections (input_connections, output_connections, activation, flatten,
+ * distribute, name) or essential for the description of the layer (trainable,
+ * input_dims) itself. These properties, if needed by the layer object, are
+ * made accessible via LayerContext.
 */
 
 #ifndef __LAYER_NODE_H__
@@ -166,6 +166,15 @@ public:
   */
  void setInputConnectionName(unsigned nth, const std::string &name);
 
+  /**
+   * @brief Get the output connection object
+   *
+   * @param nth nth output
+   * @throws std::out_of_range if nth is out of range of getNumOutputConnections()
+   * @return const Connection * view of a connection, null means the slot is not connected
+   */
+  const Connection *getOutputConnection(unsigned nth) const;
+
  /**
   * @brief Set the Output Connection
   * @note Each output must be identified only ONCE.
@@ -410,34 +419,6 @@ public:
    return run_context->getNumWeights();
  }
 
-  /**
-   * @brief Get the Output Layers object
-   *
-   * @return const std::vector<std::string>
-   */
-  const std::vector<std::string> getOutputLayers() const;
-
-  /**
-   * @brief Add name to the input layers
-   *
-   * @param in_layer Name to be added
-   */
-  void addInputLayers(const std::string &in_layer);
-
-  /**
-   * @brief Add name to the output layers
-   *
-   * @param out_layer Name to be added
-   */
-  void addOutputLayers(const std::string &out_layer);
-
-  /**
-   * @brief Set the Input Layers object
-   *
-   * @param layers Name of the layers
-   */
-  void setInputLayers(const std::vector<std::string> &layers);
-
  /**
   * @brief Set the Output Layers object
   *
@@ -749,6 +730,14 @@ private:
   */
  const std::vector<std::string> getInputLayers() const;
 
+public:
+  /**
+   * @brief Get the Output Layers object
+   *
+   * @return const std::vector<std::string>
+   */
+  const std::vector<std::string> getOutputLayers() const;
+
  std::unique_ptr<nntrainer::Layer>
    layer; /**< The actual object in the graph node */
 
  bool needs_calc_gradient; /**< cache if this layer needs to do calcGradient */
 
  std::vector<std::unique_ptr<Connection>>
-    output_layers; /**< output layer names */
+    output_connections; /**< output layer names */
+  unsigned effective_output_connection_size; /**< effective output connection
+                                                size; this skips unconnected
+                                                slots, so the number can be
+                                                different from num_outputs() */
 
  std::unique_ptr<RunLayerContext>
    run_context; /**< context required for running/execution of the layer. This
-- 
2.34.1
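
Below is a minimal usage sketch of the outbound connection API this patch
introduces. It assumes layer_node.h as patched above; collect_output_names
is a hypothetical helper that mirrors what getOutputLayers() computes:

    #include <string>
    #include <vector>

    #include <layer_node.h>

    using nntrainer::Connection;
    using nntrainer::LayerNode;

    std::vector<std::string> collect_output_names(const LayerNode &node) {
      std::vector<std::string> names;
      names.reserve(node.getNumOutputConnections());

      for (unsigned int i = 0; i < node.getNumOutputConnections(); ++i) {
        // a slot may be null when the nth output has no identified consumer
        const Connection *conn = node.getOutputConnection(i);
        if (!conn) {
          continue;
        }
        names.push_back(conn->getName());
      }
      return names;
    }

Compared with the name-only getOutputLayers() view, the connection view also
exposes conn->getIndex(), which network_graph.cpp uses above to place each
output tensor directly into the matching input slot of the sink node.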